I want to read the contents of an Excel file stored in a GitHub repository using Python.
# Token passed to run_query, which sends it in the "Authorization: Bearer" header.
# NOTE(review): '****' is presumably a redacted placeholder -- supply a real token.
access_token = '****'
def run_query(query, access_token):
    """POST a GraphQL query to the GitHub endpoint and return the raw body.

    Args:
        query: GraphQL query text; sent as the ``query`` field of a JSON body.
        access_token: Token sent in the ``Authorization: Bearer`` header.

    Returns:
        Whatever ``read()`` returns for the HTTP response. If anything goes
        wrong, the error is printed and a fixed JSON string whose ``text``
        field is ``"giterror"`` is returned instead, so the caller can always
        pass the result to ``json.loads``.
    """
    url = 'https://api.github.***.com/graphql'
    try:
        data = json.dumps({'query': query})
        req = urllib2.Request(url, data)
        req.add_header('Authorization', 'Bearer ' + access_token)
        # NOTE(security): certificate verification is switched off here, so
        # the server's identity is not checked. Prefer an SSL context that
        # trusts the server's CA certificate over disabling verification.
        f = urllib2.urlopen(req, context=ssl._create_unverified_context())
        try:
            return f.read()
        finally:
            # Close the response even when read() fails.
            f.close()
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back a
        # well-formed placeholder document rather than raising.
        # The error is interpolated with %, because print() does not expand
        # "%s" placeholders by itself.
        print("Graphql fetch error in run_query function,%s" % (e,))
        return '{"data":{"repository":{"object":{"text":"giterror"}}}}'
def createGraphqlQuery(ownerVal, repoVal, branchVal, pathVal):
    """Build the GraphQL query that requests the ``text`` of one repository file.

    Args:
        ownerVal: Repository owner, used as the ``owner`` argument.
        repoVal: Repository name, used as the ``name`` argument.
        branchVal: Branch name; first half of the ``expression`` argument.
        pathVal: File path in the repository; second half of ``expression``.

    Returns:
        The query string, selecting ``text`` on a ``Blob`` found at
        ``"<branchVal>:<pathVal>"``.

    All four values are expected to be strings. Each is encoded with
    ``json.dumps`` so that quotes and backslashes inside a value are escaped
    instead of terminating the string literal and corrupting the query.

    NOTE(review): GitHub's schema documents ``Blob.text`` as UTF-8 text that
    is null for binary blobs, which would explain why only text files work
    with this query -- confirm against the API reference.
    """
    owner_literal = json.dumps(ownerVal)
    name_literal = json.dumps(repoVal)
    expression_literal = json.dumps(branchVal + ":" + pathVal)
    lines = [
        "",
        "{",
        "repository(owner: " + owner_literal + ", name: " + name_literal + ") {",
        "object(expression: " + expression_literal + ") {",
        "... on Blob {",
        "text",
        "}",
        "}",
        "}",
        "}",
        "",
    ]
    return "\n".join(lines)
# Location of the file to fetch: repository owner and name, then the branch
# and the path of the file inside the repository.
ownerVal, repoVal = "user", "TestRepo"
branchVal, pathVal = "master", "Teradata/testfile.xlsx"

# Build the query, send it, then decode the JSON reply and show it.
query = createGraphqlQuery(
    ownerVal=ownerVal,
    repoVal=repoVal,
    branchVal=branchVal,
    pathVal=pathVal,
)
raw_response = run_query(query, access_token)
result = json.loads(raw_response)
print(result)
I have tried the above code, but it only works for '.txt' files. Is there any way to achieve this for an Excel file?
I also tried using pandas, but it gives the following error:
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1051)>
code:
# read_csv cannot parse an .xlsx workbook, and the ".../tree/..." address is
# the repository's web page rather than the file itself, so load the workbook
# with read_excel from the raw-file URL instead.
# NOTE(review): assumes this GitHub host serves raw files at
# /<owner>/<repo>/raw/<branch>/<path> and that the file can be fetched without
# authentication -- confirm. The CERTIFICATE_VERIFY_FAILED error is a separate
# issue: Python must trust the host's CA certificate before any HTTPS download
# from it can succeed.
df = pd.read_excel(
    'https://github.***.com/user/TestRepo/raw/master/Teradata/testfile.xlsx'
)
print(df)