使用Python从Google Drive/Workspace下载电子表格
你能给我一个Python的例子,教我怎么根据Google Sheets的关键字和工作表ID(gid
)下载一个电子表格吗?我搞不定。
我查了API的1、2和3版本,但都没找到解决办法。我搞不懂他们复杂的类似ATOM的API,gdata.docs.service.DocsService._DownloadFile
这个私有方法还说我没有权限,我也不想自己写一个完整的Google登录认证系统。真是让我快崩溃了。
我有几个电子表格,想这样访问它们:
username = 'mygooglelogin@gmail.com'
password = getpass.getpass()
def get_spreadsheet(key, gid=0):
... (help!) ...
for row in get_spreadsheet('5a3c7f7dcee4b4f'):
cell1, cell2, cell3 = row
...
求你帮帮我。
更新 1:我试过以下方法,但无论是Download()
还是Export()
都没用。(关于DocsService
的文档可以在这里找到)
import gdata.docs.service
import getpass
import os
import tempfile
import csv
def get_csv(file_path):
return csv.reader(file(file_path).readlines())
def get_spreadsheet(key, gid=0):
gd_client = gdata.docs.service.DocsService()
gd_client.email = 'xxxxxxxxx@gmail.com'
gd_client.password = getpass.getpass()
gd_client.ssl = False
gd_client.source = "My Fancy Spreadsheet Downloader"
gd_client.ProgrammaticLogin()
file_path = tempfile.mktemp(suffix='.csv')
uri = 'http://docs.google.com/feeds/documents/private/full/%s' % key
try:
entry = gd_client.GetDocumentListEntry(uri)
# XXXX - The following dies with RequestError "Unauthorized"
gd_client.Download(entry, file_path)
return get_csv(file_path)
finally:
try:
os.remove(file_path)
except OSError:
pass
13 个回答
18
你可以试试文档中提到的AuthSub方法,具体在导出电子表格的部分。
为电子表格服务获取一个单独的登录令牌,然后用这个令牌来替代导出时使用的。把这个添加到get_spreadsheet
的代码里对我来说是有效的:
import gdata.spreadsheet.service
def get_spreadsheet(key, gid=0):
# ...
spreadsheets_client = gdata.spreadsheet.service.SpreadsheetsService()
spreadsheets_client.email = gd_client.email
spreadsheets_client.password = gd_client.password
spreadsheets_client.source = "My Fancy Spreadsheet Downloader"
spreadsheets_client.ProgrammaticLogin()
# ...
entry = gd_client.GetDocumentListEntry(uri)
docs_auth_token = gd_client.GetClientLoginToken()
gd_client.SetClientLoginToken(spreadsheets_client.GetClientLoginToken())
gd_client.Export(entry, file_path)
gd_client.SetClientLoginToken(docs_auth_token) # reset the DocList auth token
注意我还用了Export
,因为Download
似乎只会下载PDF文件。
38
gspread这个库是一个更新、更简单的方法来与Google表格进行互动。相比之下,之前的解决方案推荐使用gdata
库,但这个库不仅太底层,而且还复杂得让人头疼。
你还需要创建并下载一个服务账户密钥,格式为JSON。可以在这里找到:https://console.developers.google.com/apis/credentials/serviceaccountkey
下面是一个使用它的例子:
import csv
import gspread
from oauth2client.service_account import ServiceAccountCredentials
scope = ['https://spreadsheets.google.com/feeds']
credentials = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
docid = "0zjVQXjJixf-SdGpLKnJtcmQhNjVUTk1hNTRpc0x5b9c"
client = gspread.authorize(credentials)
spreadsheet = client.open_by_key(docid)
for i, worksheet in enumerate(spreadsheet.worksheets()):
filename = docid + '-worksheet' + str(i) + '.csv'
with open(filename, 'wb') as f:
writer = csv.writer(f)
writer.writerows(worksheet.get_all_values())
20
如果有人遇到这个问题想找个快速解决办法,这里有一个目前有效的解决方案,它不依赖于gdata客户端库:
#!/usr/bin/python
import re, urllib, urllib2
class Spreadsheet(object):
def __init__(self, key):
super(Spreadsheet, self).__init__()
self.key = key
class Client(object):
def __init__(self, email, password):
super(Client, self).__init__()
self.email = email
self.password = password
def _get_auth_token(self, email, password, source, service):
url = "https://www.google.com/accounts/ClientLogin"
params = {
"Email": email, "Passwd": password,
"service": service,
"accountType": "HOSTED_OR_GOOGLE",
"source": source
}
req = urllib2.Request(url, urllib.urlencode(params))
return re.findall(r"Auth=(.*)", urllib2.urlopen(req).read())[0]
def get_auth_token(self):
source = type(self).__name__
return self._get_auth_token(self.email, self.password, source, service="wise")
def download(self, spreadsheet, gid=0, format="csv"):
url_format = "https://spreadsheets.google.com/feeds/download/spreadsheets/Export?key=%s&exportFormat=%s&gid=%i"
headers = {
"Authorization": "GoogleLogin auth=" + self.get_auth_token(),
"GData-Version": "3.0"
}
req = urllib2.Request(url_format % (spreadsheet.key, format, gid), headers=headers)
return urllib2.urlopen(req)
if __name__ == "__main__":
import getpass
import csv
email = "" # (your email here)
password = getpass.getpass()
spreadsheet_id = "" # (spreadsheet id here)
# Create client and spreadsheet objects
gs = Client(email, password)
ss = Spreadsheet(spreadsheet_id)
# Request a file-like object containing the spreadsheet's contents
csv_file = gs.download(ss)
# Parse as CSV and print the rows
for row in csv.reader(csv_file):
print ", ".join(row)