我正在编写从azure DevOps下载大型审计日志csv文件并根据给定条件过滤数据的脚本。这适用于小csv文件,但对于包含大数据的文件,它会失败
字段=下一个(读卡器) 停止迭代
有人可以帮助修改脚本吗?我正在MacOs上使用python 3.7.9
def getproject(url,pat):
response = requests.get(url, auth=HTTPBasicAuth(username='',password=pat))
if response.status_code == 200:
url_data = response.content
tempfile = open("temp.csv","wb")
tempfile.write(url_data)
tempfile.close()
return url_data
else:
print("\nERROR : Unable to conect The server...")
def FilterData():
lists =[]
pro_name=[]
RepoId =[]
RepoName=[]
new_file = open("temp_new.csv", 'w',newline='')
writer = csv.writer(new_file)
with open("temp.csv", 'r') as readFile:
reader = csv.reader(readFile)
fields = next(reader)
lists.append(fields)
for row in reader:
for field in row:
if field == "Git.RepositoryCreated":
lists.append(row)
writer.writerows(lists)
readFile.close()
new_file.close()
os.remove("temp.csv")
timestamp = (datetime.datetime.now())
timestamp = timestamp.strftime("%d%B%Y_%H%M%S")
file_name = "Data2_"+str(timestamp)+".csv"
file1 = open("temp_new.csv",'r')
df = pd.read_csv(file1)
for i in df["Data"]:
res = json.loads(i)
pro_name.append(res['ProjectName'])
RepoId.append(res['RepoId'])
RepoName.append(res['RepoName'])
Disp_Name = df["ActorDisplayName"]
ActionId = df["ActionId"]
TimeStamp = df["Timestamp"]
file1.close()
os.remove("temp_new.csv")
Header = ["Actor Display Name","Project
Name","RepoName","RepoId","ActionId","Timestamp"]
d=[Disp_Name,pro_name,RepoName,RepoId,ActionId,TimeStamp]
export_data = zip_longest(*d, fillvalue = '')
with open(file_name, 'w',newline='') as myfile:
wr = csv.writer(myfile)
wr.writerow(Header)
wr.writerows(export_data)
myfile.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser("This is used for getting list of the projects")
parser.add_argument("-o" , dest="org", help="org name")
parser.add_argument("-p" , dest="pat", help="pat value")
parser.add_argument("-sd" , dest="sdate", help="Start Date")
parser.add_argument("-ed" , dest="edate", help="End Date")
args = parser.parse_args()
org = args.org
token = args.pat
startdate = args.sdate
enddate = args.edate
url = "https://auditservice.dev.azure.com/{org_name}/_apis/audit/downloadlog?
format=csv&startTime={startdt}&endTime={enddt}&api-version=6.1-
preview.1".format(org_name=org,startdt=startdate,enddt=enddate)
#call "getproject" function to check url and token to further create required csv
getproject(url,token)
FilterData()
[+]在getproject函数中, 您应该使用try-except块来处理http错误等
[+]如果您试图下载的csv文件很大,最好将数据分块写入
至于字段=下一个(读卡器)STOPPR。 我不确定。¯_(ツ)_/¯ 尝试将代码放入调试器并单步执行
见:download large file in python with requests
相关问题 更多 >
编程相关推荐