Python script cannot read CSV file, fails with StopIteration

Posted 2024-05-12 21:27:57


I am writing a script that downloads a large audit-log CSV file from Azure DevOps and filters the data by given criteria. It works for small CSV files, but for files with a lot of data it fails with:

fields = next(reader)
StopIteration

Can someone help fix the script? I am using Python 3.7.9 on macOS.

import argparse
import csv
import datetime
import json
import os
from itertools import zip_longest

import pandas as pd
import requests
from requests.auth import HTTPBasicAuth


def getproject(url,pat):

    response = requests.get(url, auth=HTTPBasicAuth(username='',password=pat))

    if response.status_code == 200:
        url_data = response.content
        tempfile = open("temp.csv","wb")
        tempfile.write(url_data)
        tempfile.close()
        return url_data

    else:
        print("\nERROR : Unable to conect The server...")


def FilterData():
    lists   =[]
    pro_name=[]
    RepoId  =[]
    RepoName=[]

    new_file = open("temp_new.csv", 'w',newline='')
    writer = csv.writer(new_file)
    with open("temp.csv", 'r') as readFile:
    reader = csv.reader(readFile)
    fields = next(reader) 
    lists.append(fields)
    for row in reader:
        for field in row:
            if field == "Git.RepositoryCreated":
                lists.append(row)
    writer.writerows(lists) 
    readFile.close()
    new_file.close()
    os.remove("temp.csv")

    timestamp = (datetime.datetime.now())
    timestamp = timestamp.strftime("%d%B%Y_%H%M%S") 
    file_name = "Data2_"+str(timestamp)+".csv"

    file1 = open("temp_new.csv",'r')
    df = pd.read_csv(file1)
    for i in df["Data"]:
        res = json.loads(i)
        pro_name.append(res['ProjectName'])
        RepoId.append(res['RepoId'])
        RepoName.append(res['RepoName'])
    Disp_Name = df["ActorDisplayName"]
    ActionId  = df["ActionId"]
    TimeStamp = df["Timestamp"]
    file1.close()
    os.remove("temp_new.csv")


    Header = ["Actor Display Name","Project 
               Name","RepoName","RepoId","ActionId","Timestamp"]  
    d=[Disp_Name,pro_name,RepoName,RepoId,ActionId,TimeStamp]
    export_data = zip_longest(*d, fillvalue = '')
    with open(file_name, 'w',newline='') as myfile:
        wr = csv.writer(myfile)
        wr.writerow(Header)
        wr.writerows(export_data)

if __name__ == '__main__':

    parser = argparse.ArgumentParser(description="This is used for getting a list of the projects")
    parser.add_argument("-o" , dest="org", help="org name")
    parser.add_argument("-p" , dest="pat", help="pat value")
    parser.add_argument("-sd" , dest="sdate", help="Start Date")
    parser.add_argument("-ed" , dest="edate", help="End Date")

    args = parser.parse_args()
    org  = args.org
    token = args.pat
    startdate = args.sdate
    enddate = args.edate
    
    url = "https://auditservice.dev.azure.com/{org_name}/_apis/audit/downloadlog?  
        format=csv&startTime={startdt}&endTime={enddt}&api-version=6.1- 
        preview.1".format(org_name=org,startdt=startdate,enddt=enddate)

    # call "getproject" to validate the url and token and write the raw csv to temp.csv
    getproject(url,token)
 
    FilterData()

Tags: csv, name, org, parser, url, new, close, data
1 Answer
User
#1 · Posted 2024-05-12 21:27:57

[+] In the getproject function, you should use a try-except block to handle HTTP errors etc.

[+] If the CSV file you are trying to download is large, it is better to write the data in chunks

As for fields = next(reader) raising StopIteration, I'm not sure. ¯\_(ツ)_/¯ Try putting the code in a debugger and stepping through it
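One likely cause, though I can't confirm it from the thread alone: next(reader) raises StopIteration when the file it reads is empty, which is exactly what you get if getproject failed, printed its error, and FilterData ran anyway on an empty temp.csv. A minimal sketch of a guard:

import csv

# Sketch only: fail with a clear message on an empty temp.csv
# instead of letting next() raise a bare StopIteration.
with open("temp.csv", "r", newline="") as readFile:
    reader = csv.reader(readFile)
    fields = next(reader, None)  # returns None instead of raising on an empty file
    if fields is None:
        raise SystemExit("temp.csv is empty, the download probably failed")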

See: download large file in python with requests

import requests
from requests.auth import HTTPBasicAuth

def getproject(url,pat):
    try:
        # NOTE the stream=True parameter below
        with requests.get(url, auth=HTTPBasicAuth(username='',password=pat), stream=True) as r:
            r.raise_for_status()
            with open('temp.csv', 'wb') as f:  # keep the name FilterData reads
                for chunk in r.iter_content(chunk_size=8192): 
                    # If you have chunk encoded response uncomment if
                    # and set chunk_size parameter to None.
                    #if chunk: 
                    f.write(chunk)

    except requests.exceptions.ConnectionError as c_error:
        print(f"[-] Connection Error: {c_error}")
    except requests.exceptions.Timeout as t_error:
        print(f"[-] Connection Timeout Error: {t_error}")
    except requests.exceptions.RequestException as req_error:
        print(f"[-] Some Ambiguous Exception: {req_error}")


# This way seems faster, based on the comments on the link I shared
import requests
import shutil

def download_file(url):
    local_filename = url.split('/')[-1]
    with requests.get(url, stream=True) as r:
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)

    return local_filename
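
For illustration only (the URL here is a placeholder, not the audit-log endpoint):

path = download_file("https://example.com/logs/audit.csv")
print(f"saved to {path}")

One caveat with shutil.copyfileobj(r.raw, f): it copies the raw socket stream, so unlike iter_content it does not decode a gzip/deflate Content-Encoding. For a plain CSV download that is usually fine, but it is worth knowing if the saved bytes look compressed.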
