Python中的关键错误，使用Pycharm和Pandas

import requests import json import pandas as pd from bs4 import BeautifulSoup import re import concurrent.futures import sys URL = 'https://www.vault.com/vault/api/Rankings/LoadMoreCompanyRanksJSON' pg = 0 info = [] rank = input('Enter Rank\n 2 -All Law Firms\n 20 -IP Law Firms\n 275 - IP boutique law firms\n') year = input('Which Year? ') filename = input('Filename? ') while True: pg += 1 params = {'rank': rank, 'year': year, 'category': 'LBACCompany', 'pg': pg} response = requests.get(URL, params=params) if len(json.loads(response.content)) == 0: break print(pg) info = info + json.loads(response.content) def run(i): response = requests.get('http://vault.com' + info[i]['URL']) print(i) bs = BeautifulSoup(response.content, 'lxml') US_OFFICES = bs.find('strong', {'class': 'inlineOnly'}, text='No. of U.S. Offices:').next_sibling.strip() info[i]['# of U.S offices'] = US_OFFICES INT_OFFICES = bs.find('strong', {'class': 'inlineOnly'}, text='No. of International Offices:').next_sibling.strip() info[i]['# of International Offices'] = INT_OFFICES MAJOR_OFFICE = bs.find('div', {'class': 'col-lg-12 col-md-4'}).p.text.strip() info[i]['Major Office'] = MAJOR_OFFICE MAJOR_OFFICE_LOC = bs.find('strong', text='Major Office Locations').parent.p.text.strip() info[i]['Major Office Locations'] = MAJOR_OFFICE_LOC MAJOR_DEP = bs.find('strong', text='Major Departments').parent.p.text.strip() info[i]['Major Departments'] = MAJOR_DEP ATT = bs.find('strong', text='Firm Stats').parent.p.text ATT = re.search(r'Total No. Attorneys \d\d\d\d:\r\n.*', ATT) if ATT is not None: ATT = re.search(r'\r\n.*[0-9K+]', ATT.group()).group().strip() info[i]['# of Attorneys'] = ATT else: info[i]['# of Attorneys'] = '' with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor: executor.map(run, list(range(len(info)))) df = pd.DataFrame(data=info) df = df[['Year', 'Title', 'Rank', '# of U.S offices', '# of International Offices', 'Major Office', 'Major Office Locations', 'Major Departments', '# of Attorneys']] df.to_excel(filename, index=False) print('DONE!!!!!!!!!!!!!!!!!!')

1条回答

网友

1楼 · 发布于 2024-05-23 13:30:59

看起来您正在尝试设置数据帧的标题（如果我猜对的话）。如果是这种情况，您可以在创建数据帧时通过传入columns关键字参数传入名称，如下所示：

df = pd.DataFrame(data=info, columns=['Year', 'Title', 'Rank', '# of U.S offices', '# of International Offices', 'Major Office', 'Major Office Locations', 'Major Departments', '# of Attorneys'])

相关问题更多 >

编程相关推荐

热门问题

热门文章