Using BeautifulSoup to collect data by looping over different pages of a website

Posted 2024-04-18 07:51:34


Below is a web scraper that successfully pulls roster information from a team's website and exports it to a CSV file. As you can see, every team's site follows a similar URL pattern:

http://m.redsox.mlb.com/roster/
http://m.yankees.mlb.com/roster/

I am trying to create a loop that runs through each team's website, scrapes every player's roster information, and writes it all to one CSV file. At the top of the code I created a dictionary of team names, which get formatted into a URL to request each page. However, this strategy is not working: the resulting CSV only ever contains the data from the last page listed in the dictionary. Does anyone know how to modify this code so that it keeps the data from all of the pages in the team_list dictionary? Thanks in advance!

import requests
import csv
from bs4 import BeautifulSoup

team_list={'yankees','redsox'}

for team in team_list:
    page = requests.get('http://m.{}.mlb.com/roster/'.format(team))
    soup = BeautifulSoup(page.text, 'html.parser')

    soup.find(class_='nav-tabset-container').decompose()
    soup.find(class_='column secondary span-5 right').decompose()

    roster = soup.find(class_='layout layout-roster')
    names = [n.contents[0] for n in roster.find_all('a')]
    ids = [n['href'].split('/')[2] for n in roster.find_all('a')]
    number = [n.contents[0] for n in roster.find_all('td', index='0')]
    handedness = [n.contents[0] for n in roster.find_all('td', index='3')]
    height = [n.contents[0] for n in roster.find_all('td', index='4')]
    weight = [n.contents[0] for n in roster.find_all('td', index='5')]
    DOB = [n.contents[0] for n in roster.find_all('td', index='6')]
    team = [soup.find('meta',property='og:site_name')['content']] * len(names)

    with open('MLB_Active_Roster.csv', 'w', newline='') as fp:
        f = csv.writer(fp)
        f.writerow(['Name','ID','Number','Hand','Height','Weight','DOB','Team'])
        f.writerows(zip(names, ids, number, handedness, height, weight, DOB, team))
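
A note on the symptom: the loop above does visit both URLs; the file ends up with only one team because the with open('MLB_Active_Roster.csv', 'w', ...) block sits inside the loop, so every iteration truncates the file and rewrites it from scratch. A minimal sketch of one possible fix that keeps the csv module, assuming the selectors in the question still match the pages: open the file and write the header once, before the loop, and only append rows inside it. The team_name variable here is a rename of my own, purely to avoid shadowing the loop variable.

import requests
import csv
from bs4 import BeautifulSoup

team_list = ['yankees', 'redsox']

# Open the file and write the header once, before the loop, so each
# team's rows are appended instead of overwriting the previous team's.
with open('MLB_Active_Roster.csv', 'w', newline='') as fp:
    f = csv.writer(fp)
    f.writerow(['Name','ID','Number','Hand','Height','Weight','DOB','Team'])

    for team in team_list:
        page = requests.get('http://m.{}.mlb.com/roster/'.format(team))
        soup = BeautifulSoup(page.text, 'html.parser')

        # Same parsing as in the question's code
        soup.find(class_='nav-tabset-container').decompose()
        soup.find(class_='column secondary span-5 right').decompose()

        roster = soup.find(class_='layout layout-roster')
        names = [n.contents[0] for n in roster.find_all('a')]
        ids = [n['href'].split('/')[2] for n in roster.find_all('a')]
        number = [n.contents[0] for n in roster.find_all('td', index='0')]
        handedness = [n.contents[0] for n in roster.find_all('td', index='3')]
        height = [n.contents[0] for n in roster.find_all('td', index='4')]
        weight = [n.contents[0] for n in roster.find_all('td', index='5')]
        DOB = [n.contents[0] for n in roster.find_all('td', index='6')]
        team_name = [soup.find('meta', property='og:site_name')['content']] * len(names)

        # Appends this team's rows below whatever is already in the file
        f.writerows(zip(names, ids, number, handedness, height, weight, DOB, team_name))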

1 Answer

Answered 2024-04-18 07:51:34

I believe that by using a list in place of your dictionary, and accumulating every team's rows before writing them out once at the end, you should be able to solve the problem:

import requests
import csv
import pandas as pd

from bs4 import BeautifulSoup

team_list=['yankees','redsox']
output = []

for team in team_list:
    page = requests.get('http://m.{}.mlb.com/roster/'.format(team))
    soup = BeautifulSoup(page.text, 'html.parser')

    soup.find(class_='nav-tabset-container').decompose()
    soup.find(class_='column secondary span-5 right').decompose()

    roster = soup.find(class_='layout layout-roster')
    names = [n.contents[0] for n in roster.find_all('a')]
    ids = [n['href'].split('/')[2] for n in roster.find_all('a')]
    number = [n.contents[0] for n in roster.find_all('td', index='0')]
    handedness = [n.contents[0] for n in roster.find_all('td', index='3')]
    height = [n.contents[0] for n in roster.find_all('td', index='4')]
    weight = [n.contents[0] for n in roster.find_all('td', index='5')]
    DOB = [n.contents[0] for n in roster.find_all('td', index='6')]
    # renamed to avoid clobbering the loop variable `team`
    team_name = [soup.find('meta',property='og:site_name')['content']] * len(names)

    # collect one tuple per player; appending the raw column lists would
    # add a single malformed row per team instead
    output.extend(zip(names, ids, number, handedness, height, weight, DOB, team_name))

pd.DataFrame(data=output, columns=['Name','ID','Number','Hand','Height','Weight','DOB','Team']).to_csv('csvfilename.csv', index=False)
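
The key change is less the list-versus-dictionary swap than the structure around it: rows from every team accumulate in output, and the file is written exactly once, after the loop, so later teams no longer overwrite earlier ones. index=False simply stops pandas from adding a row-number column to the CSV.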
