Accessing data from behind JS

Posted 2024-04-25 16:32:16


I'm trying to pull the goal times from the drop-down for each fixture at http://www.bbc.co.uk/sport/football/league-one/results

I can't seem to find that data anywhere when I search the page - any idea why?

import requests
from bs4 import BeautifulSoup

# Load page data
r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
soup = BeautifulSoup(r.content, "html.parser")
print(soup.prettify())

# Print the link text of every anchor (team names, navigation, etc.)
for link in soup.find_all("a"):
    print(link.text)

# Print the text of every abbr element (results)
for link in soup.find_all("abbr"):
    print(link.text)
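
One quick way to check whether the data is even present in the HTML that requests fetches is to search the raw response text; if a string you can see in the browser never appears there, the dropdown content is being filled in by JavaScript after the page loads, so BeautifulSoup will never find it. (A minimal check; "Walsall" below is just an arbitrary example search string.)

import requests

r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")

# If text you can see in the browser (a team name, a goal time) is missing
# from the raw response, it is being added client-side by JavaScript and
# won't be visible to BeautifulSoup. "Walsall" is only an illustrative term.
print("Walsall" in r.text)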



Tags: import, http, www, link, requests, one, results, bbc
1 answer

#1 · Posted by a community member on 2024-04-25 16:32:16

So this is a lot of data to pull (not to mention it will be slow loading all of the individual pages), and they may eventually block you for making too many requests, but it's the only way I can see. What I would do is loop through and grab the href associated with each Results button, load that page, and parse it to pull the score information out of it.

import requests
from bs4 import BeautifulSoup

def parse_page(data):
    subsoup = BeautifulSoup(data, "html.parser")
    matchoverview = subsoup.find('div', attrs={'id': 'match-overview'})
    print('')
    # Home team name, score and goal scorers
    homeTeam = matchoverview.find('div', attrs={'class': 'team-match-details'}).findNext('span').findNext('a').text
    homeScore = matchoverview.find('div', attrs={'class': 'team-match-details'}).findNext('span').findNext('span').text
    homeGoalScorers = ["Home Goal Scorers:"]
    for goals in matchoverview.find('div', attrs={'class': 'team-match-details'}).findNext('p').find_all('span'):
        # Replace the prime character used for minute marks with an apostrophe
        homeGoalScorers.append(goals.text.replace('\u2032', "'"))
    homeGoals = "\n".join(homeGoalScorers)
    # Away team name, score and goal scorers
    awayTeam = matchoverview.find('div', attrs={'id': 'away-team'}).find('div', attrs={'class': 'team-match-details'}).findNext('span').findNext('a').text
    awayScore = matchoverview.find('div', attrs={'id': 'away-team'}).find('div', attrs={'class': 'team-match-details'}).findNext('span').findNext('span').text
    awayGoalScorers = ["Away Goal Scorers:"]
    for goals in matchoverview.find('div', attrs={'id': 'away-team'}).find('div', attrs={'class': 'team-match-details'}).findNext('p').find_all('span'):
        awayGoalScorers.append(goals.text.replace('\u2032', "'"))
    awayGoals = "\n".join(awayGoalScorers)
    print('{0} {1} - {2} {3}'.format(homeTeam, homeScore, awayTeam, awayScore))
    print(homeGoals)
    print(awayGoals)

def all_league_results():
    r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
    soup = BeautifulSoup(r.content, "html.parser")

    # Follow the href of each match report link and parse that page
    for link in soup.find_all("a", attrs={'class': 'report'}):
        fullLink = 'http://www.bbc.com' + link['href']
        subr = requests.get(fullLink)
        parse_page(subr.text)

def specific_game_results(url):
    subr = requests.get(url)
    parse_page(subr.text)

# get a specific game's results
specific_game_results('http://www.bbc.co.uk/sport/0/football/32460049')
# get all current league results
all_league_results()
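
Given the warning above about being blocked for sending too many requests, it may also help to space the requests out. The sketch below is a variation on all_league_results() above (the function name and the one-second default delay are illustrative choices, not part of the original answer); it reuses parse_page() from the code above:

import time
import requests
from bs4 import BeautifulSoup

def all_league_results_politely(delay_seconds=1.0):
    # Same idea as all_league_results() above, but pausing between match
    # pages so the requests are spread out over time.
    r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
    soup = BeautifulSoup(r.content, "html.parser")
    for link in soup.find_all("a", attrs={'class': 'report'}):
        fullLink = 'http://www.bbc.com' + link['href']
        subr = requests.get(fullLink)
        parse_page(subr.text)      # parse_page() as defined above
        time.sleep(delay_seconds)  # arbitrary, illustrative delay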
