Accessing data from behind JS

Posted 2024-04-25 16:32:16


I'm trying to pull the goal times from the drop-down for each fixture at http://www.bbc.co.uk/sport/football/league-one/results

I can't seem to find that data anywhere when I search the page - any idea why?

import requests
from bs4 import BeautifulSoup

# Load page data
r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
soup = BeautifulSoup(r.content, "html.parser")
print(soup.prettify())

# Print the link text of every anchor (team names, navigation, etc.)
for link in soup.find_all("a"):
    print(link.text)

# Print the text of every abbr element (results)
for link in soup.find_all("abbr"):
    print(link.text)
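
One quick way to check whether the data is even present in the HTML that requests fetches is to search the raw response text; if a string you can see in the browser never appears there, the dropdown content is being filled in by JavaScript after the page loads, so BeautifulSoup will never find it. (A minimal check; "Walsall" below is just an arbitrary example search string.)

import requests

r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")

# If text you can see in the browser (a team name, a goal time) is missing
# from the raw response, it is being added client-side by JavaScript and
# won't be visible to BeautifulSoup. "Walsall" is only an illustrative term.
print("Walsall" in r.text)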



Tags: import, http, www, link, requests, one, results, bbc
1 answer

#1 · Posted by a community member on 2024-04-25 16:32:16

So this is a lot of data to pull (not to mention it will be slow loading all of the individual pages), and they may eventually block you for making too many requests, but it's the only way I can see. What I would do is loop through and grab the href associated with each Results button, load that page, and parse it to pull the score information out of it.

import requests
from bs4 import BeautifulSoup

def parse_page(data):
    subsoup = BeautifulSoup(data, "html.parser")
    matchoverview = subsoup.find('div', attrs={'id': 'match-overview'})
    print('')
    # Home team name, score and goal scorers
    homeTeam = matchoverview.find('div', attrs={'class': 'team-match-details'}).findNext('span').findNext('a').text
    homeScore = matchoverview.find('div', attrs={'class': 'team-match-details'}).findNext('span').findNext('span').text
    homeGoalScorers = ["Home Goal Scorers:"]
    for goals in matchoverview.find('div', attrs={'class': 'team-match-details'}).findNext('p').find_all('span'):
        # Replace the prime character used for minute marks with an apostrophe
        homeGoalScorers.append(goals.text.replace('\u2032', "'"))
    homeGoals = "\n".join(homeGoalScorers)
    # Away team name, score and goal scorers
    awayTeam = matchoverview.find('div', attrs={'id': 'away-team'}).find('div', attrs={'class': 'team-match-details'}).findNext('span').findNext('a').text
    awayScore = matchoverview.find('div', attrs={'id': 'away-team'}).find('div', attrs={'class': 'team-match-details'}).findNext('span').findNext('span').text
    awayGoalScorers = ["Away Goal Scorers:"]
    for goals in matchoverview.find('div', attrs={'id': 'away-team'}).find('div', attrs={'class': 'team-match-details'}).findNext('p').find_all('span'):
        awayGoalScorers.append(goals.text.replace('\u2032', "'"))
    awayGoals = "\n".join(awayGoalScorers)
    print('{0} {1} - {2} {3}'.format(homeTeam, homeScore, awayTeam, awayScore))
    print(homeGoals)
    print(awayGoals)

def all_league_results():
    r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
    soup = BeautifulSoup(r.content, "html.parser")

    # Follow the href of each match report link and parse that page
    for link in soup.find_all("a", attrs={'class': 'report'}):
        fullLink = 'http://www.bbc.com' + link['href']
        subr = requests.get(fullLink)
        parse_page(subr.text)

def specific_game_results(url):
    subr = requests.get(url)
    parse_page(subr.text)

# get a specific game's results
specific_game_results('http://www.bbc.co.uk/sport/0/football/32460049')
# get all current league results
all_league_results()
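
Given the warning above about being blocked for sending too many requests, it may also help to space the requests out. The sketch below is a variation on all_league_results() above (the function name and the one-second default delay are illustrative choices, not part of the original answer); it reuses parse_page() from the code above:

import time
import requests
from bs4 import BeautifulSoup

def all_league_results_politely(delay_seconds=1.0):
    # Same idea as all_league_results() above, but pausing between match
    # pages so the requests are spread out over time.
    r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
    soup = BeautifulSoup(r.content, "html.parser")
    for link in soup.find_all("a", attrs={'class': 'report'}):
        fullLink = 'http://www.bbc.com' + link['href']
        subr = requests.get(fullLink)
        parse_page(subr.text)      # parse_page() as defined above
        time.sleep(delay_seconds)  # arbitrary, illustrative delay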
