通过在代码中迭代字符串来创建循环

from itertools import chain, groupby, repeat
from urllib.request import urlopen

from bs4 import BeautifulSoup
import pandas as pd
import requests
from requests import get

# One entry per match row, collected column by column from the 2018 table.
date = []
tourney_round = []
result = []
winner_odds = []
loser_odds = []
surface = []
# Must be initialised like the other lists; a bare name here raises NameError.
players_and_tourney = []

response = get('http://www.tennisexplorer.com/player/humbert-e2553/?annual=all')
page_html = BeautifulSoup(response.text, 'html.parser')

results2018_containers = page_html.find_all('div', id='matches-2018-1-data')

# Suffix appended to every scraped date cell.
string_2018 = '2018'

# Read each matched container itself (not always element [0]) so that several
# matching containers do not duplicate the first one's rows.
for container in results2018_containers:
    date.extend(
        cell.text + string_2018
        for cell in container.findAll('td', class_='first time')
    )
    tourney_round.extend(
        cell.text for cell in container.findAll('td', class_='round')
    )
    result.extend(cell.text for cell in container.findAll('td', class_='tl'))
    surface.extend(
        cell.find('span')['title']
        for cell in container.findAll('td', class_='s-color')
    )

    # 'course' cells alternate: even positions feed winner_odds, odd positions
    # feed loser_odds.
    odds_2018 = container.findAll('td', class_='course')
    winner_odds.extend(cell.text for cell in odds_2018[0::2])
    loser_odds.extend(cell.text for cell in odds_2018[1::2])

    players_and_tourney.extend(
        cell.text for cell in container.findAll('td', class_='t-name')
    )

chainer = chain.from_iterable


def condition(x):
    """Return True when *x* starts with a non-breaking space.

    Entries for which this is True are used as tournament headers below;
    the remaining entries are treated as player rows.
    """
    return x.startswith('\xa0')


# Runs of consecutive non-header entries, one list per run.
elements = [list(j) for i, j in groupby(players_and_tourney, key=condition) if not i]

# create list of headers
headers = [next(j) for i, j in groupby(players_and_tourney, key=condition) if i]

# chain list of lists, and use repeat for headers
initial_df_2018 = pd.DataFrame({
    'Date': date,
    'Surface': surface,
    'Players': list(chainer(elements)),
    'Tournament': list(
        chainer(repeat(i, j) for i, j in zip(headers, map(len, elements)))
    ),
    'Round': tourney_round,
    'Result': result,
    'Winner Odds': winner_odds,
    'Loser Odds': loser_odds,
})

# Split "A - B" once into its two sides. Indexing the split lists avoids
# tuple-unpacking the .str accessor, which current pandas no longer supports.
players_split = initial_df_2018['Players'].str.split(' - ', n=1)
initial_df_2018['Winner'] = players_split.str[0]
initial_df_2018['Loser'] = players_split.str[1]
del initial_df_2018['Players']

# NOTE(review): 'Round' is collected above but is not part of this final
# column selection, exactly as in the original.
initial_df_2018 = initial_df_2018[
    ['Date', 'Surface', 'Tournament', 'Winner', 'Loser', 'Result',
     'Winner Odds', 'Loser Odds']
]

3条回答

网友

1楼 · 编辑于 2024-05-20 00:54:20

循环使用它是没有问题的，但是你需要定义你想要的结果。我在这里使用了一个字典，我把你的代码变成了一个可以用变量调用的函数：

def get_data(year):
    """Return the text of every 'plays' cell found for *year*.

    Downloads the page, looks up the ``div`` elements whose id is
    ``played-<year>-data`` and collects the text of each ``td`` with class
    ``plays`` inside them.

    Args:
        year: Value interpolated into the container id (e.g. ``2018``).

    Returns:
        A list of strings, empty when no container matches.
    """
    response = get('http://www.example.com')
    page_html = BeautifulSoup(response.text, 'html.parser')

    # The named field {year} needs a named value; '...'.format(year) passes it
    # positionally and raises KeyError.
    results_containers = page_html.find_all('div', id=f'played-{year}-data')

    date = []
    # Read each matched container itself rather than always element [0].
    for container in results_containers:
        date.extend(
            cell.text for cell in container.findAll('td', class_='plays')
        )
    return date

现在只需创建一个包含所有可能年份的 range，并对其中每个年份调用一次该函数，写法很简单：

# Map each season to its scraped data, newest first (2018 down to 2005).
all_data = {
    season: get_data(season)
    for season in reversed(range(2005, 2019))
}

网友

2楼 · 编辑于 2024-05-20 00:54:20

您可以将年份存储为整数，但仍可以在字符串中使用它。

# The year stays an int; the f-string converts it when building the text.
# Counts down from 2018 to 2005.
for year in reversed(range(2005, 2019)):
    print(f"Happy New Year {year}")

在字符串中包含数字的其他方法有"Happy New Year {}".format(year)或"it is now " + str(year) + " more text"。

另外，我想这并不是你要做的事，但如果有人搜到这里并且确实想“迭代一个字符串”，凯撒密码（Caesar cipher）是一个很好的入门练习。

网友

3楼 · 编辑于 2024-05-20 00:54:20

如果我理解正确，您希望把针对2018年所做的处理，同样应用到2005年至2018年的每一年。

我的做法是在这个年份范围内循环执行你的代码，每次替换对应的id，并把所有数据添加到列表中。

# Fetch and parse the page once; every year's container is looked up in the
# same parsed document.
response = get('http://www.example.com')

page_html = BeautifulSoup(response.text, 'html.parser')

# Maps year -> list of text values from that year's 'plays' cells.
date_dict = {}

# NOTE(review): this range (2019 down to 2) is kept from the original and is
# far wider than 2005-2018; years without a matching container are skipped.
for year in range(2019, 1, -1):
    string_id = "played-{}-data".format(year)
    results_containers = page_html.find_all('div', id=string_id)

    # find_all returns an empty list-like result, never None, when nothing
    # matches. Comparing against None never skipped a year, so every year got
    # an empty entry.
    if not results_containers:
        continue

    date = []
    # Read each matched container itself rather than always element [0].
    for container in results_containers:
        date.extend(
            cell.text for cell in container.findAll('td', class_='plays')
        )

    date_dict.setdefault(year, []).extend(date)

相关问题更多 >

编程相关推荐

热门问题

热门文章