Python从DataFrame中删除行

2024-04-25 02:03:07 发布

您现在位置:Python中文网/ 问答频道 /正文

这应该很简单,但我被难住了。下面的代码收集了一些统计信息,我想从表中删除第20行(以及其他行)。当我运行代码时,我的CSV文件仍然包含第20行。有人能帮忙吗?你知道吗

import requests
from random import choice
from bs4 import BeautifulSoup
import pandas as pd

# Pool of realistic desktop browser User-Agent strings; one is picked at
# random per request so the scraper looks less like a bot.
desktop_agents = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
             'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
             'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14',
             'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
             'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
             'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
             'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0']

# Basketball-Reference advanced game-log page for the 2018 Atlanta Hawks.
url_template = "https://www.basketball-reference.com/teams/ATL/2018/gamelog-advanced/"

def random_headers():
    """Build HTTP request headers with a randomly chosen desktop User-Agent.

    Returns:
        dict: headers containing a 'User-Agent' drawn from ``desktop_agents``
        and a standard browser-like 'Accept' value.
    """
    # BUG FIX: the return statement was not indented under the def,
    # which is a SyntaxError in Python.
    return {'User-Agent': choice(desktop_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'}

url = url_template

# Fetch the page with a randomized User-Agent to reduce the chance of
# being blocked, then parse the HTML with lxml.
page_request = requests.get(url, headers=random_headers())
soup = BeautifulSoup(page_request.text, "lxml")

# Column headers come from the second <tr>; the first <tr> is a
# grouping/super-header row on Basketball-Reference tables.
column_headers = [th.getText() for th in
                  soup.findAll('tr', limit=2)[1].findAll('th')]

# get lineup data: every remaining row, reading both <td> cells and the
# leading <th> cell (the game number) of each row
data_rows = soup.findAll('tr')[2:]
lineup_data = [[cell.getText() for cell in row.findAll(['td', 'th'])]
               for row in data_rows]

# Turn page data into a DataFrame
page_df = pd.DataFrame(lineup_data, columns=column_headers)

# BUG FIX: DataFrame.drop returns a NEW DataFrame and does not modify
# page_df in place, so the original code's `page_df.drop([20])` was a
# no-op and row 20 still appeared in the CSV. Assign the result back
# (equivalently: page_df.drop([20], inplace=True)).
page_df = page_df.drop(index=[20])

page_df.to_csv("GameLog.csv")

Tags: importurlmozilladatawindowspagechromesafari