我终于能够从网站上抓取数据,并将标题和日期打印到终端了!但是我想把这些数据保存到一个 CSV 文件中,其中一列是标题,另一列是日期。我该怎么做?
我的代码附在下面:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
# Scrape NYT search results: keep clicking "show more" until no further
# results load, then print every headline together with its date.

# First link following each result thumbnail. The number of matches is used
# to detect that a "show more" click actually loaded additional results.
RESULT_LINK_XPATH = "//figure[@class='css-tap2ym']//following::a[1]"
SHOW_MORE_XPATH = (
    "//div[@class='css-vsuiox']"
    "//button[@data-testid='search-show-more-button']"
)
# NOTE: the query string previously contained "nyc§ions=", a mis-decoded
# "&sections=" ("&sect" had been turned into the section sign).
SEARCH_URL = (
    "https://www.nytimes.com/search?dropmab=true&endDate=20180111&query=nyc"
    "&sections=New%20York%7Cnyt%3A%2F%2Fsection%2F"
    "39480374-66d3-5603-9ce1-58cfa12988e2&sort=best&startDate=20180107"
)

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
driver = webdriver.Chrome(
    options=options,
    executable_path=r"//usr/local/Caskroom/chromedriver/81.0.4044.69/chromedriver")

try:
    driver.get(SEARCH_URL)

    # Number of results visible after the initial page load.
    myLength = len(WebDriverWait(driver, 20).until(
        EC.visibility_of_all_elements_located((By.XPATH, RESULT_LINK_XPATH))))

    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
                (By.XPATH, SHOW_MORE_XPATH))).click()
            # Wait until the click has produced more results than before.
            WebDriverWait(driver, 20).until(
                lambda d: len(d.find_elements(By.XPATH, RESULT_LINK_XPATH)) > myLength)
            myLength = len(driver.find_elements(By.XPATH, RESULT_LINK_XPATH))
        except TimeoutException:
            # No clickable "show more" button, or no new results within the
            # timeout: treat the result list as fully loaded.
            break

    headlines_element = driver.find_elements(By.XPATH, '//p[@class="css-16nhkrn"]')
    headlines = [x.text for x in headlines_element]
    print('headlines:')
    print(headlines, '\n')

    dates_element = driver.find_elements(By.XPATH, "//time[@class='css-17ubb9w']")
    dates = [x.text for x in dates_element]
    print("dates:")
    print(dates, '\n')

    # Distinct loop names so the `headlines` / `dates` lists are not
    # overwritten while (and after) iterating over them.
    for headline, date in zip(headlines, dates):
        print("Headlines : Dates")
        print(headline + ": " + date, '\n')
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()
最后一段代码是用来获取并打印标题和日期的部分。提前感谢您的帮助!
您可以使用 Python 标准库的 `csv` 模块将数据写入 CSV 文件。
使用:
相关问题 更多 >
编程相关推荐