Python Selenium将数据存储到CSV中的特定列？

from bs4 import BeautifulSoup
from selenium import webdriver
import html5lib
import time
import requests

# Part 1: use a real browser to read the visible text of every iframe.
driver_path = '/usr/local/bin/chromedriver 2'
driver = webdriver.Chrome(driver_path)
driver.implicitly_wait(10)
driver.get('https://www.tenniswarehouse-europe.com/zzz/producttracker_bl.html?ccode=SWIMG030')
try:
    iframe = driver.find_elements_by_tag_name('iframe')
    for i in range(0, len(iframe)):
        # Frames are entered by index, so no element handle is needed here.
        driver.switch_to.frame(i)
        # your work to extract link
        text = driver.find_element_by_tag_name('body').text
        # Strip the fixed label and notice so only the remaining text is printed.
        text = text.replace("Code: ", "")
        text = text.replace("No Copy Images to TW Server", "")
        print(text)
        # Return to the top-level document before entering the next frame.
        driver.switch_to.default_content()
finally:
    # Always close the browser, even if extraction failed.
    driver.quit()

# Part 2: fetch the same page over plain HTTP and print a fragment of each
# <img> src attribute.
resp = requests.get('https://www.tenniswarehouse-europe.com/zzz/producttracker_bl.html?ccode=SWIMG030')
soup = BeautifulSoup(resp.text, "lxml")
for frame in soup.findAll('img'):
    link = frame['src']
    # Keep the text between the first and second '=' of the src value.
    link = link.split('=')[1]
    # Drop the last 9 characters of that fragment before printing.
    print(link[0:-9])

1条回答

网友

1楼 · 发布于 2024-04-19 16:50:21

当您调用driver.switch_to.frame(i)时，实际上是切换到了该iframe内部的HTML文档。和普通的HTML页面一样，您可以访问它内部的元素。

从你之前的问题来看，iframe就像

<body>
<a href="http://www.test2.com" target="_blank">
<img src="https://img2.test2.com/LWBAD-1.jpg"></a>
<br/>Code: LWBAD

您可以通过以下方式获取图片的src：

img_src = images[i].get_attribute('src')

并将其存储在csv文件中

代码：

from bs4 import BeautifulSoup
from selenium import webdriver
import html5lib
import time
import requests
import csv

# Scrape the text of every iframe on the page and write it, together with a
# fragment of the matching image URL, to a two-column CSV file.

# Path to the ChromeDriver executable.
driver_path = '/usr/local/bin/chromedriver 2'
driver = webdriver.Chrome(driver_path)
driver.implicitly_wait(10)

# try/finally guarantees the browser is closed even if scraping fails midway.
try:
    driver.get('https://www.example.com')

    iframe = driver.find_elements_by_tag_name('iframe')
    # Images are looked up in the top-level document and paired with the
    # iframes by position.
    # NOTE(review): assumes one <img> per iframe, in the same order - confirm.
    images = driver.find_elements_by_tag_name('img')
    with open('file_name.csv', 'w', newline='') as csvfile:
        field_names = ['text', 'src']
        writer = csv.DictWriter(csvfile, fieldnames=field_names)
        # Emits the "text,src" header row.
        writer.writeheader()
        for i in range(len(iframe)):
            # Read the attribute before entering the frame: the image element
            # was located in the top-level document.
            img_src = images[i].get_attribute('src')

            # do the src splitting here
            # Keeps the text between the first and second '=' of the URL.
            img_src = img_src.split('=')[1]

            # Frames are entered by index, so no element handle is needed.
            driver.switch_to.frame(i)

            text = driver.find_element_by_tag_name('body').text

            # Strip the fixed label and notice from the frame's text.
            text = text.replace("Code: ", "")
            text = text.replace("No Copy Images to TW Server", "")
            print(text)
            writer.writerow({'text': text, 'src': img_src})

            # Return to the top-level document before the next iteration.
            driver.switch_to.default_content()
finally:
    driver.quit()

相关问题更多 >

编程相关推荐

热门问题

热门文章