创建一个 Python 脚本：读取 CSV 文件，并使用其中的输入从 finviz.com 网站抓取数据，然后将数据导出到 CSV 文件中

import csv
import urllib.request

from bs4 import BeautifulSoup

# Read the ticker symbols from the first column of shortlist.csv.
# Every non-empty first cell is kept; the earlier version only remembered
# the last row and then iterated over that string character by character.
symbols = []
with open('shortlist.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    for row in reader:
        # csv.reader yields an empty list for a blank line, so guard the
        # index before looking at the first cell.
        if not row:
            continue
        name = row[0].strip()
        if name:
            print(name)
            symbols.append(name)

# The header row is written once, from the first page fetched successfully.
write_header = True

URL_BASE = "https://finviz.com/quote.ashx?t="

with open('output.csv', 'w', newline='') as file:
    writer = csv.writer(file)

    for ticker in symbols:
        URL = URL_BASE + ticker
        try:
            # Close the HTTP response as soon as the page has been parsed.
            with urllib.request.urlopen(URL) as fpage:
                fsoup = BeautifulSoup(fpage, 'html.parser')
        except urllib.request.HTTPError:
            print("{} - not found".format(URL))
            continue

        if write_header:
            # Header cells come from the <td class="snapshot-td2-cp"> elements.
            labels = [cell.text for cell in fsoup.find_all('td', {'class': 'snapshot-td2-cp'})]
            writer.writerow(['ticker'] + labels)
            write_header = False

        # Data cells come from the <td class="snapshot-td2"> elements.
        values = [cell.text for cell in fsoup.find_all('td', {'class': 'snapshot-td2'})]
        writer.writerow([ticker] + values)

1条回答

网友

1楼 · 发布于 2024-04-19 05:49:39

import csv
import urllib.request
from bs4 import BeautifulSoup

URL_BASE = "https://finviz.com/quote.ashx?t="


def read_tickers(path):
    """Return the ticker symbols found in the first column of a CSV file.

    Blank lines and rows whose first cell is empty (or only whitespace) are
    skipped, and surrounding whitespace is stripped from each symbol.

    Args:
        path: Path of the comma-delimited input file.

    Returns:
        A list of symbols in file order; empty if the file has no usable rows.
    """
    with open(path, 'r', newline='') as csvfile:
        # csv.reader yields [] for a blank line, so filter on `row` before
        # indexing its first cell.
        first_cells = (row[0].strip() for row in csv.reader(csvfile, delimiter=',') if row)
        return [cell for cell in first_cells if cell]


def scrape_snapshots(symbols, output_path):
    """Fetch the quote page of every symbol and write one CSV row per symbol.

    The header row is written once, taken from the first page that could be
    fetched. A symbol whose page request raises HTTPError is reported on
    stdout and skipped.

    Args:
        symbols: Iterable of ticker symbols appended to URL_BASE.
        output_path: Path of the CSV file to create (overwritten if present).
    """
    write_header = True

    with open(output_path, 'w', newline='') as file:
        writer = csv.writer(file)

        for ticker in symbols:
            URL = URL_BASE + ticker
            try:
                # Close the HTTP response as soon as the page has been parsed.
                with urllib.request.urlopen(URL) as fpage:
                    fsoup = BeautifulSoup(fpage, 'html.parser')
            except urllib.request.HTTPError:
                print("{} - not found".format(URL))
                continue

            if write_header:
                # Header cells come from the <td class="snapshot-td2-cp"> elements.
                labels = [cell.text for cell in fsoup.find_all('td', {'class': 'snapshot-td2-cp'})]
                writer.writerow(['ticker'] + labels)
                write_header = False

            # Data cells come from the <td class="snapshot-td2"> elements.
            values = [cell.text for cell in fsoup.find_all('td', {'class': 'snapshot-td2'})]
            writer.writerow([ticker] + values)


def main():
    """Read shortlist.csv, echo its symbols, and scrape them into output.csv."""
    symbols = read_tickers('shortlist.csv')
    for name in symbols:
        print(name)
    scrape_snapshots(symbols, 'output.csv')


if __name__ == "__main__":
    main()

这是输出： enter image description here

相关问题更多 >

编程相关推荐

热门问题

热门文章