如何在Python中每列保存一个单词的字符串？

# Asked by: Tiffani D Abraham
#
# Scrapes the AMTA "find a massage" result pages listed below and writes the
# scraped fields to mtResult.csv.
# NOTE(review): the binary-mode file plus manual .encode('utf-8') suggest this
# was written for Python 2 -- confirm before running on Python 3.
import mechanize
from lxml import html
import csv
import io
from time import sleep


def save_products(products, writer):
    """Write each listing's title, contact and services to ``writer``.

    Every field is emitted with its own ``writerow`` call, so each value
    ends up on a separate one-cell row (the behaviour the question is
    asking about).
    """
    for entry in products:
        for listing in entry['prices']:
            for record, key in ((entry, "title"),
                                (listing, "contact"),
                                (listing, "services")):
                writer.writerow([record[key].encode('utf-8')])


def first_text(node, path):
    """Return the first XPath match for ``path`` under ``node``, or "" on any failure."""
    try:
        return node.xpath(path)[0]
    except:
        return ""


f_out = open('mtResult.csv', 'wb')
writer = csv.writer(f_out)

links = [
    "https://www.amtamassage.org/findamassage/results.html?match=exact&l=NY",
    "https://www.amtamassage.org/findamassage/results.html?match=exact&l=NY&PageIndex=2&PageSize=10",
    "https://www.amtamassage.org/findamassage/results.html?match=exact&l=NY&PageIndex=3&PageSize=10",
    "https://www.amtamassage.org/findamassage/results.html?match=exact&l=NY&PageIndex=4&PageSize=10",
    "https://www.amtamassage.org/findamassage/results.html?match=exact&l=NY&PageIndex=5&PageSize=10",
    "https://www.amtamassage.org/findamassage/results.html?match=exact&l=NY&PageIndex=6&PageSize=10",
    "https://www.amtamassage.org/findamassage/results.html?match=exact&l=NY&PageIndex=7&PageSize=10",
    "https://www.amtamassage.org/findamassage/results.html?match=exact&l=NY&PageIndex=8&PageSize=10",
    "https://www.amtamassage.org/findamassage/results.html?match=exact&l=NY&PageIndex=9&PageSize=10",
    "https://www.amtamassage.org/findamassage/results.html?match=exact&l=NY&PageIndex=10&PageSize=10",
]

br = mechanize.Browser()

for link in links:
    print(link)
    page_source = br.open(link).read()
    tree = html.fromstring(page_source)

    products = []
    for result_item in tree.xpath('//ul[@class="famt-results"]/li'):
        entry = {'prices': []}
        for anchor in result_item.xpath('.//a'):
            # The title is looked up on the enclosing <li> once per anchor.
            entry['title'] = first_text(result_item, './/span[1]/text()')
            listing = {
                'services': first_text(anchor, './span[2]/text()'),
                'contact': first_text(anchor, './span[3]/text()'),
            }
            entry['prices'].append(listing)
        products.append(entry)

    # Flush this page's results before moving on to the next link.
    save_products(products, writer)

f_out.close()

1条回答

网友

1楼 · 发布于 2024-06-16 09:52:09

我不确定这是否能解决你的问题，但无论如何，这里有一些你可能会感兴趣的改进和修改。

例如，由于各个链接之间只有页面索引不同，您可以直接用循环生成这些链接，而不必把全部50个链接复制到一个列表中。每页上的每位治疗师都有自己的索引，所以您也可以通过XPath循环提取每位治疗师的信息。

#import modules
import mechanize
from lxml import html
import csv
import io

# Scrape the AMTA "find a massage" result pages for New York and save one
# therapist per CSV row: name, technique(s) and contact info.

# Results-page URL; only the page index differs between pages.
BASE_URL = ("https://www.amtamassage.org/findamassage/results.html"
            "?match=exact&l=NY&PageIndex={page}&PageSize=10")
# Number of result pages to visit (pages are numbered from 1).
PAGE_COUNT = 50
OUTPUT_PATH = 'mtResult.csv'
HEADER = ["NAME", "TECHNIQUE(S)", "CONTACT INFO"]
# One <li> per therapist inside the results list.
THERAPIST_XPATH = '//*[@id="content"]/div[2]/ul[1]/li'

# On Python 2 ``str`` is the byte-string type; its csv module expects UTF-8
# encoded bytes and a binary file, whereas Python 3 expects text for both.
PY2 = str is bytes


def page_url(page_index):
    """Return the URL of the 1-based results page ``page_index``."""
    return BASE_URL.format(page=page_index)


def first_or_blank(values):
    """Return the first item of ``values``, or " " when there is none."""
    return values[0] if values else " "


def to_csv_cell(text):
    """Return ``text`` in the form the running Python's csv writer expects."""
    if PY2 and not isinstance(text, bytes):
        return text.encode("utf-8")
    return text


def therapist_row(therapist_node):
    """Build the [name, techniques, contact] row for one therapist <li> node.

    A missing name becomes "", a missing technique or contact becomes " ".
    """
    name = " ".join(therapist_node.xpath('./a/span[1]/text()'))
    techniques = first_or_blank(therapist_node.xpath('./a/span[2]/text()'))
    contact = first_or_blank(therapist_node.xpath('./a/span[3]/text()'))
    return [to_csv_cell(field) for field in (name, techniques, contact)]


def scrape_rows(browser, page_count=PAGE_COUNT):
    """Fetch pages 1..page_count with ``browser`` and return all therapist rows."""
    rows = []
    # range() excludes its stop value, hence the +1 to include the last page.
    for page_index in range(1, page_count + 1):
        response = browser.open(page_url(page_index))
        tree = html.fromstring(response.read())
        # Iterate over the <li> nodes themselves so every therapist on the
        # page is picked up, however many the page actually lists.
        rows.extend(therapist_row(node) for node in tree.xpath(THERAPIST_XPATH))
    return rows


def open_csv(path):
    """Open ``path`` for writing in the mode the csv module needs."""
    if PY2:
        return open(path, 'wb')
    # newline='' lets the csv writer control line endings itself.
    return open(path, 'w', newline='', encoding='utf-8')


def main():
    """Scrape every results page, then write header and rows to the CSV."""
    rows = scrape_rows(mechanize.Browser())
    # The context manager closes the file even if writing fails.
    with open_csv(OUTPUT_PATH) as f_out:
        writer = csv.writer(f_out)
        writer.writerow(HEADER)
        writer.writerows(rows)


if __name__ == "__main__":
    main()

该脚本循环访问所给网站上的全部50个结果页，并抓取每位治疗师的所有相关信息（如果页面上提供了的话）。最后，它把所有数据写入一个csv文件，分别存放在“NAME”、“TECHNIQUE(S)”和“CONTACT INFO”这几列中——如果这正是您最初遇到的问题的话。

希望这有帮助！

相关问题更多 >

编程相关推荐

热门问题

热门文章