如何将Python中的数据保存到CSV文件中

import shlex import subprocess import os import platform from bs4 import BeautifulSoup import re import csv import pickle def rename_files(): file_list = os.listdir(r"C:\\PROJECT\\pdfs") print(file_list) saved_path = os.getcwd() print('Current working directory is '+saved_path) os.chdir(r'C:\\PROJECT\\pdfs') for file_name in file_list: os.rename(file_name, file_name.translate(None, " ")) os.chdir(saved_path) rename_files() def run(command): if platform.system() != 'Windows': args = shlex.split(command) else: args = command s = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output, errors = s.communicate() return s.returncode == 0, output, errors # Change this to your PDF file base directory base_directory = 'C:\\PROJECT\\pdfs' if not os.path.isdir(base_directory): print "%s is not a directory" % base_directory exit(1) # Change this to your pdf2htmlEX executable location bin_path = 'C:\\Python27\\pdfminer-20140328\\tools\\pdf2txt.py' if not os.path.isfile(bin_path): print "Could not find %s" % bin_path exit(1) for dir_path, dir_name_list, file_name_list in os.walk(base_directory): for file_name in file_name_list: # If this is not a PDF file if not file_name.endswith('.pdf'): # Skip it continue file_path = os.path.join(dir_path, file_name) # Convert your PDF to HTML here args = (bin_path, file_name, file_path) success, output, errors = run("python %s -o %s.html %s " %args) if not success: print "Could not convert %s to HTML" % file_path print "%s" % errors htmls_path = 'C:\\PROJECT' for dir_path, dir_name_list, file_name_list in os.walk(htmls_path): for file_name in file_name_list: if not file_name.endswith('.html'): continue with open(file_name) as markup: soup = BeautifulSoup(markup.read()) text = soup.get_text() match = re.findall("PA/(\S*)\s*(\S*)", text) print(match) with open ('score.csv', 'w') as f: writer = csv.writer(f) writer.writerows('%s' %match)

2条回答

网友

1楼 · 编辑于 2024-04-26 11:51:37

假设你已经得到了“match”，就可以使用Python中的csv模块，其中的writer应该可以完成你的工作。

如果你能详细说明你的数据格式，会更有帮助。

网友

2楼 · 编辑于 2024-04-26 11:51:37

按照你代码目前的结构，你先在for循环中逐个得到匹配结果，然后在循环结束之后，只把最后一次的匹配结果保存到CSV中。你可能希望在for循环内部把每一次的匹配结果都写入CSV。

尝试将代码的最后几行（从最后一个for循环开始）替换为：

# Open the CSV once, before walking the tree, so every HTML file's matches
# end up in the same output file instead of only the last one.
with open('score.csv', 'wt') as f:
    writer = csv.writer(f)
    for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
        for file_name in file_name_list:
            if not file_name.endswith('.html'):
                continue
            # os.walk yields bare file names; join with dir_path so files
            # outside the current working directory can be opened too.
            with open(os.path.join(dir_path, file_name)) as markup:
                soup = BeautifulSoup(markup.read())
                text = soup.get_text()
                # Raw string so \S and \s reach the regex engine untouched.
                match = re.findall(r"PA/(\S*)\s*(\S*)", text)
                print(match)
                # One CSV row per HTML file; each field is one
                # (group1, group2) tuple returned by findall.
                writer.writerow(match)

相关问题更多 >

编程相关推荐

热门问题

热门文章