如何将python中的数据保存到csv-fi中

2024-04-26 11:51:37 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个程序,在最后打印一个“匹配”,我想把这个“匹配”中的数据保存到一个csv文件中,我该怎么做?我写了一些代码来保存这个变量,但是它什么也没写 我的代码是:

import shlex
import subprocess
import os
import platform
from bs4 import BeautifulSoup
import re
import csv
import pickle
def rename_files():
    file_list = os.listdir(r"C:\\PROJECT\\pdfs")
    print(file_list)
    saved_path = os.getcwd()
    print('Current working directory is '+saved_path)
    os.chdir(r'C:\\PROJECT\\pdfs')
    for file_name in file_list:
        os.rename(file_name, file_name.translate(None, " "))
    os.chdir(saved_path)
rename_files()

def run(command):
    if platform.system() != 'Windows':
        args = shlex.split(command)
    else:
        args = command
    s = subprocess.Popen(args,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    output, errors = s.communicate()
    return s.returncode == 0, output, errors

# Change this to your PDF file base directory
base_directory = 'C:\\PROJECT\\pdfs'
if not os.path.isdir(base_directory):
    print "%s is not a directory" % base_directory
    exit(1)
# Change this to your pdf2htmlEX executable location
bin_path = 'C:\\Python27\\pdfminer-20140328\\tools\\pdf2txt.py'
if not os.path.isfile(bin_path):
    print "Could not find %s" % bin_path
    exit(1)
for dir_path, dir_name_list, file_name_list in os.walk(base_directory):
    for file_name in file_name_list:
        # If this is not a PDF file
        if not file_name.endswith('.pdf'):
            # Skip it
            continue
        file_path = os.path.join(dir_path, file_name)
        # Convert your PDF to HTML here
        args = (bin_path, file_name, file_path)
        success, output, errors = run("python %s -o %s.html %s " %args)
        if not success:
            print "Could not convert %s to HTML" % file_path
            print "%s" % errors
htmls_path = 'C:\\PROJECT'
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
    for file_name in file_name_list:
        if not file_name.endswith('.html'):
            continue
        with open(file_name) as markup:
            soup = BeautifulSoup(markup.read())
            text = soup.get_text()
            match = re.findall("PA/(\S*)\s*(\S*)", text)
            print(match)
with open ('score.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows('%s' %match)

我试图将它保存到csv文件中的部分是最后3行代码。 这是“匹配”格式的打印件:https://gyazo.com/930f9dad12109bc50825c91b51fb31f3


Tags: csvpathnameinimportforbaseif
2条回答

假设已经有了“match”,就可以在Python中使用CSV module。作家应该完成你的工作。在

如果你能详细说明你的数据格式会更有帮助。在

按照代码的结构方式,迭代for循环中的匹配项,然后在循环完成后,将最后一个匹配项保存在CSV中。您可能希望将每个匹配项都写入CSV中,for循环中。在

尝试将代码的最后几行(从最后一个for循环开始)替换为:

with open('score.csv', 'wt') as f:
    writer = csv.writer(f)
    for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
        for file_name in file_name_list:
            if not file_name.endswith('.html'):
                continue
            with open(file_name) as markup:
                soup = BeautifulSoup(markup.read())
                text = soup.get_text()
                match = re.findall("PA/(\S*)\s*(\S*)", text)
                print(match)
                writer.writerow(match)

相关问题 更多 >