如何使用python将.txt文件转换为.xml文件

2024-04-19 15:36:38 发布

您现在位置:Python中文网/ 问答频道 /正文

我的 abc.txt 文件内容如下所示:

1
76 45 146 87

这是我期望得到的 xyz.xml 文件:

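<root>
    <object>
        <label>1</label>
    </object>
    <cordinates>
        <xmin>76</xmin>
        <ymin>45</ymin>
        <xmax>146</xmax>
        <ymax>87</ymax>
    </cordinates>
</root>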

这是我尝试过的源代码:

# Build xyz.xml by hand for one hard-coded sample: label "1" with the four
# box values 76 45 146 87.
# xml.etree.cElementTree was removed in Python 3.9; ElementTree offers the
# same API and is the supported module.
import xml.etree.ElementTree as ET

root = ET.Element("root")

# <object> carries the label. The variable is not called `object` so the
# builtin of that name stays usable.
object_node = ET.SubElement(root, "object")
ET.SubElement(object_node, "label").text = "1"

# <cordinates> carries one child element per box value. The tag spelling
# "cordinates" is kept because it is part of the produced file format.
cordinates = ET.SubElement(root, "cordinates")
ET.SubElement(cordinates, "xmin").text = "76"
ET.SubElement(cordinates, "ymin").text = "45"
# The last two tags used to be a second "xmin" and "xmax", which produced a
# duplicate tag and no "ymax" at all.
# NOTE(review): assumes the values are ordered xmin, ymin, xmax, ymax - confirm.
ET.SubElement(cordinates, "xmax").text = "146"
ET.SubElement(cordinates, "ymax").text = "87"

tree = ET.ElementTree(root)
tree.write("xyz.xml")

但问题是,标签和坐标都是我手动写进代码里的。我希望程序能够从 abc.txt 文件中读取这些内容,并对 350 多个 .txt 文件自动完成同样的转换。有人能帮我重写这段代码吗?任何帮助都将不胜感激。谢谢!


Tags: 文件 text txt tree object 源代码 root xml
3条回答
import os

# xml.etree.cElementTree was removed in Python 3.9; fall back to the
# equivalent ElementTree module when it is unavailable.
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET


def toxml(lines, save_filepath):
    """Convert the lines of one annotation text file into an XML file.

    Args:
        lines: Lines read from the text file. Once blank lines are dropped
            there must be exactly two: the label, then four
            whitespace-separated coordinate values.
        save_filepath: Path of the XML file to write.

    Raises:
        ValueError: If the content does not have that shape. No file is
            written in that case.
    """
    def generate_xml(obj, cordinates_arr, save_filepath):
        # Build <root><object><label/></object><cordinates>...</cordinates></root>.
        root_node = ET.Element("root")
        object_node = ET.SubElement(root_node, "object")
        ET.SubElement(object_node, "label").text = obj
        cordinates_node = ET.SubElement(root_node, "cordinates")
        # One child per value. The tags used to be xmin, ymin, xmin, xmax,
        # which duplicated "xmin" and never produced "ymax".
        # NOTE(review): assumes the values are ordered xmin, ymin, xmax, ymax - confirm.
        for tag, value in zip(("xmin", "ymin", "xmax", "ymax"), cordinates_arr):
            ET.SubElement(cordinates_node, tag).text = value
        tree = ET.ElementTree(root_node)
        tree.write(save_filepath)

    # Strip line endings and drop blank lines so that a trailing empty line
    # at the end of the file does not make the content look invalid.
    content = [line.strip() for line in lines if line.strip()]
    if len(content) != 2:
        raise ValueError("Invalid content: {}".format(lines))

    obj, cordinates_line = content
    # Split into the individual values; passing the unsplit string on would
    # make generate_xml index single characters instead of numbers.
    cordinates_arr = cordinates_line.split()
    if len(cordinates_arr) != 4:
        raise ValueError("Invalid line format: {}".format(lines))

    # start generate
    generate_xml(obj, cordinates_arr, save_filepath)

def entry(target_dir_path, save_dri_path):
    """Convert every file found in a directory to XML with ``toxml``.

    Each entry of ``target_dir_path`` is read and written to the save
    directory as ``<name without extension>.xml``. A failure on one entry is
    printed and does not stop the remaining entries.

    Args:
        target_dir_path: Directory containing the input text files.
        save_dri_path: Directory that receives the generated XML files. The
            parameter keeps its original spelling so existing keyword
            callers are not broken.

    Raises:
        AssertionError: If either directory does not exist.
    """
    # The body used to read `save_dir_path`, which is not a parameter of this
    # function; it only worked when a global with that name happened to exist.
    save_dir_path = save_dri_path

    assert os.path.exists(target_dir_path), "Target directory is not exist: {}".format(target_dir_path)
    # Report the save directory here (the message used to show the target path).
    assert os.path.exists(save_dir_path), "Save directory is not exist: {}".format(save_dir_path)

    for filename in os.listdir(target_dir_path):
        file_full_path = os.path.join(target_dir_path, filename)
        filename_prefix, _ = os.path.splitext(filename)
        save_path = os.path.join(save_dir_path, "{}.xml".format(filename_prefix))
        try:
            with open(file_full_path) as ff:
                toxml(ff.readlines(), save_path)
        except Exception as ex:
            # Best-effort batch: report the failing entry and keep going.
            print("Generate {0} failed, with error msg: {1}.".format(filename, ex))


# Script entry point: the batch conversion runs only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    # These look like placeholder paths - replace them with real directories.
    # Both assignments create module-level names.
    target_dir_path = '/path/to/you/wanna/convert'
    save_dir_path = '/path/to/you/wanna/save'
    entry(target_dir_path, save_dir_path)

代码已经测试过了,希望它能帮助你

您可以先创建一个函数,该函数接受 txt_file 和 xml_file 两个文件名作为输入,然后将 txt_file 的内容写入 xml_file

import xml.etree.cElementTree as ET

def write_xml(txt_file, xml_file):
    """Convert one annotation text file into an XML file.

    The text file must contain, ignoring blank lines, a label line followed
    by a line of four whitespace-separated coordinate values.

    Args:
        txt_file: Path of the text file to read.
        xml_file: Path of the XML file to write.

    Raises:
        ValueError: If the file does not contain exactly one label line and
            one line with four coordinate values.
    """
    # Tag names for the coordinate values, in the order they appear in the file
    coords_list = ['xmin', 'ymin', 'xmax', 'ymax']

    with open(txt_file) as fp:
        # Strip line endings (otherwise the label would be written with its
        # trailing newline) and drop blank lines such as one at end of file.
        content = [line.strip() for line in fp if line.strip()]

    if len(content) != 2:
        raise ValueError(
            "{}: expected a label line and a coordinate line, got {} line(s)".format(
                txt_file, len(content)))
    label, text_str = content

    text_list = text_str.split()
    # zip() would silently drop missing or extra values, so check the count.
    if len(text_list) != len(coords_list):
        raise ValueError(
            "{}: expected {} coordinate values, got {}".format(
                txt_file, len(coords_list), len(text_list)))

    # Build <root><object><label/></object><cordinates>...</cordinates></root>
    root = ET.Element("root")
    object_node = ET.SubElement(root, "object")
    ET.SubElement(object_node, "label").text = label

    cordinates = ET.SubElement(root, "cordinates")
    for key, value in zip(coords_list, text_list):
        ET.SubElement(cordinates, key).text = value

    # Write to the xml file
    tree = ET.ElementTree(root)
    tree.write(xml_file)

然后可以调用函数write_xml('file.txt', 'xyz.xml')

现在您可以在一个循环中为所有txt文件调用此函数

^{pr2}$

您可以使用os.listdir列出txt files文件夹中的所有文件,然后使用os.path.join创建文件路径并将它们添加到列表中

import os

# Folder whose entries are collected (the value is a placeholder string).
txt_files_folder = '<folder_with_txt_files>'

# Path of every entry in the folder, joined with the folder name, in the
# order os.listdir returns them.
txt_file_names = [
    os.path.join(txt_files_folder, entry_name)
    for entry_name in os.listdir(txt_files_folder)
]

这段代码会读取对象名及其坐标并存入字典,然后遍历该字典以创建所需的相应元素。

# Read a label and four coordinate values from each listed text file and
# write all of them into a single xyz.xml.
# xml.etree.cElementTree was removed in Python 3.9; ElementTree offers the
# same API and is the supported module.
import xml.etree.ElementTree as ET

file_list = ['abc.txt', 'def.txt']
# Maps each input file name to (label, [four coordinate strings]). Keying by
# file name rather than by label keeps files that share a label.
obj_coord = {}

for file_name in file_list:
    # `with` closes the file even when the entry is skipped below.
    with open(file_name, "r") as f:
        # strip() removes the line ending so it does not end up in <label>.
        label = f.readline().strip()
        coord_list = f.readline().split()
    if len(coord_list) != 4:
        # Skip only this file; the remaining files are still processed.
        print("Skipping {}: expected 4 coordinate values, got {}".format(
            file_name, len(coord_list)))
        continue
    # Store the split values; indexing the raw string would yield characters.
    obj_coord[file_name] = (label, coord_list)


root = ET.Element("root")

for label, coord_list in obj_coord.values():
    object_node = ET.SubElement(root, "object")
    ET.SubElement(object_node, "label").text = label
    cordinates = ET.SubElement(root, "cordinates")
    # The tags used to be xmin, ymin, xmin, xmax, which duplicated "xmin"
    # and never produced "ymax".
    # NOTE(review): assumes the values are ordered xmin, ymin, xmax, ymax - confirm.
    for tag, value in zip(("xmin", "ymin", "xmax", "ymax"), coord_list):
        ET.SubElement(cordinates, tag).text = value

tree = ET.ElementTree(root)
tree.write("xyz.xml")

相关问题 更多 >