使用Python创建的XML中代码被丢弃

0 投票
2 回答
1618 浏览
提问于 2025-04-17 12:05

我正在用Python复制并更新一个元数据的xml文件,这个过程运行得很好,除了原始元文件中的以下代码被删除了。

<?xml version="1.0" encoding="utf-8"?><?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>

这段代码需要放在文件的开头。

在PHP中,这个问题的解决方法可参见《在xml文件的特定位置插入内容》一帖,但我需要一个Python的解决方案。

代码和详细解释在我最初的帖子《使用Python在xml/text文件中搜索和替换多行》里,但我把这个问题分开来问,因为它和我之前遇到的问题不同。

谢谢,

完整代码

import os, xml, arcpy, shutil, datetime, Tkinter, tkFileDialog, tkSimpleDialog
from xml.etree import ElementTree as et 

path=os.getcwd()
RootDirectory=path
currentPath=path
arcpy.env.workspace = path
Count=0
DECLARATION = """<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>\n"""
Generated_XMLs=RootDirectory+'\GeneratedXML_LOG.txt'
f = open(Generated_XMLs, 'a')
f.write("Log of Metadata Creation Process - Update: "+str(datetime.datetime.now())+"\n")
f.close()

for root, dirs, files in os.walk(RootDirectory, topdown=False):
    #print root, dirs
    for directory in dirs:
        try:
            currentPath=os.path.join(root,directory)
        except:
            pass
        os.chdir(currentPath)
        arcpy.env.workspace = currentPath
        print currentPath
#def Create_xml(currentPath):

        FileList = arcpy.ListFeatureClasses()
        zone="_Zone"

        for File in FileList:
            Count+=1
            FileDesc_obj = arcpy.Describe(File)
            FileNm=FileDesc_obj.file
            check_meta=os.listdir(currentPath)
            existingXML=FileNm[:FileNm.find('.')]
            existingExtension=FileNm[FileNm.find('.'):]
            print "XML: "+existingXML
            #print check_meta
            #if  existingXML+'.xml' in check_meta:
            #newMetaFile='new'
            for f in check_meta:
                if f.startswith(existingXML) and f.endswith('.xml'):
                    print "exists, file name:", f
                    newMetaFile=FileNm+"_2012Metadata.xml"
                    try:
                        shutil.copy2(f, newMetaFile)
                    except:
                        pass
                    break
                else:
                    #print "Does not exist"
                    newMetaFile=FileNm+"_BaseMetadata.xml"

            print "New meta file: "+newMetaFile+ " for: "+File
            if newMetaFile.endswith('_BaseMetadata.xml'):        
                print "calling tkinter"
                root = Tkinter.Tk()
                root.withdraw()
                file = tkFileDialog.askopenfile(parent=root,mode='rb',title='Choose a xml base file to match with: '+File)
                if file != None:
                    metafile=os.path.abspath(file.name)
                    file.close()
                    #print metafile
                    shutil.copy2(metafile,newMetaFile)
                    print "copied"+metafile
                    root.destroy

                else:
                    shutil.copy2('L:\Data_Admin\QA\Metadata_python_toolset\Master_Metadata.xml', newMetaFile)
                    #root = Tkinter.Tk()
                    #root.withdraw()
                    #newTitle=tkSimpleDialog.askstring('title', 'prompt')
                    #root.destroy
                    #print newTitle

            print "Parsing meta file: "+newMetaFile
            tree=et.parse(newMetaFile)        
            print "Processing: "+str(File)

            for node in tree.findall('.//title'):
                node.text = str(FileNm)
            for node in tree.findall('.//procstep/srcused'):
                node.text = str(currentPath+"\\"+existingXML+".xml")
            dt=dt=str(datetime.datetime.now())
            for node in tree.findall('.//procstep/date'):
                node.text = str(dt[:10])
            for node in tree.findall('.//procstep/time'):
                node.text = str(dt[11:13]+dt[16:19])
            for node in tree.findall('.//metd/date'):
                node.text = str(dt[:10])
            for node in tree.findall('.//northbc'):
                node.text = str(FileDesc_obj.extent.YMax)
            for node in tree.findall('.//southbc'):
                node.text = str(FileDesc_obj.extent.YMin)
            for node in tree.findall('.//westbc'):
                node.text = str(FileDesc_obj.extent.XMin)
            for node in tree.findall('.//eastbc'):
                node.text = str(FileDesc_obj.extent.XMax)        
            for node in tree.findall('.//native/nondig/formname'):
                node.text = str(os.getcwd()+"\\"+File)
            for node in tree.findall('.//native/digform/formname'):
                node.text = str(FileDesc_obj.featureType)
            for node in tree.findall('.//avlform/nondig/formname'):
                node.text = str(FileDesc_obj.extension)
            for node in tree.findall('.//avlform/digform/formname'):
                node.text = str(float(os.path.getsize(File))/int(1024))+" KB"
            for node in tree.findall('.//theme'):
                node.text = str(FileDesc_obj.spatialReference.name +" ; EPSG: "+str(FileDesc_obj.spatialReference.factoryCode))
            print node.text
            projection_info=[]
            Zone=FileDesc_obj.spatialReference.name

            if "GCS" in str(FileDesc_obj.spatialReference.name):
                projection_info=[FileDesc_obj.spatialReference.GCSName, FileDesc_obj.spatialReference.angularUnitName, FileDesc_obj.spatialReference.datumName, FileDesc_obj.spatialReference.spheroidName]
                print "Geographic Coordinate system"
            else:
                projection_info=[FileDesc_obj.spatialReference.datumName, FileDesc_obj.spatialReference.spheroidName, FileDesc_obj.spatialReference.angularUnitName, Zone[Zone.rfind(zone)-3:]]
                print "Projected Coordinate system"
            x=0
            for node in tree.findall('.//spdom'):
                for node2 in node.findall('.//keyword'):
                    #print node2.text
                    node2.text = str(projection_info[x])
                    #print node2.text
                    x=x+1


            tree.write(newMetaFile)
            with open(newMetaFile, 'w') as output: # would be better to write to temp file and rename
                output.write(DECLARATION)
                tree.write(output, xml_declaration=False, encoding='utf-8') 
    # xml_declaration=False - don't write default declaration   

            f = open(Generated_XMLs, 'a')
            f.write(str(Count)+": "+File+"; "+newMetaFile+"; "+currentPath+";"+existingXML+"\n")
            f.close()



    #        Create_xml(currentPath)

来自Wing IDE的错误信息

xml.parsers.expat.ExpatError: 找不到元素:第3行,第0列
文件 "L:\Data_Admin\QA\Metadata_python_toolset\test2\update_Metadata1f.py",第78行,在 tree=et.parse(newMetaFile)
文件 "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py",第862行,在 parse:tree.parse(source, parser)
文件 "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py",第587行,在 parse:self._root = parser.close()
文件 "C:\Python26\ArcGIS10.0\Lib\xml\etree\ElementTree.py",第1254行,在 close:self._parser.Parse("", 1) # 数据结束

2 个回答

1

如果你所有的xml文件都有相同的声明,你可以自己写一个:

import xml.etree.ElementTree as ET


# Prolog to emit in front of the serialized tree: the XML declaration plus the
# xml-stylesheet processing instruction that ElementTree drops on parse.
DECLARATION = """<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>\n"""


def write_with_declaration(tree, filename, declaration=DECLARATION):
    """Write *tree* to *filename*, preceded by *declaration*.

    Usage::

        tree = ET.parse(filename)
        # do some work on tree
        write_with_declaration(tree, filename)

    The tree is serialized to memory before the file is opened, so a
    serialization error cannot leave a truncated, declaration-only file
    behind.  With encoding='utf-8' ElementTree adds no XML declaration of
    its own, so the ``xml_declaration`` keyword (unavailable before
    Python 2.7) is not needed.  A failure while the file itself is being
    written can still leave it incomplete.

    :param tree: the ``ElementTree`` to serialize (its root element is used).
    :param filename: path of the file to create or overwrite.
    :param declaration: text written before the root element.
    """
    body = ET.tostring(tree.getroot(), encoding='utf-8')
    # Binary mode: both parts are written as UTF-8 encoded bytes, which works
    # the same way on Python 2 and Python 3.
    with open(filename, 'wb') as output:
        output.write(declaration.encode('utf-8'))
        output.write(body)
2

我也曾经在给ElementTree文档开头添加处理指令(PI)时遇到困难。最后我想出了一个办法,就是使用一个假的根节点(元素标签用None表示)来存放需要的处理指令,然后再放入真正的文档根节点。

import xml.etree.ElementTree as ET

# The real document root; build the rest of the document under it as usual.
root = ET.Element('root')

# Stylesheet processing instruction.  The trailing newline keeps it on its
# own line in the output.  Create one of these per instruction required.
pi = ET.ProcessingInstruction(
    "xml-stylesheet", "type='text/xsl' href='ANZMeta.xsl'")
pi.tail = "\n"

# Wrapper element whose tag is None: it holds the processing instruction(s)
# first and the real root last.
fake_root = ET.Element(None)
fake_root.extend([pi, root])

# Let ElementTree.write() produce the <?xml ...?> declaration itself.
tree = ET.ElementTree(fake_root)
tree.write("doc.xml", xml_declaration=True)

生成的doc.xml文件如下:

<?xml version='1.0' encoding='us-ascii'?>
<?xml-stylesheet type='text/xsl' href='ANZMeta.xsl'?>
<root />

撰写回答