在文本文件中找到某一行的第四次出现，然后截断（truncate）文件

import time
import sched
import urllib.request
import shutil

scheduler = sched.scheduler(time.time, time.sleep)


def rss():
    """Download the RSS feed and save it locally as ``test.xml``."""
    # Stream the HTTP response straight to disk; both handles are closed
    # by the context manager even if the copy fails.
    with urllib.request.urlopen('http://any.website.here/rss') as response, \
            open('test.xml', 'wb') as out_file:
        shutil.copyfileobj(response, out_file)
    print('Updating RSS')


def trunc(path='test.xml', limit=4):
    """Truncate *path* right after the ``limit``-th ``</item>`` line.

    The file is scanned line by line.  A line counts as an item terminator
    when it equals ``</item>`` after surrounding whitespace is stripped.
    Once ``limit`` terminators have been seen, everything after that line
    (including any closing tags that followed it) is cut from the file.
    The resulting file contents are printed in either case.

    Args:
        path: File to scan and truncate in place.
        limit: Number of ``</item>`` lines to keep.

    Returns:
        The stream position at which the file was cut, or ``None`` when the
        file holds fewer than ``limit`` ``</item>`` lines (file left as is).
    """
    cut_at = None
    found = 0
    with open(path, 'r+', encoding='utf-8') as feed:
        # Use readline() rather than ``for line in feed``: iterating a text
        # file disables tell(), and mixing both styles skips lines.
        for line in iter(feed.readline, ''):
            # Lines keep their trailing newline, so compare the stripped text.
            if line.strip() != '</item>':
                continue
            found += 1
            print(found, 'items found!')
            if found == limit:
                cut_at = feed.tell()
                feed.truncate(cut_at)
                break
        feed.seek(0)
        print(feed.read())
    return cut_at


def scheduler_rss():
    """Run one refresh cycle: download the feed, truncate it, then wait."""
    scheduler.enter(0, 1, rss, ())  # calls rss
    scheduler.run()
    trunc()
    # Production interval would be 43200 seconds (12 hours); 30 seconds is
    # the short value used for testing.
    time.sleep(30)


if __name__ == '__main__':
    # Guarded so importing this file does not start downloading and sleeping.
    for i in range(100):
        scheduler_rss()

1条回答

网友

1楼 · 发布于 2024-04-19 16:30:50

如果您对另一种方法感兴趣，下面介绍如何使用 Python 的 xml.dom 模块实现这一点。您也可以用 xml.etree 来实现。

from xml.dom.minidom import parse, parseString

# Load the locally saved feed into a DOM tree.
dom = parse('test.xml')

... # download and save your xml
# Every <item> element in the document, in document order.
items = dom.getElementsByTagName('item')
for entry in items:
    # Only the first four child nodes of each item are inspected.
    for node in entry.childNodes[:4]:
        # Nodes without children have no firstChild value to show.
        if not node.childNodes:
            continue
        print(node.tagName + ':', node.firstChild.nodeValue)

对每个 <item> 标记，这会打印其前 4 个子节点的内容，类似这样：

title: 110 - Matryoshka
pubDate: Thu, 15 Jun 2017 04:00:00 +0000
guid: ef49bfbd9603243db217053194cc2dc0
link: http://nightvale.libsyn.com/110-matryoshka
...

现在，要删除前 4 个 <item> 之后的所有项：

# Drop every <item> after the first four.  Each node is detached through its
# own parent, so a feed with no items (or items under different parents)
# is handled without an IndexError.
for extra in items[4:]:
    extra.parentNode.removeChild(extra)

# Write the trimmed document; the context manager flushes and closes the file.
with open('test2.xml', 'w') as out_file:
    dom.writexml(out_file)

相关问题更多 >

编程相关推荐

热门问题

热门文章