我有以下代码。理解代码后,可以用
大写字母。我可以用insert or ignore
测试通道中是否有新项,但是
我正在尝试使用feed.updated_parsed
属性的更好的机制。为什么不管用
如预期的那样?你知道吗
from __future__ import unicode_literals
import feedparser
from sqlite3 import dbapi2 as sqlite
import sys, os
from datetime import datetime
from time import mktime
from daeutils import *
import re
import random
import optparse
import curses
import socket
def getActiveChannels():
"""Returns a list of active RSS channels"""
con = sqlite.connect(connectionString)
cur = con.cursor()
cur.execute("select id, title, xmlurl, updated from channels")
channels = cur.fetchall()
cur.close()
con.close()
return channels
def getItemsForChannel(xmlUrl, lastUpdate):
socket.setdefaulttimeout(60)
feedparserDictionary = feedparser.parse(xmlUrl)
updatedTime = datetime.fromtimestamp(mktime(feedparserDictionary.feed.updated_parsed))
lst = datetime.strptime(lastUpdate, "%Y-%m-%dT%H:%M:%S.%f")
if updatedTime < lst:
return [] # HERE NOT BEHAVING CORRECTLY, WHEN I COMMENT THIS LINE, THERE MAY BE A FEW ITEMS
items = feedparserDictionary.entries
print "There are new %d items" % len(items)
return items
def setChannelUpdateTime(xmlUrl, tm):
con = sqlite.connect(connectionString)
cur = con.cursor()
cur.execute("update channels set updated = :tm where xmlurl = :xmlUrl", locals())
con.commit()
print "updated successfully"
cur.close()
con.close()
if __name__ == "_main__":
con = sqlite.connect(connectionString)
for channel in getActiveChannels():
channelId, channelTitle, channelXmlUrl, lastChannelUpdate = channel
countOfNewItems = 0
items = getItemsForChannel(channelXmlUrl, lastChannelUpdate)
for item in items:
title, link, description, priority, updated = item
cur = con.cursor()
cur.execute("insert or ignore into feeds \
(title, link, description, read, updated, channelid) \
values (?, ?, ?, ?, ?, ?)", \
(title, link, description, 0, updated, channelId))
countOfNewItems += cur.rowcount # WHICH ARE INSERTED HERE
con.commit()
cur.close()
if countOfNewItems:
print "Found new items"
now = datetime.now().isoformat()
if "." not in now:
now = now + ".000000"
setChannelUpdateTime(channelXmlUrl, now)
以下是sqlite中的两个表:
CREATE TABLE channels (id integer primary key, title string, text string, description string, type string, xmlurl string unique, htmlurl string, priority integer, active integer, deactivated integer, updated text);
CREATE TABLE feeds (id integer primary key, title string, link string unique, description string, read integer, priority integer, updated string, channelid integer, foreign key (channelid) references channels(id));
我认为可能的错误是您试图比较feed上的
updated
字段,feed创建者可能不太支持这些feed。或时区格式,因为使用了isoformat或etc无论如何,我认为比较每个条目的
updated
属性比比较feed属性要好得多,feed属性主要用于使feed缓存无效。你知道吗下面是一个工作示例,其中我只返回函数中的新条目。你知道吗
它对数据库值中最后一次解析的日期使用from/to iso格式,并对每个条目进行比较,而不是基于feed
updated
属性的全局比较。你知道吗相关问题 更多 >
编程相关推荐