Python中用于JSON提要的cURL方法

import json
import sys
import urllib2
from contextlib import closing
from datetime import datetime

import pymongo
import pytz

from utils import slugify
# from utils import logger

# First page of the Artlogic JSON feed; later pages are reached through the
# 'next_page_link' field of each response.
FEED_URL = "http://feeds.artlogic.net/artworks/artlogiconline/json/"

client = pymongo.MongoClient()
db = client.artlogic


def _download_rows(url):
    """Return the concatenated 'rows' of every feed page, starting at *url*."""
    rows = []
    while True:
        # closing() releases the HTTP response even if JSON decoding fails.
        with closing(urllib2.urlopen(url)) as response:
            data = json.load(response)
        rows.extend(data['rows'])

        feed = data['feed_data']
        # logger.debug("retrieved page %s of %s of artwork data"
        #              % (feed['page'], feed['no_of_pages']))

        # Stop when we are at the last page.
        if feed['page'] == feed['no_of_pages']:
            break

        # A page without a follow-up link also ends the pagination, instead
        # of failing with a KeyError on the lookup.
        next_url = feed.get('next_page_link')
        if not next_url:
            break
        url = next_url
    return rows


def _store_artwork(artwork):
    """Upsert one artwork description and its artist into MongoDB."""
    # Mongo does not like keys that have a dot in their name; this property
    # does not seem to be used anyway, so drop it (mutates *artwork*).
    artwork.pop('artworks.description2', None)

    # Upsert into the database: an entry with the same id is updated in
    # place rather than inserted a second time.
    # NOTE(review): Collection.update() is the legacy pymongo API; kept so
    # the required pymongo version does not change.
    db.AL_artworks.update({"id": artwork['id']}, artwork, upsert=True)

    # The original author notes that artwork['artist_id'] is not functioning
    # properly, so artists are keyed by their name.
    db.AL_artists.update(
        {"artist": artwork['artist']},
        {
            "artist_sort": artwork['artist_sort'],
            "artist": artwork['artist'],
            "slug": slugify(artwork['artist']),
        },
        upsert=True)


def fetch_artworks():
    """Download all artwork descriptions from the feed and store them locally.

    Every page of the JSON feed is fetched, then each row is upserted into
    the ``AL_artworks`` collection and its artist into ``AL_artists``.

    Returns:
        The list of artwork dicts as downloaded, minus the
        'artworks.description2' key, which is removed before storing.
    """
    # logger.debug("downloading artwork data from Artlogic")
    AL_artworks = _download_rows(FEED_URL)

    # logger.debug("updating local mongodb database with %s entries"
    #              % len(AL_artworks))
    for artwork in AL_artworks:
        _store_artwork(artwork)

    # db.meta.update({"subject": "artworks"},
    #                {"updated": datetime.now(pytz.utc), "subject": "artworks"},
    #                upsert=True)
    return AL_artworks


if __name__ == "__main__":
    fetch_artworks()

1条回答

网友

1楼 · 发布于 2024-06-01 05:19:48

首先，您可能会喜欢requests库。

否则，如果你想只使用标准库（stdlib），可以这样写：

def fetchfile(url, dst, chunk_size=4096):
    """Download *url* and write the raw response body to the file *dst*.

    The body is copied in pieces of at most *chunk_size* bytes, so it is
    never held in memory all at once.

    Args:
        url: Address passed to ``urllib2.urlopen``.
        dst: Path of the output file; opened in binary write mode.
        chunk_size: Maximum number of bytes read per iteration.
    """
    from contextlib import closing

    # Both the HTTP response and the output file are closed when the blocks
    # exit, including when a read or write raises.
    with closing(urllib2.urlopen(url)) as response:
        with open(dst, 'wb') as out:
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                out.write(chunk)


import os
import uuid

# Save the linked feed page under a unique file name.
# `data` is assumed to be a decoded feed page from the surrounding code.
fetchfile(
    data['feed_data']['next_page_link'],
    os.path.join('/var/www/static', uuid.uuid1().hex),
)

以上代码省略了适当的异常处理（如果您需要，我可以补充，但我相信文档已经足够清晰）。

您可以将fetchfile()放入异步作业池（pool）中，一次获取多个文件。

相关问题更多 >

编程相关推荐

热门问题

热门文章