将两个python数据处理脚本组合成一个脚本

{ "links": { "self": "http://localhost:2510/api/v2/jobs?skills=data%20science" }, "data": [ { "id": 121, "type": "job", "attributes": { "title": "Data Scientist", "date": "2014-01-22T15:25:00.000Z", "description": "Data scientists are in increasingly high demand amongst tech companies in London. Generally a combination of business acumen and technical skills are sought. Big data experience ..." }, "relationships": { "location": { "links": { "self": "http://localhost:2510/api/v2/jobs/121/location" }, "data": { "type": "location", "id": 3 } }, "country": { "links": { "self": "http://localhost:2510/api/v2/jobs/121/country" }, "data": { "type": "country", "id": 1 } },

# Script 1: count how many entries in the 'data' block reference each location id.
import json
from collections import defaultdict
from pprint import pprint

with open('data-science.txt') as data_file:
    data = json.load(data_file)

# location id -> number of entries pointing at it
locations = defaultdict(int)
for item in data['data']:
    location = item['relationships']['location']['data']['id']
    locations[location] += 1

pprint(locations)

"included": [ { "id": 3, "type": "location", "attributes": { "name": "Victoria", "coord": [ 51.503378, -0.139134 ] } },

# Script 2: print the id, name and coordinates of every record in the 'included' block.
import json
from collections import defaultdict
from pprint import pprint

with open('data-science.txt') as data_file:
    data = json.load(data_file)

locations = defaultdict(int)  # not used by this script
for record in data['included']:
    # Records without 'attributes', 'name' or 'coord' yield None for that field.
    id = record.get('id', None)
    name = record.get('attributes', {}).get('name', None)
    coord = record.get('attributes', {}).get('coord', None)
    print(id, name, coord)

3 Victoria [51.503378, -0.139134]
1 United Kingdom None
71 data science None
32 None None
3 Victoria [51.503378, -0.139134]
1 United Kingdom None
1 data mining None
22 data analysis None
33 sdlc None
38 artificial intelligence None
39 machine learning None
40 software development None
71 data science None
93 devops None
63 None None
52 Cubitt Town [51.505199, -0.018848]

1条回答

网友

1楼 · 发布于 2024-04-19 01:04:27

使用函数是组合这两个脚本的一种方法，毕竟它们处理的是同一份数据。因此，您应该为每个处理逻辑块创建一个函数，然后将两个函数的结果合并到一起：

import json
from collections import defaultdict
from pprint import pprint

def process_locations_data(data):
    """Count how often each location id is referenced in ``data['data']``.

    Each entry is expected to carry its location id at
    ``entry['relationships']['location']['data']['id']``; a missing key
    raises ``KeyError``.

    Returns a ``defaultdict(int)`` mapping location id -> number of entries.
    """
    counts = defaultdict(int)
    # Pull the ids out lazily, then tally them.
    location_ids = (
        entry['relationships']['location']['data']['id']
        for entry in data['data']
    )
    for location_id in location_ids:
        counts[location_id] += 1
    return counts

def process_locations_included(data):
    """Flatten the records of ``data['included']`` into tuples.

    Returns a list of ``(id, name, coord)`` tuples, one per record, in the
    original order. ``id`` is read from the record itself, ``name`` and
    ``coord`` from its ``'attributes'`` mapping; any field that is absent
    becomes ``None``.
    """
    def as_row(entry):
        # A record without 'attributes' is treated as having an empty mapping.
        attributes = entry.get('attributes', {})
        return entry.get('id'), attributes.get('name'), attributes.get('coord')

    return [as_row(entry) for entry in data['included']]

# Load the data from file once; both functions work on the same parsed document.
with open('data-science.txt') as data_file:
    data = json.load(data_file)

# Run the two processing steps on the same data.
locations = process_locations_data(data)      # location id -> reference count
records = process_locations_included(data)    # list of (id, name, coord) tuples

# Combine the two results for printing.
# NOTE(review): the count is looked up by id alone, while 'included' appears to
# hold records of several types (e.g. location and country) whose ids can
# coincide -- confirm whether non-location records should be filtered out.
for record_id, name, coord in records:
    # .get() avoids inserting a zero entry into the defaultdict for ids that
    # never occur as a location reference.
    references = locations.get(record_id, 0)
    # print() as a function, matching the Python 3 style used in the question.
    print(record_id, name, coord, references)

这些函数还可以取更好的名称，但这样应该可以实现您想要的合并。

相关问题更多 >

编程相关推荐

热门问题

热门文章