更新JSON文件的python脚本

{ "_notes": [], "deviceid": "353422071089618", "grp-milkuse/milksold": "0.0", "hh_id": "0753628391", "regdate": "2017-10-03", "gpsloc": "-9.10112605948487 32.897751368000414 1488.8210801196851 4.0", "grp-milkuse/milkprocess": "0.0", "_xform_id_string": "ADGG-LNG01-20170725", "hh_country": "2", "meta/instanceID": "uuid:0b3b8257-f154-42e8-ae17-119bfa04ceca", "_duration": "", "visitdate": "2017-10-03", "hh_region": "1007", "_geolocation": [ -9.10112605948487, 32.897751368000414 ], "hh_district": "1037", "datacollid": "0758990688", "grp-milkuse/milkcalf": "0.0", "_status": "submitted_via_web", "formhub/uuid": "98dfbfd65ef24a92a46d6f794e748627", "rpt_animrec": [ { "rpt_animrec/grp_animrec/cowmilked": "3", "rpt_animrec/calved": "2", "rpt_animrec/grp_animrec/tagid": "TZN000404015233", "rpt_animrec/injuries": "2", "rpt_animrec/anim_weight/heartgirth": "150.0", "rpt_animrec/anim_weight/bodyscore": "3.0", "rpt_animrec/parasites": "2", "rpt_animrec/grp_feedwater/watertype": "3", "rpt_animrec/anim_weight/weight": "200.0", "rpt_animrec/grp_feedwater/feedtype": "3 4", "rpt_animrec/vaccinated": "2", "rpt_animrec/served": "2" }, { "rpt_animrec/sirehastag": "2", "rpt_animrec/siredetails/sirename": "Nil", "rpt_animrec/grp_feedwater/feedtype": "2 3 4", "rpt_animrec/siredetails/sirebreed": "2", "rpt_animrec/vaccinated": "2", "rpt_animrec/calved": "2", "rpt_animrec/grp_animrec/tagid": "TZN000404015236", "rpt_animrec/injuries": "2", "rpt_animrec/anim_weight/bodyscore": "3.0", "rpt_animrec/grp_service/dateserv": "2016-12-22", "rpt_animrec/anim_weight/weight": "250.0", "rpt_animrec/siredetails/sirecnty": "Nil", "rpt_animrec/grp_servicedtls/servechange": "2", "rpt_animrec/grp_servicedtls/servsourcebull": "1", "rpt_animrec/parasites": "2", "rpt_animrec/grp_feedwater/watertype": "3", "rpt_animrec/served": "1", "rpt_animrec/grp_animrec/cowmilked": "3", "rpt_animrec/grp_servicedtls/servicechangeyes": "1", "rpt_animrec/grp_servicedtls/sercost": "15000.0", "rpt_animrec/anim_weight/heartgirth": 
"160.0", "rpt_animrec/siredetails/sirecomp": "5", "rpt_animrec/grp_service/servtype": "1" }, { "rpt_animrec/sirehastag": "2", "rpt_animrec/siredetails/sirename": "Nill", "rpt_animrec/grp_feedwater/feedtype": "1 3 4", "rpt_animrec/siredetails/sirebreed": "2", "rpt_animrec/vaccinated": "2", "rpt_animrec/calved": "2", "rpt_animrec/grp_animrec/tagid": "TZN000404015237", "rpt_animrec/injuries": "2", "rpt_animrec/anim_weight/bodyscore": "3.0", "rpt_animrec/grp_service/dateserv": "2017-02-09", "rpt_animrec/anim_weight/weight": "350.0", "rpt_animrec/siredetails/sirecnty": "Nill", "rpt_animrec/grp_servicedtls/servechange": "2", "rpt_animrec/grp_servicedtls/servsourcebull": "1", "rpt_animrec/parasites": "2", "rpt_animrec/grp_feedwater/watertype": "3", "rpt_animrec/served": "1", "rpt_animrec/grp_animrec/cowmilked": "2", "rpt_animrec/grp_servicedtls/servicechangeyes": "1", "rpt_animrec/drydate": "2017-07-22", "rpt_animrec/grp_servicedtls/sercost": "15000.0", "rpt_animrec/anim_weight/heartgirth": "170.0", "rpt_animrec/siredetails/sirecomp": "5", "rpt_animrec/grp_service/servtype": "1" } ], "_bamboo_dataset_id": "", "start_time": "2017-10-03T13:25:01.529+03", "_uuid": "0b3b8257-f154-42e8-ae17-119bfa04ceca", "_tags": [], "grp-milkuse/milkprice": "0.0", "_userform_id": "adggtnz_ADGG-LNG01-20170725", "_submitted_by": null, "meta/instanceName": "ADGG-LNG01-20170725-HH0753628391", "enumtype": "2", "hh_village": "4835", "grp-milkuse/milkconsumed": "0.0", "_submission_time": "2017-10-05T18:35:19", "_version": "20170725", "_attachments": [], "end_time": "2017-10-03T13:31:28.876+03", "hh_kebele": "1807", "_id": 369982

import json
import os

# Python code to update TZN to ETH.
# The parenthesised single-argument print calls below produce the same
# output as the Python 2 print statement form.
json_dir = "/opt/new/file/20180116/"
json_dir_processed = "/opt/new/file/20180116updated/"

for entry in os.listdir(json_dir):
    # Anything without a .json suffix is only reported, never processed.
    if not entry.endswith(".json"):
        print("%s not a JSON file" % entry)
        continue

    source_file = json_dir + entry
    processed_json = "%s%s" % (json_dir_processed, entry)
    print("Processing %s -> %s" % (source_file, processed_json))

    with open(source_file, 'r') as src:
        json_data = json.load(src)

    # Copies the value of the top-level 'ETH' key to a top-level 'TZN' key.
    # A document that has no top-level 'ETH' key raises KeyError here.
    json_data['TZN'] = json_data['ETH']

    with open(processed_json, 'w') as dst:
        dst.write(json.dumps(json_data, indent=4))

3条回答

网友

1楼 · 编辑于 2024-05-16 15:15:15

如果只是想把文件中任意位置出现的某个子字符串替换成另一个子字符串，为什么还要费心解析 JSON 呢？直接对从文件对象读出的 JSON 文本进行替换即可，甚至不需要 json 模块。

import os

# Python code to update TZN to ETH.
# Each file is handled as plain text: every occurrence of 'TZN' is replaced
# with 'ETH' wherever it appears, so the JSON is never parsed.
json_dir = "/opt/new/file/20180116/"
json_dir_processed = "/opt/new/file/20180116updated/"

for json_file in os.listdir(json_dir):
    # Anything without a .json suffix is only reported, never copied.
    if not json_file.endswith(".json"):
        print("%s not a JSON file" % json_file)
        continue

    processed_json = os.path.join(json_dir_processed, json_file)
    json_file = os.path.join(json_dir, json_file)
    print("Processing %s -> %s" % (json_file, processed_json))

    # Read the whole input before the output is opened, so the output file
    # is never truncated ahead of a failed read, and close both handles
    # explicitly instead of leaving that to the garbage collector.
    with open(json_file) as src:
        text = src.read()
    with open(processed_json, 'w') as dst:
        dst.write(text.replace('TZN', 'ETH'))

网友

2楼 · 编辑于 2024-05-16 15:15:15

import json
import glob
import os
import re

from pathlib import Path


def get_json_files(directory='/home/test/PycharmProjects/test'):
    """Return the paths of the ``*.json`` files directly inside *directory*.

    Args:
        directory: Folder to search. Sub-folders are not searched. The
            default is the folder this script was originally written for.

    Returns:
        A sorted list of matching paths; empty when nothing matches or the
        folder does not exist.
    """
    # Escape the folder name so characters such as '[' in it are matched
    # literally; only the trailing '*.json' acts as a wildcard.
    pattern = os.path.join(glob.escape(directory), '*.json')
    # Sorting makes the processing order reproducible between runs.
    return sorted(glob.glob(pattern))


def read_json_file(filename):
    """Load and return the JSON document stored in *filename*.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value (a dict for the survey files handled here).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode as UTF-8 explicitly: the platform default encoding is not
    # guaranteed to be UTF-8, which would corrupt non-ASCII text.
    with open(filename, encoding='utf-8') as f:
        return json.load(f)


def process_json(json_data):
    """Replace ``TZN`` with ``ETH`` in the tag id of every animal record.

    Only the ``rpt_animrec/grp_animrec/tagid`` value of each entry in the
    ``rpt_animrec`` list is changed. Records without a string tag id, and
    documents without an ``rpt_animrec`` list, are left untouched instead of
    raising ``KeyError``.

    Args:
        json_data: Decoded JSON document (a dict). It is modified in place.

    Returns:
        The same *json_data* object, after modification.
    """
    tag_key = 'rpt_animrec/grp_animrec/tagid'
    # ``or []`` also covers an explicit null stored under 'rpt_animrec'.
    for record in json_data.get('rpt_animrec') or []:
        tag = record.get(tag_key)
        if isinstance(tag, str):
            # A literal substring swap; no regular expression is needed.
            record[tag_key] = tag.replace('TZN', 'ETH')
    return json_data


def write_json(json_data, file_path,
               output_dir='/home/test/PycharmProjects/test/processed'):
    """Write *json_data* to *output_dir* under the base name of *file_path*.

    Args:
        json_data: JSON-serialisable value to write.
        file_path: Path of the source file; only its base name is used, as
            the name of the output file.
        output_dir: Folder that receives the output file. It is created,
            including missing parents, when it does not exist yet. The
            default is the folder this script was originally written for.

    Raises:
        OSError: If the folder cannot be created or the file written.
        TypeError: If *json_data* is not JSON-serialisable.
    """
    # Serialise first so a serialisation error cannot leave an empty file.
    text = json.dumps(json_data, indent=4)
    target_dir = Path(output_dir)
    # Without this, a missing output folder makes open() fail.
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / os.path.basename(file_path)
    with open(target, 'w', encoding='utf-8') as f:
        f.write(text)


def process_json_files(json_files):
    """Convert every file listed in *json_files* and save the result.

    Each path is announced on stdout, loaded with ``read_json_file``,
    transformed by ``process_json`` and then passed, together with the
    source path, to ``write_json``.
    """
    for source_path in json_files:
        print("Processing {}".format(source_path))
        converted = process_json(read_json_file(source_path))
        write_json(converted, source_path)


if __name__ == '__main__':
    # Script entry point: convert every file that get_json_files() returns.
    process_json_files(get_json_files())

这就是我会做的改动。

网友

3楼 · 编辑于 2024-05-16 15:15:15

import json
import os

def process(fp):
    """Return the JSON read from *fp* with ``TZN`` tag ids changed to ``ETH``.

    Only the ``rpt_animrec/grp_animrec/tagid`` value of each entry in the
    ``rpt_animrec`` list is rewritten. Entries whose tag id is missing, null
    or empty are left as they are instead of raising an exception.

    Args:
        fp: Readable text file object containing one JSON document (an
            object at the top level).

    Returns:
        The updated document serialised as a string with 4-space indentation.
    """
    json_data = json.load(fp)
    key = "rpt_animrec/grp_animrec/tagid"
    for d in json_data.get('rpt_animrec', []):
        tag = d.get(key)
        if not tag:
            # Nothing to rewrite for a missing, null or empty tag id.
            continue
        d[key] = tag.replace('TZN', 'ETH')

    return json.dumps(json_data, indent=4)


source_path = '/opt/new/file/20180116/'
dest_path = '/opt/new/file/20180116updated/'

# Walk source_path, sub-directories included, and write a converted copy of
# every *.json file into dest_path. The output is flat: files that share a
# name in different sub-directories end up at the same destination path.
for path, dirnames, fnames in os.walk(source_path, topdown=True):
    for fname in fnames:
        if not fname.endswith('.json'):
            continue
        full_path = os.path.join(path, fname)
        end_path = os.path.join(dest_path, fname)
        print('Processing: {full_path} => {dest_path}'.format(
            full_path=full_path,
            dest_path=end_path
        ))
        # Convert before the output is opened, so a file that fails to parse
        # does not leave an empty output file behind.
        with open(full_path, 'r') as in_fp:
            converted = process(in_fp)
        # Write to the per-file end_path: dest_path is the destination
        # directory and cannot itself be opened for writing.
        with open(end_path, 'w') as out_fp:
            out_fp.write(converted)

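如果要遍历整个路径（包括子目录），可以像上面这样使用 os.walk。注意 topdown=False 只会改变遍历顺序（先子目录、后父目录），并不会跳过子目录；如果不想遍历子目录，可以改用 os.listdir(source_path)，或者保持 topdown=True 并在循环体内执行 dirnames[:] = [] 来阻止继续向下遍历。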

相关问题更多 >

编程相关推荐

热门问题

热门文章