将多个 JSON 文件合并为一个文件的最佳方法

folder1/ file1.json file2.json file3.json folder2/ file1.json file2.json file3.json file4.json folder3/ file1.json file2.json file3.json file4.json file5.json ....

import json
from pathlib import Path

# NOTE: location_of_output_folder and root_location_for_folders are
# placeholders; define them before running this snippet.
output_dir = Path(location_of_output_folder)
output_dir.mkdir(parents=True, exist_ok=True)

# find all the folders
root_dir = Path(root_location_for_folders)
folders = [fld for fld in root_dir.iterdir() if fld.is_dir()]

# find all the unique file names
all_filenames = []
for fld in folders:
    all_filenames.extend(f.name for f in fld.glob('*.json'))
# De-duplicate once and reuse the result for both approaches.
unique_filenames = sorted(set(all_filenames))

# NOTE: Approach 1 and Approach 2 are alternatives that write to the same
# output paths. Running both, as written here, leaves only Approach 2's
# result on disk.

## Approach 1
# Join file that possibly exists across all the folders by creating empty list
for f in unique_filenames:
    f_data = []
    for fld in folders:
        if (fld / f).is_file():
            with open(fld / f, 'r') as fp:
                f_data.append(json.load(fp))
    with open(output_dir / f, 'w') as fp:
        json.dump(f_data, fp, indent=4)

## Approach 2
# Join file that possibly exists across all the folders by creating empty dict
for f in unique_filenames:
    f_data = {}
    for fld in folders:
        if (fld / f).is_file():
            with open(fld / f, 'r') as fp:
                # dict.update: a key seen again in a later folder replaces
                # the value loaded from an earlier one.
                f_data.update(json.load(fp))
    with open(output_dir / f, 'w') as fp:
        json.dump(f_data, fp, indent=4)

{ k1: { l1: 11, l2: 12, l3: 13, l4: 14, }, k2: { l1: 21, l2: 22, l3: 23, l4: 24, } ..... }

{ k8: { l1: 41, l2: 42, l3: 43, l4: 44, }, k9: { l1: 51, l2: 52, l3: 53, l4: 54, } ..... }

3条回答

网友

1楼 · 编辑于 2024-04-26 06:41:54

编辑：我知道此解决方案已不再符合要求，我会尽快更新它。

暂且不谈这样做是否有意义，下面是我的思路。

import glob
import json

# Every JSON file directly inside the input directory, in glob's order.
file_names = glob.glob('../resources/json_files/*.json')


def _read_json(json_path):
    """Parse and return the JSON document stored at *json_path*."""
    with open(json_path) as handle:
        return json.load(handle)


# One list entry per input file, each holding that file's parsed content.
json_list = [_read_json(name) for name in file_names]

# Write all documents out as one pretty-printed JSON array.
with open('../out/json_merge_out.json', 'w') as out_file:
    json.dump(json_list, out_file, indent=4)

包含JSON文件的目录：

example_1.json：

{
    "fruit": "Apple",
    "size": "Large",
    "color": "Red"
}

example_2.json：

{
    "quiz": {
        "sport": {
            "q1": {
                "question": "Which one is correct team name in NBA?",
                "options": [
                    "New York Bulls",
                    "Los Angeles Kings",
                    "Golden State Warriros",
                    "Huston Rocket"
                ],
                "answer": "Huston Rocket"
            }
        },
        "maths": {
            "q1": {
                "question": "5 + 7 = ?",
                "options": [
                    "10",
                    "11",
                    "12",
                    "13"
                ],
                "answer": "12"
            },
            "q2": {
                "question": "12 - 8 = ?",
                "options": [
                    "1",
                    "2",
                    "3",
                    "4"
                ],
                "answer": "4"
            }
        }
    }
}

输出文件 json_merge_out.json 的内容：

[
    {
        "quiz": {
            "sport": {
                "q1": {
                    "question": "Which one is correct team name in NBA?",
                    "options": [
                        "New York Bulls",
                        "Los Angeles Kings",
                        "Golden State Warriros",
                        "Huston Rocket"
                    ],
                    "answer": "Huston Rocket"
                }
            },
            "maths": {
                "q1": {
                    "question": "5 + 7 = ?",
                    "options": [
                        "10",
                        "11",
                        "12",
                        "13"
                    ],
                    "answer": "12"
                },
                "q2": {
                    "question": "12 - 8 = ?",
                    "options": [
                        "1",
                        "2",
                        "3",
                        "4"
                    ],
                    "answer": "4"
                }
            }
        }
    },
    {
        "fruit": "Apple",
        "size": "Large",
        "color": "Red"
    }
]

网友

2楼 · 编辑于 2024-04-26 06:41:54

下面是我能想到的最简单的代码：

from glob import glob
from os import makedirs, path
from pathlib import Path
import json

# Directories
input_dir = "in"
output_file = "out/out.json"

# Get array of files. Without recursive=True, "**" behaves like "*", so this
# matches JSON files exactly one directory level below input_dir. Sorting
# makes the merge order (and so which duplicate key wins) repeatable.
files = sorted(glob(path.join(input_dir, "**", "*.json")))

# Data object
data = {}

# Merge all files; on duplicate top-level keys the later file wins.
for file in files:
    # Use a context manager so each input handle is closed promptly.
    with open(file) as in_fp:
        data.update(json.load(in_fp))

# Create output directory
makedirs(path.dirname(output_file), exist_ok=True)

# Dump data; the context manager guarantees the output is flushed and closed.
with open(output_file, "w") as out_fp:
    json.dump(data, out_fp)

网友

3楼 · 编辑于 2024-04-26 06:41:54

您不需要解析输入的 JSON 文件，只需把它们当作文本文件读取即可，这样会快得多（基本上每个文件只需一次系统调用）。然后在开头添加 [，在结尾添加 ]，并在每个文件内容之后（最后一个文件除外）添加 ,，把它们组合成一个全局 JSON 列表。诚然，顶层列表中的各行不会被缩进，但谁在乎呢？以下是一个基本实现：

infiles = [...]  # the whole list of input JSON files
outfile = 'out.json'

# Concatenate the raw text of every input file into one JSON array without
# parsing it. Only one input file's content is held in memory at a time.
with open(outfile, 'w') as o:
    o.write('[')
    for index, infile in enumerate(infiles):
        if index:
            # Separator goes before every file except the first, so an
            # empty input list needs no special case.
            o.write(',\n')
        with open(infile, 'r') as i:
            o.write(i.read().strip())
    # Always close the array, so an empty input list yields "[]".
    o.write(']\n')

注意，这个实现每次只在内存中保存一个输入文件的内容，因此与其他方法不同，它可以轻松处理很长的文件列表。

最后一点：如果您确实想让所有内层的行都带缩进，只需逐行读取每个文件（对文件对象使用 readline() 方法），并在写入输出文件之前给每一行加上 4 个空格的前缀。但这样会损失性能……

编辑：稍作修改的版本，进一步提炼了代码、减少了重复：

infiles = [...]  # the whole list of input JSON files
outfile = 'out.json'
# end[False] closes the array after the last file; end[True] separates entries.
end, n = (']\n', ',\n'), len(infiles)

with open(outfile, 'w') as o:
    o.write('[')
    if not infiles:
        # No inputs: still close the bracket so the output is valid JSON.
        o.write(']\n')
    for infile in infiles:
        n -= 1  # number of files still to be written after this one
        with open(infile, 'r') as i:
            o.write(i.read().strip() + end[n > 0])  # select correct end separator

相关问题更多 >

编程相关推荐

热门问题

热门文章