Saving data to a JSON file in Python - appending multiple keys
I have a large amount of data and I want to write a script for it. Specifically, I want to save the data like this:
pAccountIds1 should hold the first 99 IDs, pAccountIds2 the next 99 IDs, and so on, with everything stored under a top-level Parameters key. Right now, however, all of the IDs end up in pAccountIds1.
This is my expected output:
{"Parameters": [
{"pAccountIds1": "886180295749,575789942587,331377892512"},
{"pAccountIds2": "886180295749,575789942587,331377892512"}
]}
This is the actual behavior:
{
    "Parameters": [
        {
            "pAccountIds1": "886180295749,169278231308,888561797329,316900773169,452451531881,263111390741,774531687947,307175455232,160582862483,503763934565,628239060389,732071894519,851207678364,176876819377,852942366732,697301814574,463173411868,813366789735,434423952232,104239629908,850131272446,173873129414,758190182387,917707497382,813660687632,295585687189,946660130177,531405577506,803054876607,150802796093,231981811420,288035531821,187585725025,381266788059,913104880535,109470036896,843076529994,554635727446,384741278002,179697366565,115248717328,834696924337,137711249429,241488429314,574589139538"
        }
    ]
}
This is my sample data:
{
    "deployment_map_source": "S3",
    "deployment_map_name": "deployment_maps.yaml",
    "pipeline_definition": {
        "name": "logs",
        "default_providers": {
            "source": {
                "provider": "codecommit",
                "properties": {
                    "account_id": 715151534,
                    "branch": "main"
                }
            },
            "deploy": {
                "provider": "cloudformation",
                "properties": {
                    "action": "replace_on_failure",
                    "stack_name": "subscription"
                }
            }
        },
        "params": {
            "restart_execution_on_update": true
        },
        "targets": [
            {
                "target": 1716335251,
                "properties": {
                    "template_filename": "management.yml",
                    "param_filename": "gen_parameter.json"
                },
                "regions": "us-east-1",
                "path": [
                    82446615151
                ]
            },
            {
                "target": [
                    96342414163,
                    99926626625,
                    362514193959
                ],
                "regions": "us-west-1",
                "path": [
                    96342414163,
                    99926626625,
                    362514193959
                ]
            }
        ]
    },
    "pipeline_input": {
        "environments": {
            "targets": [
                [
                    [
                        {
                            "id": "715151515151",
                            "name": "logs-pro",
                            "path": 715151515151,
                            "step_name": ""
                        },
                        {
                            "id": "286261515151",
                            "name": "logs-dev",
                            "path": 286261515151,
                            "step_name": ""
                        }
                    ]
                ],
                [
                    [
                        {
                            "id": "7363514399199001",
                            "name": "logs-pro-dada",
                            "path": 7363514399199001,
                            "step_name": ""
                        },
                        {
                            "id": "u2716166633444",
                            "name": "logs-dev",
                            "path": 2716166633444,
                            "step_name": ""
                        }
                    ]
                ]
            ]
        }
    }
}
This is my script:
import json

IGNORE_ACCOUNTID = '981813074321'
OUTPUT_FILE = 'params/gen_parameter.json'


def chunk_list(lst, chunk_size):
    """Helper function to chunk a list into smaller lists."""
    for i in range(0, len(lst), chunk_size):
        yield lst[i:i + chunk_size]


def extract_ids_from_targets(targets):
    extracted_ids = []
    for target_group in targets:
        for target_list in target_group:
            for account in target_list:
                if 'id' in account and account['id'] != IGNORE_ACCOUNTID:
                    extracted_ids.append(str(account['id']))
    return extracted_ids


def main():
    with open("display.json") as f:
        data = json.load(f)

    targets = data.get("pipeline_input", {}).get("environments", {}).get("targets", [])
    print(f"Total targets: {sum(map(len, targets))}")

    # Split the targets into groups of 99
    grouped_targets = list(chunk_list(targets, 99))
    print(f"Total groups: {len(grouped_targets)}")

    # Create the final JSON structure
    result = []
    for i, group in enumerate(grouped_targets, start=1):
        extracted_ids = extract_ids_from_targets(group)
        result.append({f"pAccountIds{i}": ','.join(extracted_ids)})

    final_data = {"Parameters": result}
    json_str = json.dumps(final_data, indent=4)

    # Save the result to gen_parameter.json
    with open(OUTPUT_FILE, 'w') as f:
        f.write(json_str)


if __name__ == '__main__':
    main()
1 Answer
I looked at your code and found the following issues:
- In your data, each target group contains a list of two accounts.
- Because you chunk the target groups instead of the IDs, a single entry in grouped_targets can hold up to 198 account IDs (99 groups × 2 accounts each).
- I updated your code to extract the IDs first and only then chunk them; a short demo of why the order matters follows the snippets below.
Here are the changes:
targets = extract_ids_from_targets(targets)
grouped_targets = list(chunk_list(targets, 99))
and
for i, group in enumerate(grouped_targets, start=1):
    result.append({f"pAccountIds{i}": ','.join(group)})
Here is the full updated code (see the # AFFECTED CODE LINE comments for the changes):
import json

IGNORE_ACCOUNTID = '981813074321'
OUTPUT_FILE = 'params/gen_parameter.json'


def chunk_list(lst, chunk_size):
    """Helper function to chunk a list into smaller lists."""
    for i in range(0, len(lst), chunk_size):
        yield lst[i:i + chunk_size]


def extract_ids_from_targets(targets):
    extracted_ids = []
    for target_group in targets:
        for target_list in target_group:
            for account in target_list:
                if 'id' in account and account['id'] != IGNORE_ACCOUNTID:
                    extracted_ids.append(str(account['id']))
    return extracted_ids


def main():
    with open("display.json") as f:
        data = json.load(f)

    targets = data.get("pipeline_input", {}).get("environments", {}).get("targets", [])
    print(f"Total target groups: {len(targets)}")

    # Extract the flat list of IDs first, then split it into groups of 99
    targets = extract_ids_from_targets(targets)  # AFFECTED CODE LINE
    print(f"Total account IDs: {len(targets)}")
    grouped_targets = list(chunk_list(targets, 99))
    print(f"Total groups: {len(grouped_targets)}")

    # Create the final JSON structure (a list of one-key objects,
    # matching the expected output)
    result = []
    for i, group in enumerate(grouped_targets, start=1):
        result.append({f"pAccountIds{i}": ','.join(group)})  # AFFECTED CODE LINE

    final_data = {"Parameters": result}
    json_str = json.dumps(final_data, indent=4)
    # print(json_str)

    # Save the result to gen_parameter.json
    with open(OUTPUT_FILE, 'w') as f:
        f.write(json_str)


if __name__ == '__main__':
    main()
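As a quick sanity check: the sample data above contains four account entries, none of which matches IGNORE_ACCOUNTID, so running the updated script against it should produce a single chunk:

{
    "Parameters": [
        {
            "pAccountIds1": "715151515151,286261515151,7363514399199001,u2716166633444"
        }
    ]
}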