Python 3 合并具有相同值的列表

2024-04-23 17:49:30 发布

您现在位置:Python中文网/ 问答频道 /正文

我使用xlrd从excel导入数据。将其转换为JSON并加载后,数据格式如下所示:

[{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
{'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

如何合并具有相同收据id的行,以生成如下新列表:

[{'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000', 'company': 'Company A'},
{'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

我试过把这个列表转换成 dict,但还是有些问题。有什么办法吗?


Tags: 数据 id json 列表 service excel dict company
3条回答

你可以试试这样的写法。

data = [
    {'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
    {'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
    {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'},
]

# Pass 1: collect the rows that belong to each receipt_id.
groups = {}
for row in data:
    groups.setdefault(row['receipt_id'], []).append(row)

# Pass 2: collapse every group into a single row.
# new_dict maps receipt_id -> one-element list holding the merged row.
new_dict = {}
for receipt_id, rows in groups.items():
    # Work on a copy so the dicts inside `data` are never modified.
    merged = dict(rows[0])
    if len(rows) > 1:
        # Label each service with its own charge (not the running total),
        # and do it for every row so three or more rows merge correctly.
        merged['service'] = " ".join(
            "{} {}".format(r['service'], r['charge']) for r in rows
        )
        # Keep the total a string, like the charges of the input rows.
        merged['charge'] = str(sum(int(r['charge']) for r in rows))
    new_dict[receipt_id] = [merged]

print([j[0] for j in new_dict.values()])

# Output:
# [{'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000', 'company': 'Company A'}, {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

使用 pandas 的 DataFrame(数据帧)。

import pandas as pd

aa = [
    {'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
    {'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
    {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'},
]

# One DataFrame row per input dict; the dict keys become the columns.
df = pd.DataFrame(aa)

def add_service(x):
    """Concatenate the service names of one group, separated by single spaces."""
    separator = " "
    return separator.join(x)

def add_charge(x):
    """Return the integer total of the charges in one group.

    Every element of x is converted with int() before it is added.
    """
    amounts = [int(value) for value in x]
    return sum(amounts)

def add_comp(x):
    """Join the distinct company names of one group into a single string.

    Duplicates are dropped while keeping first-seen order, so the result is
    the same on every run (a plain set has no guaranteed iteration order).
    """
    # dict.fromkeys de-duplicates and preserves insertion order.
    return " ".join(dict.fromkeys(x))
# Group the rows by receipt so that every receipt collapses into one record.
grouped = df.groupby(['receipt_id'])

# Aggregate each column with its own rule.
ser = grouped['service'].agg(add_service)
cha = grouped['charge'].agg(add_charge)
com = grouped['company'].agg(add_comp)

df_new = pd.DataFrame({"service": ser, "charge": cha, "company": com})

# receipt_id is the index of df_new; reset_index() moves it back into a
# column so it is kept in every record of the JSON-style list.
records = df_new.reset_index().to_dict('records')
print(records)

如果不想使用pandas,可以编写自己的group_by函数:

from collections import defaultdict

def group_by(dicts, key):
    """Bucket the given dicts by the value each one stores under `key`.

    Returns a defaultdict(list) that maps every distinct value of d[key]
    to the list of dicts carrying that value, in input order.
    """
    buckets = defaultdict(list)
    for item in dicts:
        bucket = buckets[item[key]]
        bucket.append(item)
    return buckets

def combine_receipts(receipts):
    """Merge rows that share a receipt_id, yielding one dict per receipt.

    A receipt with several rows is collapsed into a new dict: 'service'
    lists every "<service> <charge>" pair, 'charge' is the exact total as a
    string, and 'company' is taken from the first row of the group.
    A receipt with a single row is yielded unchanged (the same dict object).

    Args:
        receipts: iterable of dicts with the keys 'receipt_id', 'service',
            'charge' and 'company'.

    Yields:
        One dict per distinct receipt_id, in the order group_by returns them.

    Raises:
        ValueError: if a charge of a multi-row receipt is not a valid number.
    """
    # Imported here so the function does not rely on a module-level import.
    from decimal import Decimal, InvalidOperation

    for receipt_id, rows in group_by(receipts, 'receipt_id').items():
        if len(rows) == 1:
            yield rows[0]
            continue

        service = ' '.join(f"{r['service']} {r['charge']}" for r in rows)
        try:
            # Decimal keeps '2000' + '3000' as '5000' (float gives '5000.0')
            # and avoids binary rounding errors on fractional amounts.
            total = sum(Decimal(str(r['charge'])) for r in rows)
        except InvalidOperation as err:
            raise ValueError(
                f"invalid charge in receipt {receipt_id!r}"
            ) from err

        yield {'receipt_id': receipt_id,
               'service': service,
               'charge': str(total),
               'company': rows[0]['company']}


if __name__ == "__main__":
    receipts = [{'receipt_id': '1', 'service': 'A', 'charge': '2000', 'company': 'Company A'},
                {'receipt_id': '1', 'service': 'B', 'charge': '3000', 'company': 'Company A'},
                {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

    print(list(combine_receipts(receipts)))
# [{'receipt_id': '1', 'service': 'A 2000 B 3000', 'charge': '5000', 'company': 'Company A'},
#  {'receipt_id': '2', 'service': 'C', 'charge': '1000', 'company': 'Company B'}]

相关问题 更多 >