统计Python字典中值的重复次数
我有一个由字典组成的列表,每个字典都带有一个区域类型(zip_zone),同一种区域可能出现多次。我想根据这个列表生成一个新的列表,其中每个字典多一个键“Count”(下面的期望输出中写作小写的 'count'),用来记录该字典所属的区域(比如“Full Run”、“Half Run”或“Semi Run”)在整个列表中出现的次数。
[
{'zip_zone': u'Full Run', 'zipcode': u'14042', 'longitude': -78.516154},
{'zip_zone': u'Full Run', 'zipcode': u'14101', 'longitude': -78.51734},
{'zip_zone': u'Full Run', 'zipcode': u'14706', 'longitude': -78.493761},
{'zip_zone': u'Half Run', 'zipcode': u'14709', 'longitude': -78.024817},
{'zip_zone': u'Semi Run', 'zipcode': u'14711', 'longitude': -78.119974},
{'zip_zone': u'Full Run', 'zipcode': u'14714', 'longitude': -78.256921},
{'zip_zone': u'Half Run', 'zipcode': u'14715', 'longitude': -78.157392},
{'zip_zone': u'Semi Run', 'zipcode': u'14717', 'longitude': -78.210567},
{'zip_zone': u'Semi Run', 'zipcode': u'14719', 'longitude': -78.86951},
{'zip_zone': u'Half Run', 'zipcode': u'14727', 'longitude': -78.268103},
{'zip_zone': u'Semi Run', 'zipcode': u'14731', 'longitude': -78.658909},
{'zip_zone': u'Half Run', 'zipcode': u'14735', 'longitude': -78.087607},
{'zip_zone': None, 'zipcode': u'14737', 'longitude': -78.431625},
{'zip_zone': u'Semi Run', 'zipcode': u'14739', 'longitude': -78.139046},
{'zip_zone': u'Semi Run', 'zipcode': u'14741', 'longitude': -78.5907},
{'zip_zone': u'Special Run', 'zipcode': u'14743', 'longitude': -78.4098},
{'zip_zone': u'Special Run', 'zipcode': u'14744', 'longitude': -78.167853},
{'zip_zone': u'Half Run', 'zipcode': u'14748', 'longitude': -78.639987},
{'zip_zone': u'Semi Run', 'zipcode': u'14753', 'longitude': -78.640416},
{'zip_zone': u'Special Run', 'zipcode': u'14754', 'longitude': -78.18395},
{'zip_zone': u'Special Run', 'zipcode': u'14755', 'longitude': -78.800866},
{'zip_zone': u'Half Run', 'zipcode': u'14760', 'longitude': -78.426015},
]
期望输出的列表应该是这样的(注意 zip_zone 为 None 的记录,其计数为 0):
[
{'zip_zone': u'Full Run', 'zipcode': u'14042', 'longitude': -78.516154, 'count': 4},
{'zip_zone': u'Full Run', 'zipcode': u'14101', 'longitude': -78.51734, 'count': 4},
{'zip_zone': u'Full Run', 'zipcode': u'14706', 'longitude': -78.493761, 'count': 4},
{'zip_zone': u'Half Run', 'zipcode': u'14709', 'longitude': -78.024817, 'count': 6},
{'zip_zone': u'Semi Run', 'zipcode': u'14711', 'longitude': -78.119974, 'count': 7},
{'zip_zone': u'Full Run', 'zipcode': u'14714', 'longitude': -78.256921, 'count': 4},
{'zip_zone': u'Half Run', 'zipcode': u'14715', 'longitude': -78.157392, 'count': 6},
{'zip_zone': u'Semi Run', 'zipcode': u'14717', 'longitude': -78.210567, 'count': 7},
{'zip_zone': u'Semi Run', 'zipcode': u'14719', 'longitude': -78.86951, 'count': 7},
{'zip_zone': u'Half Run', 'zipcode': u'14727', 'longitude': -78.268103, 'count': 6},
{'zip_zone': u'Semi Run', 'zipcode': u'14731', 'longitude': -78.658909, 'count': 7},
{'zip_zone': u'Half Run', 'zipcode': u'14735', 'longitude': -78.087607, 'count': 6},
{'zip_zone': None, 'zipcode': u'14737', 'longitude': -78.431625, 'count': 0},
{'zip_zone': u'Semi Run', 'zipcode': u'14739', 'longitude': -78.139046, 'count': 7},
{'zip_zone': u'Semi Run', 'zipcode': u'14741', 'longitude': -78.5907, 'count': 7},
{'zip_zone': u'Special Run', 'zipcode': u'14743', 'longitude': -78.4098, 'count': 4},
{'zip_zone': u'Special Run', 'zipcode': u'14744', 'longitude': -78.167853, 'count': 4},
{'zip_zone': u'Half Run', 'zipcode': u'14748', 'longitude': -78.639987, 'count': 6},
{'zip_zone': u'Semi Run', 'zipcode': u'14753', 'longitude': -78.640416, 'count': 7},
{'zip_zone': u'Special Run', 'zipcode': u'14754', 'longitude': -78.18395, 'count': 4},
{'zip_zone': u'Special Run', 'zipcode': u'14755', 'longitude': -78.800866, 'count': 4},
{'zip_zone': u'Half Run', 'zipcode': u'14760', 'longitude': -78.426015, 'count': 6},
]
4 个回答
0
可能看起来不是特别好,但你可以试试用 defaultdict:
from collections import defaultdict

# Tally how many records belong to each zone. A defaultdict(int) starts every
# unseen zone at 0, so no membership test is needed before incrementing.
output = defaultdict(int)
for line in origData:
    # Records whose zone is None are left untallied so that they report a
    # count of 0, matching the expected output in the question.
    if line['zip_zone'] is not None:
        output[line['zip_zone']] += 1

# Annotate every record in place with the size of the zone it belongs to.
# The key is lowercase 'count', as in the expected output.
for line in origData:
    line['count'] = output[line['zip_zone']]

# print(...) with a single argument behaves the same on Python 2 and 3.
print(origData)
0
我不太确定自己是否完全理解了你的问题,不过下面的代码应该可以实现你在问题中描述的功能:
# Sample records: each dict describes one zipcode and the zone it belongs to.
# Named `records` rather than `input` so the builtin input() is not shadowed.
records = [
    {'zip_zone': u'Full Run', 'zipcode': u'14042', 'longitude': -78.516154},
    {'zip_zone': u'Full Run', 'zipcode': u'14101', 'longitude': -78.51734},
    {'zip_zone': u'Full Run', 'zipcode': u'14706', 'longitude': -78.493761},
    {'zip_zone': u'Half Run', 'zipcode': u'14709', 'longitude': -78.024817},
    {'zip_zone': u'Semi Run', 'zipcode': u'14711', 'longitude': -78.119974},
    {'zip_zone': u'Full Run', 'zipcode': u'14714', 'longitude': -78.256921},
    {'zip_zone': u'Half Run', 'zipcode': u'14715', 'longitude': -78.157392},
    {'zip_zone': u'Semi Run', 'zipcode': u'14717', 'longitude': -78.210567},
    {'zip_zone': u'Semi Run', 'zipcode': u'14719', 'longitude': -78.86951},
    {'zip_zone': u'Half Run', 'zipcode': u'14727', 'longitude': -78.268103},
    {'zip_zone': u'Semi Run', 'zipcode': u'14731', 'longitude': -78.658909},
    {'zip_zone': u'Half Run', 'zipcode': u'14735', 'longitude': -78.087607},
    {'zip_zone': None, 'zipcode': u'14737', 'longitude': -78.431625},
    {'zip_zone': u'Semi Run', 'zipcode': u'14739', 'longitude': -78.139046},
    {'zip_zone': u'Semi Run', 'zipcode': u'14741', 'longitude': -78.5907},
    {'zip_zone': u'Special Run', 'zipcode': u'14743', 'longitude': -78.4098},
    {'zip_zone': u'Special Run', 'zipcode': u'14744', 'longitude': -78.167853},
    {'zip_zone': u'Half Run', 'zipcode': u'14748', 'longitude': -78.639987},
    {'zip_zone': u'Semi Run', 'zipcode': u'14753', 'longitude': -78.640416},
    {'zip_zone': u'Special Run', 'zipcode': u'14754', 'longitude': -78.18395},
    {'zip_zone': u'Special Run', 'zipcode': u'14755', 'longitude': -78.800866},
    {'zip_zone': u'Half Run', 'zipcode': u'14760', 'longitude': -78.426015},
]

# First pass: tally how many records fall into each zone. dict.get supplies
# 0 for a zone seen for the first time, so no explicit membership test is
# needed.
zone_counts = {}
for item in records:
    zone = item['zip_zone']
    zone_counts[zone] = zone_counts.get(zone, 0) + 1

# Records without a zone must report 0 (see the expected output), so the
# tally for None is overwritten once counting is finished.
zone_counts[None] = 0

# Second pass: annotate every record in place with the size of its zone.
for item in records:
    item['count'] = zone_counts[item['zip_zone']]

# print(...) with a single argument behaves the same on Python 2 and 3.
print(zone_counts)
for item in records:
    print(item)
5
这正是适合使用 Python 的 collections 模块中 Counter 类的典型场景:
import collections

# u is your input list of dictionaries, entries in u will be modified in place
# Records whose zone is None are skipped while tallying; a Counter returns 0
# for a key it has never counted, so those records end up with count == 0,
# matching the expected output in the question.
c = collections.Counter(
    e["zip_zone"] for e in u if e["zip_zone"] is not None
)
for e in u:
    e["count"] = c[e["zip_zone"]]