Python - 从两个字典中高效获取所有可能的组合

2024-04-25 00:36:22 发布

您现在位置:Python中文网/ 问答频道 /正文

ra_position_preferences = {"yoder3":["J","E","T","S","M","R","B","SS"],
                           "yoder4":["J","E","S","T","M","R","SS","B"],
                           "kratz3":["M","J","S","E","T","R","B","SS"],
                           "miller3":["S","M","J","E","T","R","B","SS"],
                           "nofloor":["SS","B","R","T","S","M","E","J"]}

applicants_floor_prefernce ={"J":["yoder3","yoder4","kratz3","miller3","nofloor"],
                             "E":["yoder3","yoder4","kratz3","miller3","nofloor"],
                             "S":["kratz3","miller3","yoder3","yoder4","nofloor"],
                             "M":["kratz3","miller3","nofloor","yoder3","yoder4"],
                             "T":["nofloor","yoder4","yoder3","kratz3","miller3",],
                             'SS':["yoder3","yoder4","kratz3","miller3","nofloor"],
                             'R':["kratz3","miller3","yoder3","yoder4","nofloor"],
                             'B':["yoder4","yoder3","kratz3","miller3","nofloor"]}

在上述字典中,所有值都是键的首选项。就像匹配问题https://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/01StableMatching.pdf一样。我试图在这里获得所有可能的偏好组合,而不会出现内存错误。同时,我把我得到的每一个组合放入gale-shapley算法中,得到所有可能的匹配。我的代码如下:

def sensitivity_analysis(dic1,dic2): #gives all the possible combinations of the preferences 
    a=copy.deepcopy(dic1)
    b=copy.deepcopy(dic2)
    length_a=len(a.keys())
    length_b=len(b.keys())
    items_a=list(a.keys())
    items_b=list(b.keys())
    a_variants = [dict(zip(items_a, values)) 
                 for values in product(permutations(items_b), repeat=length_a)]
    b_variants = [dict(zip(items_b, values)) 
                 for values in product(permutations(items_a), repeat=length_b)]

    all_variants = product(a_variants, b_variants)
    contains_a=[]
    contains_b=[]
    for i,j in all_variants:
        contains_a.append(i)
        contains_b.append(j)
    return contains_a,contains_b

从上面的代码我得到的内存错误。还有别的办法吗?我的建议是一次得到一个组合,然后把它插入gale-shapley函数中,得到匹配的结果。然后将匹配项追加到字典中。如果新匹配与上一个匹配相同,我们可以删除新匹配以保存数组中的内存。但仍将是2.78亿次计算。你们有什么有效的方法让我用16GB的内存在电脑上运行吗?你知道吗


Tags: 内存foritemskeysalllengthssvalues
1条回答
网友
1楼 · 发布于 2024-04-25 00:36:22

给定的

import itertools as it
import collections as ct


floors = ["yoder3", "yoder6", "kratz3", "miller3", "nofloor"]
applicants = ["SS", "M", "J", "T", "R", "B", "E", "S"]

preferences = {
    "J": ["yoder3", "yoder6", "kratz3", "miller3", "nofloor"],
    "E": ["yoder3", "yoder6", "kratz3", "miller3", "nofloor"],
    "S": ["kratz3", "miller3", "yoder3", "yoder6", "nofloor"],
    "M": ["kratz3", "miller3", "nofloor", "yoder3", "yoder6"],
    "T": ["nofloor", "yoder6", "yoder3", "kratz3", "miller3"],
    "SS": ["yoder3", "yoder6", "kratz3", "miller3", "nofloor"], 
    "R": ["kratz3", "miller3", "yoder3", "yoder6", "nofloor"],
    "B": ["yoder6", "yoder3", "kratz3", "miller3", "nofloor"]
}

辅助函数

def fill_missing(iterable, fillvalues):
    """Yield combinations replacing empty strings in iterable with fillvalues."""
    iter_fillvalues = map(iter, fillvalues)
    for x in iter_fillvalues:
        comb = []
        for item in iterable:
            if item == "":
                comb.append(next(x))
            else:
                comb.append(item)
        yield comb

代码

def ranked_preferences(floors, preferences):
    """Return a dict of preferences."""
    ranked = ct.defaultdict(list)
    for floor in floors:
        for i in range(len(floors)):
            idx_prefs = [
                name for name, lst in preferences.items() 
                     for j, v in enumerate(lst) if (j == i and v == floor)
            ]
            if not idx_prefs:
                idx_prefs = [""]
            ranked[floor].append(idx_prefs)
    return dict(ranked)


def preferred_assignments(ranked, applicants, top=None):
    """Yield combinations of preferred assignments."""
    if top is None:
        top = len(ranked)

    applicants = set(applicants)
    ranks = zip(ranked.values())
    for i, rank in enumerate(ranks):
        if i >= top:
            continue
        for a in it.product(*rank[0]):
            missing = a.count("")
            b = applicants - set(a)
            c = list(it.combinations(b, missing))
            d = list(it.chain.from_iterable([list(it.permutations(x, missing)) for x in c]))
            e = list(fill_missing(a, d))
            yield tuple(tuple(zip(*x)) for x in zip(it.repeat(list(ranked.keys())), e))

演示

根据偏好生成所有组合:

ranked = ranked_preferences(floors, preferences)
combos = set(it.chain.from_iterable(preferred_assignments(ranked, applicants)))
print("Number of preferred combinations:", len(combos))
# Number of preferred combinations: 668

指定top首选选取:

combos = set(it.chain.from_iterable(preferred_assignments(ranked, applicants, top=1)))
print("Number of preferred combinations:", len(combos))
combos

# Number of preferred combinations: 36
# {(('yoder3', 'E'),
#   ('yoder6', 'B'),
#   ('kratz3', 'R'),
#   ('miller3', 'M'),
#   ('nofloor', 'J')),
#  (('yoder3', 'E'),
#   ('yoder6', 'B'),
#   ('kratz3', 'R'),
#   ('miller3', 'M'),
#   ('nofloor', 'S')),
#  (('yoder3', 'E'),
#  ...)}

这里只给出数字1选项的组合。您可以通过设置top=2来选择前两个选项。你知道吗


细节

使用itertools^{}配方的简单方法(无首选项):

def all_assignments(chunks):
    """Yield all combinations of floors assigned to applicants."""
    combs = it.product(*chunks)
    for comb in combs:
        names = {c[1] for c in comb}
        if len(names) != len(floors):
            continue
        yield comb

chunks_by_floor = list(grouper(len(applicants), it.product(floors, applicants)))
chunks_by_floor[:2]
result = list(all_assignments(chunks_by_floor))
print("Number of combinations:", len(result))
# Number of combinations: 6720

因此,具有偏好的组合是这些组合的一些子集。要选择此子集,请查看按前1-5个选项分组的每层首选项:

ranked
{'yoder3': [['J', 'E', 'SS'], ['B'], ['S', 'T', 'R'], ['M'], ['']],
 'yoder6': [['B'], ['J', 'E', 'T', 'SS'], [''], ['S', 'R'], ['M']],
 'kratz3': [['S', 'M', 'R'], [''], ['J', 'E', 'SS', 'B'], ['T'], ['']],
 'miller3': [[''], ['S', 'M', 'R'], [''], ['J', 'E', 'SS', 'B'], ['T']],
 'nofloor': [['T'], [''], ['M'], [''], ['J', 'E', 'S', 'SS', 'R', 'B']]}

每个楼层的首选项从左到右排列,例如,dict中每个值的索引0表示选择该楼层作为首选项的申请人。索引2表示他们的2号偏好,等等。有些楼层有偏好相同的申请人(yoder3kartz3,在索引[0])。有些楼层没有首选项(miller3[0])。preferred_assignments()中的其余逻辑,即变量a-e,根据偏好组合申请者(考虑垂直列)。缺少的值将从剩余的申请者池中随机替换。你知道吗

在演示中,由于这些组合是基于首选项分组的,因此我们使用itertools.chain.from_iterable()将这些组合展平并强制转换为一个集合以删除任何重复项。你知道吗

相关问题 更多 >