Python中使用二进制搜索的列表列表比较

def intersection(x,y):
    temp=x[:]
    for i in range(len(y)):
        l=0
        h=len(x)-1
        while l<h:
            mid=l+((h-l)/2)
            if y[i][0]==temp[mid][0]:
                a=y[i]
                x.remove(a)
                break
            elif y[i][0]>temp[mid][0]:
                if l==mid:
                    break
                l=mid
            elif y[i][0]<temp[mid][0]:
                h=mid
    return(x)

X - List input of 30 elements:
[[1.0, 25.0, 0.0, 0.0] [2.0, 0.0, 25.0, 0.0] [3.0, 0.0, 50.0, 0.0] [4.0, 50.0, 50.0, 0.0] [5.0, 50.0, 0.0, 0.0] [6.0, 0.0, 25.0, 10.0] [7.0, 25.0, 0.0, 10.0] [8.0, 50.0, 0.0, 10.0] [9.0, 50.0, 50.0, 10.0] [10.0, 0.0, 50.0, 10.0] [11.0, 0.0, 0.0, 0.0] [12.0, 0.0, 0.0, 10.0] [13.0, 17.6776695, 17.6776695, 0.0] [14.0, 0.0, 34.3113632, 0.0] [15.0, 25.9780293, 50.0, 0.0] [16.0, 50.0, 25.9780293, 0.0] [17.0, 34.3113632, 0.0, 0.0] [18.0, 17.6776695, 17.6776695, 10.0] [19.0, 34.3113632, 0.0, 10.0] [20.0, 50.0, 25.9780293, 10.0] [21.0, 25.9780293, 50.0, 10.0] [22.0, 0.0, 34.3113632, 10.0] [23.0, 11.6599302, 0.0, 0.0] [24.0, 0.0, 11.6599302, 0.0] [25.0, 0.0, 11.6599302, 10.0] [26.0, 11.6599302, 0.0, 10.0] [27.0, 27.9121876, 27.9121876, 0.0] [28.0, 27.9121876, 27.9121876, 10.0] [29.0, 9.77920055, 9.77920055, 0.0] [30.0, 9.77920055, 9.77920055, 10.0]]

Y - List of 10 elements:
[1.0, 25.0, 0.0, 0.0] [2.0, 0.0, 25.0, 0.0] [11.0, 0.0, 0.0, 0.0] [13.0, 17.6776695, 17.6776695, 0.0] [14.0, 0.0, 34.3113632, 0.0] [17.0, 34.3113632, 0.0, 0.0] [23.0, 11.6599302, 0.0, 0.0] [24.0, 0.0, 11.6599302, 0.0] [27.0, 27.9121876, 27.9121876, 0.0] [29.0, 9.77920055, 9.77920055, 0.0]

------------------------------------------------------------

Z list (X-Y): the result should have 20 elements, but it gives a length of 23 elements. It does not remove the remaining 3 elements from the list.
[[3.0, 0.0, 50.0, 0.0], [4.0, 50.0, 50.0, 0.0], [5.0, 50.0, 0.0, 0.0], [6.0, 0.0, 25.0, 10.0], [7.0, 25.0, 0.0, 10.0], [8.0, 50.0, 0.0, 10.0], [9.0, 50.0, 50.0, 10.0], [10.0, 0.0, 50.0, 10.0], [12.0, 0.0, 0.0, 10.0], [15.0, 25.9780293, 50.0, 0.0], [16.0, 50.0, 25.9780293, 0.0], [18.0, 17.6776695, 17.6776695, 10.0], [19.0, 34.3113632, 0.0, 10.0], [20.0, 50.0, 25.9780293, 10.0], [21.0, 25.9780293, 50.0, 10.0], [22.0, 0.0, 34.3113632, 10.0], [24.0, 0.0, 11.6599302, 0.0], [25.0, 0.0, 11.6599302, 10.0], [26.0, 11.6599302, 0.0, 10.0], [27.0, 27.9121876, 27.9121876, 0.0], [28.0, 27.9121876, 27.9121876, 10.0], [29.0, 9.77920055, 9.77920055, 0.0], [30.0, 9.77920055, 9.77920055, 10.0]]

3条回答

网友

1楼 · 编辑于 2024-04-27 00:32:43

如果可以使用第一个元素进行过滤：

ykeys = set(zip(*y)[0])
z = [s for s in x if s[0] not in ykeys]

Python 3版本：

ykeys = set(list(zip(*y))[0])
z = [s for s in x if s[0] not in ykeys]

如果仅凭第一个要素判断还不够：

yset = set(map(tuple, y))
return [s for s in x if tuple(s) not in yset]

在我那台性能较弱的笔记本电脑上，按你的数据规模测试，第一个解决方案大约需要0.4秒，第二个解决方案大约需要1秒。这并不奇怪，因为set查找的平均时间复杂度是O(1)。

这是第三个版本，这个版本可能是最有趣的，因为它不仅让Python完成这项工作，而且它更接近于您的预期，但甚至更好：

# Merge-style difference: walk x once while a cursor (yi) moves forward
# through y, collecting into z the entries of x that are not matched in y.
# NOTE(review): assumes x and y are both sorted ascending -- confirm with
# the caller. y must be non-empty, because y[yi] is read with yi == 0.
yi, last = 0, len(y) - 1
z = []
for s in x:
    # Advance the cursor past y entries smaller than s; the `yi < last`
    # guard keeps the cursor on y's final index instead of running off the end.
    while s > y[yi] and yi < last:
        yi += 1
    # Keep s only when it differs from the y entry currently under the cursor.
    if s != y[yi]:
        z.append(s)

它遍历x，同时“并行”地遍历y，类似于归并排序中的合并步骤。我们用yi指向y中的当前位置，并根据需要递增它。因此整体是线性时间，因为我们只从头到尾遍历x一次，也只从头到尾遍历y一次。我的笔记本电脑花了大约0.6秒，这比我的第二个解决方案要快！（将它与我的第一个解决方案相比是不公平的，因为第一个解决方案只考虑第一个元素。）

网友

2楼 · 编辑于 2024-04-27 00:32:43

对分可以工作，但另一个简单的解决方案是使用set：

y_set = set(tuple(v) for v in y)

注意，list必须先转换成不可变（可哈希）的对象，例如tuple。

现在只需生成结果：

z = [v for v in x if tuple(v) not in y_set]

这可能看起来与您的初始解决方案非常相似，但是这里的查找速度要快得多。

@StefanPochmann提出了一个很好的观点：你可能希望查找基于比整个向量更具体的东西，比如仅仅是第一个元素。这个问题在这一点上说得不是很清楚（只说明了这些列表是已排序的）。

网友

3楼 · 编辑于 2024-04-27 00:32:43

如果我理解正确，请使用bisect.bisect_left查找匹配项并删除：

from bisect import bisect_left

# For every row of y, binary-search its insertion point in x and delete the
# row found there when its first element matches.
# NOTE(review): bisect_left requires x to be sorted ascending -- confirm.
for ele in y:
    ind = bisect_left(x, ele)
    # `ind == len(x)` means ele sorts after every row of x. Any smaller index
    # is valid, including the last one, so the bound is len(x), not len(x) - 1.
    if ind < len(x) and x[ind][0] == ele[0]:
        del x[ind]

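如果你看源码，你可以看到bisect_left的代码：

def bisect_left(a, x, lo=0, hi=None):
    if lo < 0:
        raise ValueError('lo must be non-negative')
    if hi is None:
        hi = len(a)
    while lo < hi:
        mid = (lo+hi)//2
        if a[mid] < x: lo = mid+1
        else: hi = mid
    return lo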

您可以将其改编为您自己的代码：

def intersection(x, y):
    """Delete from ``x`` the rows matched by rows of ``y`` and return ``x``.

    Despite the name, this computes a difference (``x`` minus ``y``). For
    each row of ``y`` a hand-written left-bisection finds the insertion
    point in ``x``; the row at that position is deleted when its first
    element equals the first element of the ``y`` row.

    Args:
        x: List of rows (sequences). It is modified in place. The binary
            search is only meaningful if ``x`` is sorted ascending.
        y: Iterable of rows to remove from ``x``.

    Returns:
        The same list object ``x``, after the deletions.
    """
    for ele in y:
        # Left-bisection: afterwards lo is the first index whose row is
        # not smaller than ele (or len(x) when every row is smaller).
        lo = 0
        hi = len(x)
        while lo < hi:
            mid = (lo + hi) // 2
            if x[mid] < ele:
                lo = mid + 1
            else:
                hi = mid
        # Every index below len(x) is valid, so the last row can match too.
        # The original compared against len(x) - 1 and indexed with an
        # undefined name instead of lo.
        if lo < len(x) and x[lo][0] == ele[0]:
            del x[lo]
    return x

print(len(intersection(x,y)))
20

如果有重复项，则需要使用remove。只检查第一个元素是否匹配的条件是 if lo < len(x) and x[lo][0] == ele[0]:，但如果使用remove，我看不出这要怎么工作，因为第一个元素匹配并不意味着ele在x中，所以x.remove(ele)会失败。因此，如果只需匹配第一个元素，您可以更改逻辑：遍历y，将每个子列表的第一个元素放入一个集合中，然后使用生成器表达式更新x：

st = {sub[0] for sub in y}

x[:] = (sub for sub in x if sub[0] not in st)

相关问题更多 >

编程相关推荐

热门问题

热门文章