Python:试图将元组添加到不同的列表时元组被复制

2024-04-24 07:10:49 发布

您现在位置:Python中文网/ 问答频道 /正文

我真的被一个问题弄糊涂了,我一直在用一些代码来聚集一组点

我使用一种算法来为一定数量的簇(例如3个)播种,然后为每个簇添加点。但是,玩了一会儿之后,我注意到每个点都被添加到每个集群中。我检查了代码好几次,我很确定我没有忘记从原始列表中删除内容,但我强烈怀疑传递引用有问题。我对Python有点陌生,我的经验主要是Java,所以我有一种预感,Python处理参数的方式让我大吃一惊

import random
import math

class Cluster(object):
    points = []
    centroid = ()
    dimensions = 0
    color = 'k'

    def __init__(self, init_pt, color='k'):
        self.points.append(init_pt)
        self.dimensions = len(init_pt)
        self.centroid = init_pt
        self.color = color

    def addPoint(self, pt):
        try:
            if len(pt) != self.dimensions:
                raise ArithmeticError("Wrong number of dimensions on new point, ignoring")
            else:
                centroid_dim_list = []
                for dim in range(0, self.dimensions):
                    centroid_dim_list.append((self.centroid[dim] * len(self.points) + pt[dim]) / float(len(self.points) + 1))
                self.centroid = tuple(centroid_dim_list)
                self.points.append(pt)
        except ArithmeticError as ae:
            print ae.message
            pass

class KMeans(object):
    clusters = []
    unassignedPoints = []
    dimensions = 0
    k = 0

    def __init__(self, _k, _points):
        if _k > 0:
            self.k = _k 
            self.unassignedPoints = _points
            self.dimensions = len(_points[0])
        else:
            raise Exception()

    def runKMeans(self):
        self.initializeClusters() 
        while len(self.unassignedPoints) > 0:
            pt = self.unassignedPoints.pop()
            dist_to_cent = float("+inf")
            closest_cluster = ()
            for cluster in self.clusters:
                new_dist = self.getDistance(pt, cluster.centroid)
                if new_dist < dist_to_cent:
                    closest_cluster = cluster
                    dist_to_cent = new_dist
            closest_cluster.addPoint(pt)

    def initializeClusters(self):
        cluster_seeds = []
        new_point = self.getRandomPoint()
        print "New cluster seed: " + str(new_point)
        self.clusters.append(Cluster(new_point))
        cluster_seeds.append(new_point)
        self.unassignedPoints.remove(new_point)
        for i in range(1, self.k):
            farthest_point = ()
            farthest_dist = 0
            for pt1 in self.unassignedPoints:
                for pt2 in cluster_seeds:
                    curr_dist = self.getDistance(pt1, pt2)
                    if curr_dist > farthest_dist:
                        farthest_dist = curr_dist
                        farthest_point = pt1
            self.unassignedPoints.remove(farthest_point)
            self.clusters.append(Cluster(farthest_point))
            print "New cluster seed: " + str(farthest_point)
            cluster_seeds.append(farthest_point)

    def getRandomPoint(self):
        return self.unassignedPoints[int(math.floor(len(self.unassignedPoints) * random.random()))]

    def getDistance(self, pt1, pt2):
        return math.sqrt(sum([math.pow(pt1[dim] - pt2[dim], 2) for dim in range(0, self.dimensions)]))

kmeans = KMeans(3, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5,5)])
kmeans.runKMeans()

for cluster in kmeans.clusters:
    for pt in cluster.points:
       print "Assigned: " + str(pt)

for pt in kmeans.unassignedPoints:
    print "Unassigned: " + str(pt)

我错过了什么


Tags: inselfptnewfordistdefpoints
1条回答
网友
1楼 · 发布于 2024-04-24 07:10:49

您的问题可能是因为:

class Cluster(object):
    points = []

考虑将points的初始化移到__init__方法中:

class Cluster(object):
    def __init__(self, init_pt, color='k'):
        self.points = []            # <  
        self.points.append(init_pt)
        self.dimensions = len(init_pt)
        self.centroid = init_pt
        self.color = color

相关问题 更多 >