我正在使用下面的代码尝试使用Fpgrowth算法,但是当我要删除它们时,我将“”作为篮子中的项。正确的方法是什么?你知道吗
from pyspark.mllib.fpm import FPGrowth
from pyspark import SparkConf
from pyspark.context import SparkContext
sc = SparkContext.getOrCreate(SparkConf().setMaster("local[*]"))
data = sc.textFile("C:\\Users\\marka\\Downloads\\Assig2.txt")
data.map(lambda line: line.strip().split())
transactions = data.map(lambda line: line.strip().split('\t'))
#notempty = transactions.map(lambda x: x is not '')
unique = transactions.map(lambda x: list(set(x))).cache()
model = FPGrowth.train(unique, minSupport=0.7, numPartitions=10)
result = model.freqItemsets().collect()
for fi in result:
print(fi)
输出:
FreqItemset(items=[''], freq=100)
FreqItemset(items=['Soap'], freq=99)
FreqItemset(items=['Soap', ''], freq=99)
FreqItemset(items=['Water'], freq=99)
FreqItemset(items=['Water', 'Soap'], freq=99)
FreqItemset(items=['Water', 'Soap', ''], freq=99)
FreqItemset(items=['Water', ''], freq=99)
FreqItemset(items=['Beer'], freq=88)
FreqItemset(items=['Beer', 'Water'], freq=88)
FreqItemset(items=['Beer', 'Water', 'Soap'], freq=88)
FreqItemset(items=['Beer', 'Water', 'Soap', ''], freq=88)
FreqItemset(items=['Beer', 'Water', ''], freq=88)
FreqItemset(items=['Beer', 'Soap'], freq=88)
FreqItemset(items=['Beer', 'Soap', ''], freq=88)
FreqItemset(items=['Beer', ''], freq=88)
FreqItemset(items=['Rock_Salt'], freq=80)
FreqItemset(items=['Rock_Salt', 'Water'], freq=79)
FreqItemset(items=['Rock_Salt', 'Water', 'Soap'], freq=79)
FreqItemset(items=['Rock_Salt', 'Water', 'Soap', ''], freq=79)
FreqItemset(items=['Rock_Salt', 'Water', ''], freq=79)
FreqItemset(items=['Rock_Salt', 'Soap'], freq=79)
FreqItemset(items=['Rock_Salt', 'Soap', ''], freq=79)
相关问题 更多 >
编程相关推荐