<p>以下是使用 python-recsys 库的解决方案（参考其快速入门文档：<a href="http://ocelma.net/software/python-recsys/build/html/quickstart.html">http://ocelma.net/software/python-recsys/build/html/quickstart.html</a>）：</p>
<pre><code>from recsys.algorithm.factorize import SVD
from recsys.datamodel.data import Data
# Sample data: maps each user name to the set of interests (plain strings)
# that user likes. Because the values are set literals, a repeated interest
# within one user would collapse to a single entry.
likes={
"rajat":{"music","x-men","programming","hindi","english","himesh","lil wayne","rap","travelling","coding"},
"steve":{"travelling","pop","hanging out","friends","facebook","tv","skating","religion","english","chocolate"},
"toby":{"programming","pop","rap","gardens","flowers","birthday","tv","summer","youtube","eminem"},
"ravi":{"skating","opera","sony","apple","iphone","music","winter","mango shake","heart","microsoft"},
"katy":{"music","pics","guitar","glamour","paris","fun","lip sticks","cute guys","rap","winter"},
"paul":{"office","women","dress","casuals","action movies","fun","public speaking","microsoft","developer"},
"sheila":{"heart","beach","summer","laptops","youtube","movies","hindi","english","cute guys","love"},
"saif":{"women","beach","laptops","movies","himesh","world","earth","rap","fun","eminem"},
"mark":{"pilgrimage","programming","house","world","books","country music","bob","tom hanks","beauty","tigers"},
"stuart":{"rap","smart girls","music","wrestling","brock lesnar","country music","public speaking","women","coding","iphone"},
"grover":{"skating","mountaineering","racing","athletics","sports","adidas","nike","women","apple","pop"},
"anita":{"heart","sunidhi","hindi","love","love songs","cooking","adidas","beach","travelling","flowers"},
"kelly":{"travelling","comedy","tv","facebook","youtube","cooking","horror","movies","dublin","animals"},
"dino":{"women","games","xbox","x-men","assassin's creed","pop","rap","opera","need for speed","jeans"},
"priya":{"heart","mountaineering","sky diving","sony","apple","pop","perfumes","luxury","eminem","lil wayne"},
"brenda":{"cute guys","xbox","shower","beach","summer","english","french","country music","office","birds"}
}
# Load every (user, interest) pair into a recsys Data container. Each pair
# carries the same weight because a "like" is a yes/no signal here.
data = Data()
VALUE = 1.0
for username, interests in likes.items():
    for user_likes in interests:
        # Tuple format is (value, row, column): rows are users, columns are interests.
        data.add_tuple((VALUE, username, user_likes))

# Factorize the user/interest data with SVD.
svd = SVD()
svd.set_data(data)
k = 5  # Usually, in a real dataset, you should set a higher number, e.g. 100
# NOTE(review): min_values=3 presumably drops rows/columns with fewer than 3
# non-zero entries before factorizing (per the python-recsys quickstart) --
# confirm against the library documentation.
svd.compute(k=k, min_values=3, pre_normalize=None, mean_center=False, post_normalize=True)

# Bare expressions: meant to be evaluated in an interactive session, where the
# returned list of (user, similarity score) pairs is echoed. Wrap the calls in
# print(...) when running this as a script.
svd.similar('sheila')
svd.similar('rajat')
</code></pre>
<p>结果:</p>
<pre><code>In [11]: svd.similar('sheila')
Out[11]:
[('sheila', 0.99999999999999978),
('brenda', 0.94929845546505753),
('anita', 0.85943494201162518),
('kelly', 0.53385495931440263),
('saif', 0.39985366653259058),
('rajat', 0.30757664244952165),
('toby', 0.28541364367155014),
('priya', 0.26184289111194581),
('steve', 0.25043700194182622),
('katy', 0.21812807229358305)]
In [12]: svd.similar('rajat')
Out[12]:
[('rajat', 1.0000000000000004),
('mark', 0.89164019482177692),
('katy', 0.65207273451425907),
('stuart', 0.61675507205285718),
('steve', 0.55730648750670264),
('anita', 0.49836982296014803),
('brenda', 0.42759524471725929),
('kelly', 0.40436047539358799),
('toby', 0.35972227835054826),
('ravi', 0.31113813325818901)]
</code></pre>