擅长:python、mysql、java
<p>使用mrjob库,您可以在Python中这样实现:</p>
<pre><code># Print the top 5 most frequently occurring words
#Import Dependencies
import heapq

from mrjob.job import MRJob
from mrjob.step import MRStep
class MRWordCount(MRJob):
    """Two-step MapReduce job that reports the most frequent words.

    Step 1 counts the occurrences of every lower-cased, whitespace-separated
    word.  Step 2 sends all ``(count, word)`` pairs to a single reducer
    (they share the key ``None``) and keeps the ``TOP_N`` largest.
    """

    # Number of words reported by the final step.
    TOP_N = 5

    def steps(self):
        """Return the pipeline: count words, then select the top ones."""
        return [
            MRStep(mapper=self.mapper, reducer=self.reducer),
            MRStep(reducer=self.secondreducer),
        ]

    def mapper(self, _, lines):
        """Yield ``(word, 1)`` for each whitespace-separated word.

        The input key is ignored; ``lines`` is the text value handed to the
        mapper and is split on whitespace.
        """
        for word in lines.split():
            yield word.lower(), 1

    def reducer(self, key, values):
        """Sum the occurrences of ``key`` and yield ``(None, (count, word))``.

        The count is kept as an integer so that the next step ranks it
        numerically; a zero-padded string would rank 10000 below 9999.
        Using ``None`` as the key routes every pair to one reducer call.
        """
        yield None, (sum(values), key)

    def secondreducer(self, key, values):
        """Yield up to ``TOP_N`` ``(padded_count, word)`` pairs, largest first.

        ``values`` is the stream of ``(count, word)`` pairs produced by
        ``reducer``.  Fewer than ``TOP_N`` pairs are yielded when the input
        has fewer distinct words.  Ties on count are broken by the word
        itself, in descending order.
        """
        # Pairs may arrive as lists after serialization between steps;
        # normalise them to tuples so they compare uniformly.
        pairs = (tuple(pair) for pair in values)
        for count, word in heapq.nlargest(self.TOP_N, pairs):
            # Zero-padding is applied only for display, after ranking.
            yield '%04d' % count, word
# Launch the job only when this file is executed as a script.
if __name__ == "__main__":
    MRWordCount.run()
</code></pre>