File tree Expand file tree Collapse file tree 1 file changed +13
-4
lines changed Expand file tree Collapse file tree 1 file changed +13
-4
lines changed Original file line number Diff line number Diff line change 22from bs4 import BeautifulSoup
33import re
44import string
5+ from collections import OrderedDict
56
67def cleanInput (input ):
78 input = re .sub ('\n +' , " " , input )
@@ -19,14 +20,22 @@ def cleanInput(input):
1920
def getNgrams(input, n):
    """Count how often each n-gram occurs in ``input``.

    The raw text is first passed through ``cleanInput``; every window of
    ``n`` consecutive items of the cleaned result is then joined with single
    spaces to form an n-gram key.

    Args:
        input: Raw text to analyze (handed straight to ``cleanInput``).
        n: Number of consecutive items per n-gram.

    Returns:
        A dict mapping each space-joined n-gram to the number of times it
        occurs. Empty when the cleaned input has fewer than ``n`` items.
    """
    # NOTE(review): assumes cleanInput returns a sequence of word strings
    # (str.join below needs string items) -- confirm against its body.
    input = cleanInput(input)
    output = dict()
    for i in range(len(input) - n + 1):
        newNGram = " ".join(input[i:i + n])
        # dict.get supplies 0 for an n-gram seen for the first time.
        output[newNGram] = output.get(newNGram, 0) + 1
    return output
2631
# Fetch the article, extract the text of the div with id "mw-content-text",
# and print its 2-gram counts ordered from most to least frequent.
html = urlopen("http://en.wikipedia.org/wiki/Python_(programming_language)")
# Name the parser explicitly so the parse result does not depend on which
# optional parser libraries happen to be installed (and bs4 does not warn
# about having to guess).
bsObj = BeautifulSoup(html, "html.parser")
content = bsObj.find("div", {"id": "mw-content-text"}).get_text()

ngrams = getNgrams(content, 2)
# Sort the (ngram, count) pairs by count, highest first; OrderedDict keeps
# that ordering when the mapping is printed.
ngrams = OrderedDict(sorted(ngrams.items(), key=lambda t: t[1], reverse=True))
print(ngrams)
You can’t perform that action at this time.
0 commit comments