2 files changed: 8 additions and 5 deletions.
Columns: original file line number | diff line number | diff line change
@@ -39,9 +39,10 @@ def getRandomExternalLink(startingPage):
3939 bsObj = BeautifulSoup (html )
4040 externalLinks = getExternalLinks (bsObj , urlparse (startingPage ).netloc )
4141 if len (externalLinks ) == 0 :
42- domain = domain = urlparse (startingPage ).scheme + "://" + urlparse (startingPage ).netloc
42+ print ("No external links, looking around the site for one" )
43+ domain = urlparse (startingPage ).scheme + "://" + urlparse (startingPage ).netloc
4344 internalLinks = getInternalLinks (bsObj , domain )
44- return getNextExternalLink (internalLinks [random .randint (0 ,len (internalLinks )- 1 )])
45+ return getRandomExternalLink (internalLinks [random .randint (0 ,len (internalLinks )- 1 )])
4546 else :
4647 return externalLinks [random .randint (0 , len (externalLinks )- 1 )]
4748
Columns: original file line number | diff line number | diff line change
@@ -33,14 +33,16 @@ def getExternalLinks(bsObj, excludeUrl):
3333 externalLinks .append (link .attrs ['href' ])
3434 return externalLinks
3535
36+
3637def getRandomExternalLink (startingPage ):
3738 html = urlopen (startingPage )
3839 bsObj = BeautifulSoup (html )
3940 externalLinks = getExternalLinks (bsObj , urlparse (startingPage ).netloc )
4041 if len (externalLinks ) == 0 :
41- domain = domain = urlparse (startingPage ).scheme + "://" + urlparse (startingPage ).netloc
42+ print ("No external links, looking around the site for one" )
43+ domain = urlparse (startingPage ).scheme + "://" + urlparse (startingPage ).netloc
4244 internalLinks = getInternalLinks (bsObj , domain )
43- return getNextExternalLink (internalLinks [random .randint (0 ,len (internalLinks )- 1 )])
45+ return getRandomExternalLink (internalLinks [random .randint (0 ,len (internalLinks )- 1 )])
4446 else :
4547 return externalLinks [random .randint (0 , len (externalLinks )- 1 )]
4648
@@ -71,4 +73,4 @@ def getAllExternalLinks(siteUrl):
7173followExternalOnly ("http://oreilly.com" )
7274
7375allIntLinks .add ("http://oreilly.com" )
74- getAllExternalLinks ("http://oreilly.com" )
76+ getAllExternalLinks ("http://oreilly.com" )
You can’t perform that action at this time.
0 commit comments