File tree Expand file tree Collapse file tree 2 files changed +14
-3
lines changed Expand file tree Collapse file tree 2 files changed +14
-3
lines changed Original file line number Diff line number Diff line change 99import random
1010import time
1111import os
12+ import re
13+
14+ #from nltk.util import clean_html
15+ #import urllib2
16+
1217
1318# Open a web page
1419web_address = 'https://polisci.wustl.edu/faculty/specialization'
2530
2631# Get the attributes
2732my_a_tag = soup .find_all ('a' )[2 ]
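33+ my_a_tag = re .sub (r'<[^>]+>' , '' , str (my_a_tag )) #remove tags -- NOTE(review): this rebinds my_a_tag to a plain str, so the .attrs / ['alt'] lookups on the following lines no longer operate on a Tag and will raise; store the stripped text under a different name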
2834my_a_tag .attrs #Gives a dictionary with the attributes
2935my_a_tag .attrs .keys ()
3036my_a_tag ['alt' ]
3642mysection = soup .find_all ('div' )[0 ]
3743mysection .a #Gives the 'a' tag within the 'div' tag
3844mysection .find_all ('a' ) #Gives the list of all 'a' tags within the 'div' tag
45+ mysection .get_text ()
46+
3947
4048# Creating a tree of objects
4149
Original file line number Diff line number Diff line change @@ -9,6 +9,11 @@ import urllib2
99import random
1010import time
1111import os
12+ import re
13+
14+ #from nltk.util import clean_html
15+ #import urllib2
16+
1217
1318# Open a web page
1419web_address='https://polisci.wustl.edu/faculty/specialization'
@@ -22,12 +27,10 @@ soup.prettify()
2227soup.find_all('h3')
2328soup.find_all('a')
2429
25- # Get the script of a certain tag
26- mytitle=soup.find_all('h3')[0]
27- mytitle.script
2830
2931# Get the attributes
3032my_a_tag=soup.find_all('a')[2]
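33+ my_a_tag = re.sub(r'<[^>]+>', '', str(my_a_tag)) #remove tags -- NOTE(review): this rebinds my_a_tag to a plain str, so the .attrs / ['alt'] lookups on the following lines no longer operate on a Tag and will raise; store the stripped text under a different name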
3134my_a_tag.attrs #Gives a dictionary with the attributes
3235my_a_tag.attrs.keys()
3336my_a_tag['alt']
You can’t perform that action at this time.
0 commit comments