Skip to content

Commit 478cfbe

Browse files
Add files via upload
0 parents  commit 478cfbe

File tree

6 files changed

+63
-0
lines changed

6 files changed

+63
-0
lines changed

webscraping1.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
"""Fetch the vulnweb.com landing page and print the raw response body."""
import requests

URL = "http://vulnweb.com"

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait explicitly.
TIMEOUT_SECONDS = 10

r = requests.get(URL, timeout=TIMEOUT_SECONDS)

# r.content is the undecoded body (bytes), so this prints a b'...' literal.
print(r.content)

webscraping2.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
"""Download the vulnweb.com landing page and print it as indented HTML."""
from bs4 import BeautifulSoup
import requests

url = "http://vulnweb.com"

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait explicitly.
TIMEOUT_SECONDS = 10

r = requests.get(url, timeout=TIMEOUT_SECONDS)

# The 'html5lib' parser is an optional extra: it must be installed separately
# (pip install html5lib) or BeautifulSoup raises FeatureNotFound.
Soup = BeautifulSoup(r.content, 'html5lib')

# prettify() re-serialises the parsed tree with one tag per line.
print(Soup.prettify())

webscraping3.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
"""Issue a GET request to vulnweb.com and report its URL and status code."""
import requests

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait explicitly.
TIMEOUT_SECONDS = 10

# Make a GET request
r = requests.get("http://www.vulnweb.com", timeout=TIMEOUT_SECONDS)

# Print the URL of the response (after any redirects were followed)
print(r.url)

# Print the HTTP status code; 200 means success
print(r.status_code)

webscraping4.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
"""Print the <title> tag of vulnweb.com, its tag name and its parent's name."""
import requests
from bs4 import BeautifulSoup

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait explicitly.
TIMEOUT_SECONDS = 10

# Make a GET request
r = requests.get("http://www.vulnweb.com", timeout=TIMEOUT_SECONDS)

# Parse the HTML with the parser bundled with the standard library
soup = BeautifulSoup(r.content, 'html.parser')

# Get the title tag (None when the document has no <title>)
title = soup.title
print(title)

# Only dereference the tag when it exists; otherwise `.name` would raise
# AttributeError on None.
if title is not None:
    # Name of the tag itself
    print(title.name)

    # Name of the tag that encloses the title
    print(title.parent.name)

webscraping5.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
"""Print the href attribute of every anchor tag on the vulnweb.com page."""
import requests
from bs4 import BeautifulSoup

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait explicitly.
TIMEOUT_SECONDS = 10

r = requests.get("http://www.vulnweb.com", timeout=TIMEOUT_SECONDS)

soup = BeautifulSoup(r.content, 'html.parser')

for link in soup.find_all('a'):
    # .get() returns None (rather than raising) for anchors without an href.
    print(link.get('href'))

webscraping6.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
"""Find text nodes equal to 'California' on a Wikipedia list page."""
from bs4 import BeautifulSoup
import requests

url = "https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States"

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait explicitly.
TIMEOUT_SECONDS = 10

result = requests.get(url, timeout=TIMEOUT_SECONDS)

doc = BeautifulSoup(result.content, 'html.parser')

# string='California' matches only text nodes whose entire content is exactly
# that string; the result is a list of the matching strings, not of tags.
result1 = doc.find_all(string='California')

print(result1)

0 commit comments

Comments
 (0)