-
Notifications
You must be signed in to change notification settings - Fork 0
/
webscrewler.py
39 lines (32 loc) · 956 Bytes
/
webscrewler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import requests
import bs4
# Download the practical's data page and pull the integer coordinates out of
# the table whose id is "yxz".
DATA_URL = 'https://www.geog.leeds.ac.uk/courses/computing/practicals/python/agent-framework/part9/data.html'


def _cell_int(row, css_class):
    """Return the integer stored in the cell of *row* with class *css_class*.

    The cell text is stripped before conversion, so surrounding whitespace
    or markup nested inside the cell does not break the parse.

    Raises:
        ValueError: if the row has no such cell or its text is not an integer.
    """
    cell = row.find(attrs={"class": css_class})
    if cell is None:
        raise ValueError(
            'row has no cell with class "{}": {}'.format(css_class, row)
        )
    return int(cell.get_text(strip=True))


# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() stops an HTTP error page from being parsed as data.
r = requests.get(DATA_URL, timeout=10)
r.raise_for_status()
content = r.text

# Parse the page with the standard-library HTML parser backend.
soup = bs4.BeautifulSoup(content, 'html.parser')

# Locate the data table by its id and fail with a clear message if it is absent.
table = soup.find(id="yxz")
if table is None:
    raise RuntimeError('no element with id "yxz" in the downloaded page')

# Keep every table row except the first one (presumably the header row).
# Slicing, unlike `del trs[0]`, also copes with a table that has no rows.
trs = table.find_all('tr')[1:]
print(trs)

# First pass: the values of the first two cells of each row, in document order.
# Reading the cells themselves avoids slicing fixed two-character fields out of
# the row text, which misparses values that are not exactly two digits long.
cds = []
for tr in trs:
    print(tr.text)
    first_cell, second_cell = tr.find_all('td')[:2]
    cds.append((
        int(first_cell.get_text(strip=True)),
        int(second_cell.get_text(strip=True)),
    ))

# Second pass: look the cells up by class and store each pair as (y, x).
coords = [(_cell_int(tr, "y"), _cell_int(tr, "x")) for tr in trs]

# Every element in the whole document carrying class "y" / class "x".
td_ys = soup.find_all(attrs={"class": "y"})
td_xs = soup.find_all(attrs={"class": "x"})
print(td_ys)
print(td_xs)