-
Notifications
You must be signed in to change notification settings - Fork 0
/
population.py
105 lines (94 loc) · 3.73 KB
/
population.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Population data is taken from https://www.worldometers.info/world-population/population-by-country/
###### NECESSARY IMPORTS ######
import io

import pandas as pd
import requests
import streamlit as st
class Population_Data:
    """Country population figures scraped from worldometers.info."""

    @staticmethod
    @st.cache(show_spinner=False)
    def population_list():
        """Return the population column, ordered alphabetically by country name.

        Downloads the first HTML table of the worldometers
        "population by country" page, keeps only the country and population
        columns, removes the countries in ``excluded_countries``, renames the
        countries in ``renamed_countries``, appends three hard-coded countries
        and finally sorts the rows by country name.

        Returns:
            pandas.Series: the ``Population`` values with a fresh ``0..n-1``
            index, in ascending order of the (renamed) country name.

        Raises:
            requests.RequestException: if the request times out or the server
                answers with an HTTP error status.
            KeyError: if the table has no ``Population (<year>)`` column or no
                ``Country (or dependency)`` column.
        """
        # NOTE(review): ``st.cache`` is deprecated in newer Streamlit releases
        # in favour of ``st.cache_data`` - confirm the pinned version before
        # switching.
        url = 'https://www.worldometers.info/world-population/population-by-country/'

        # A timeout keeps a stalled connection from blocking the app forever;
        # raise_for_status turns an HTTP error page into an explicit failure
        # instead of an obscure parsing error further down.
        page = requests.get(url, timeout=30)
        page.raise_for_status()

        # Newer pandas deprecates passing literal HTML to read_html, so the
        # text is wrapped in a file-like object (accepted by old versions too).
        table = pd.read_html(io.StringIO(page.text))[0]

        # The site labels the column "Population (<year>)" and the year moves
        # on over time, so match on the prefix rather than a fixed year.
        population_column = next(
            (name for name in table.columns if str(name).startswith('Population (')),
            None,
        )
        if population_column is None:
            raise KeyError("no 'Population (<year>)' column in the worldometers table")

        country_population_data = table[['Country (or dependency)', population_column]].rename(
            columns={'Country (or dependency)': 'Country', population_column: 'Population'}
        )

        # Countries to drop from the scraped table.
        excluded_countries = [
            'American Samoa',
            'Anguilla',
            'Aruba',
            'Bermuda',
            'British Virgin Islands',
            'Caribbean Netherlands',
            'Cayman Islands',
            'Channel Islands',
            'Cook Islands',
            'Curaçao',
            'DR Congo',
            'Faeroe Islands',
            'Falkland Islands',
            'French Guiana',
            'French Polynesia',
            'Gibraltar',
            'Greenland',
            'Guadeloupe',
            'Guam',
            'Hong Kong',
            'Isle of Man',
            'Macao',
            'Martinique',
            'Mayotte',
            'Montserrat',
            'Myanmar',
            'Nauru',
            'New Caledonia',
            'Niue',
            'North Korea',
            'Northern Mariana Islands',
            'Puerto Rico',
            'Réunion',
            'Saint Barthelemy',
            'Saint Helena',
            'Saint Martin',
            'Saint Pierre & Miquelon',
            'Sint Maarten',
            'Tokelau',
            'Tonga',
            'Turkmenistan',
            'Turks and Caicos',
            'Tuvalu',
            'U.S. Virgin Islands',
            'Wallis & Futuna',
            'Western Sahara',
        ]
        # One vectorised mask instead of re-filtering the frame per country;
        # .copy() makes the column assignment below act on an independent
        # frame rather than on a slice of the scraped table.
        is_excluded = country_population_data['Country'].isin(excluded_countries)
        country_population_data = country_population_data[~is_excluded].copy()

        # Site spelling -> spelling used in the returned ordering.
        # NOTE(review): presumably the targets match another dataset's country
        # names; the "US" key only has an effect if the site lists "US" -
        # confirm the intended direction of that entry.
        renamed_countries = {
            "Côte d'Ivoire": "Cote d'Ivoire",
            "South Korea": "Korea (South)",
            "State of Palestine": "West Bank and Gaza",
            "St. Vincent & Grenadines": "Saint Vincent and the Grenadines",
            "US": "United States",
            "Congo": "Congo (Brazzaville)",
            "Czech Republic (Czechia)": "Czechia",
            "Saint Kitts & Nevis": "Saint Kitts and Nevis",
            "Sao Tome & Principe": "Sao Tome and Principe",
        }
        country_population_data["Country"] = country_population_data["Country"].replace(
            renamed_countries
        )

        # Countries added by hand with fixed population figures.
        missing_countries = {
            'Country': ['Burma', 'Kosovo', 'Congo (Kinshasa)'],
            'Population': [54409800, 1767881, 86790567],
        }
        new_countries = pd.DataFrame(missing_countries)
        country_population_data = pd.concat(
            [country_population_data, new_countries], ignore_index=True
        )

        # Sort by name so the returned values follow alphabetical country order.
        country_population_data = country_population_data.sort_values(by=['Country'])
        country_population_data = country_population_data.reset_index(drop=True)

        # Only the population values are returned, not the country names.
        return country_population_data['Population']