import sqlite3
import pandas as pd
import re
import numpy as np
import requests
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib import rcParams
import matplotlib.dates as mdates
from functions import *
plt.rcParams.update({'axes.titlepad': 20, 'font.size': 12, 'axes.titlesize':20})
colors = [(0/255,107/255,164/255), (255/255, 128/255, 14/255), 'red', 'green', '#9E80BA', '#8EDB8E', '#58517A']
Ncolors = 10
color_map = plt.cm.Blues_r(np.linspace(0.2, 0.5, Ncolors))
#color_map = plt.cm.tab20c_r(np.linspace(0.2, 0.5, Ncolors))
conn = sqlite3.connect('./data/original_factbook.db')
query = 'Select * from sqlite_master where type="table"'
df = pd.read_sql_query(query, conn)
display(df)
query = 'Select * from facts LIMIT 5'
df = pd.read_sql_query(query, conn)
display(df.iloc[:3,:-1])
query = '''Select MIN(population) as min,
MAX(population) as max,
MIN(population_growth) as min_growth,
MAX(population_growth) as max_growth from facts'''
pd.read_sql_query(query, conn)
query = 'Select * from facts where population=0'
df = pd.read_sql_query(query, conn)
display(df.iloc[:,:-2])
query = 'Select * from facts where population = (Select Max(population) from facts)'
df = pd.read_sql_query(query, conn)
display(df.iloc[:,:-2])
Order by population DESC to get the countries with highest population
query = 'Select name, population, population_growth, birth_rate, death_rate from facts where population>0 and population<(select MAX(population) from facts) order by population DESC'
df = pd.read_sql_query(query, conn)
display(df[:5])
Histograms for the different quantities:
fig = plt.figure(figsize=(12,12))
properties = ['population', 'population_growth', 'birth_rate', 'death_rate']
for idx, xx in enumerate(properties):
ax = fig.add_subplot(2,2, idx+1)
#drop rows with any entry as NaN
df.dropna(axis=0, how='any', inplace=True)
prop = df[xx]
if(xx == 'population'):
prop = np.log10(prop)
ax.hist(prop, bins=20)
for key,spine in ax.spines.items():
spine.set_visible(False)
ax.tick_params(left='off', bottom='off')
ax.set_xlabel(xx)
ax.set_ylabel('counts')
plt.show()
query = 'select name, population/area_land as pop_density from facts order by pop_density DESC'
df = pd.read_sql_query(query, conn)
fig = plt.figure(figsize=(7,7))
ax = fig.add_subplot(1,1,1)
df = df.dropna(axis=0, how='any')
df = df[df['pop_density']>0]
ax.hist(np.log10(df['pop_density']))
for key, spine in ax.spines.items():
spine.set_visible(False)
ax.tick_params(left='off', bottom='off')
ax.set_xlabel('$\log_{10}(\mathrm{pop\;density}[\mathrm{People}/\mathrm{km}^2])$')
plt.show()
print("Countries with the highest population density (N_people/km^2):")
print(df[0:10])