import pandas as pd
import re
import numpy as np
from IPython.display import display
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib import rcParams
from functions import *
# Global matplotlib styling: extra padding under titles and larger fonts.
plt.rcParams.update({
    'axes.titlepad': 20,
    'font.size': 12,
    'axes.titlesize': 20,
})

# Palette: two RGB triples scaled from 0-255 to 0-1, then two named colors.
colors = [
    tuple(channel / 255 for channel in (0, 107, 164)),
    tuple(channel / 255 for channel in (255, 128, 14)),
    'red',
    'green',
]

# Load the training set, discard the identifier/free-text columns
# (the original note cites too many missing values), then drop every
# row that still contains at least one missing value.
titanic = (
    pd.read_csv('./data/train.csv')
    .drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
    .dropna(axis=0, how='any')
)
Variables available:
# Print every remaining column name on one line, each followed by " | ".
# The print call must be indented so that it forms the loop body.
for element in titanic.columns:
    print("%s | " % element, end='')
DataFrame snippet:
# Show the first three rows of the cleaned DataFrame.
display(titanic.head(3))
import seaborn as sns

# Filled kernel density estimate of passenger age on a plain white style.
# NOTE: `shade` is deprecated since seaborn 0.11 in favor of `fill`.
sns.set_style('white')
age_axes = sns.kdeplot(titanic['Age'], shade=True)
age_axes.set_xlabel('Age')
# Remove the left and bottom spines as well (top/right go by default).
sns.despine(left=True, bottom=True)
plt.show()
# One age-density panel per passenger class, laid out as columns.
# NOTE: `shade` is deprecated since seaborn 0.11 in favor of `fill`.
grid = sns.FacetGrid(data=titanic, col='Pclass', height=6).map(
    sns.kdeplot, 'Age', shade=True
)
sns.despine(left=True, bottom=True)
plt.show()
# Age densities on a Pclass (rows) x Survived (columns) grid of panels.
# NOTE: `shade` is deprecated since seaborn 0.11 in favor of `fill`.
grid = sns.FacetGrid(
    data=titanic,
    row='Pclass',
    col='Survived',
    height=4,
).map(sns.kdeplot, 'Age', shade=True)
sns.despine(left=True, bottom=True)
plt.show()
# Same Pclass x Survived grid, with a third condition added through `hue`:
# each panel overlays one age density per value of 'Sex'.
# NOTE: `shade` is deprecated since seaborn 0.11 in favor of `fill`.
grid = sns.FacetGrid(
    data=titanic,
    row='Pclass',
    col='Survived',
    hue='Sex',
    height=3,
).map(sns.kdeplot, 'Age', shade=True)
# The legend is needed to tell the hue levels apart.
grid.add_legend()
sns.despine(left=True, bottom=True)
plt.show()
# Joint kernel density of Age against Fare, using only the rows whose
# fare is below 75.
fares_below_75 = titanic.loc[titanic['Fare'] < 75, :]
# x / y / data are passed by keyword: recent seaborn releases make them
# keyword-only and raise TypeError on positional use, while older
# releases accept the keyword form just as well.
sns.jointplot(x='Age', y='Fare', data=fares_below_75, kind='kde', height=4)
sns.despine(left=True, bottom=True)
plt.show()