34. pandas: missing data

Create a dataframe with nan values

dropna
drop rows that have null values
drop columns that have null values
dropna -- thresh -- keep only rows or columns with at least thresh non-NA values (drop the rest)
fillna -- value
Fill NA values with mean of A column

import numpy as np
import pandas as pd
d = {'A':[1,2,np.nan],'B':[5,np.nan,np.nan],'C':[1,2,3]}
df = pd.DataFrame(d)
print(df)
     A    B  C
0  1.0  5.0  1
1  2.0  NaN  2
2  NaN  NaN  3

df.dropna()
     A    B  C
0  1.0  5.0  1
df.dropna(axis=1)
   C
0  1
1  2
2  3
df.dropna(thresh=2)
     A    B  C
0  1.0  5.0  1
1  2.0  NaN  2
df.fillna(value=10)
      A     B  C
0   1.0   5.0  1
1   2.0  10.0  2
2  10.0  10.0  3
df['A'].fillna(value=15)
0     1.0
1     2.0
2    15.0
Name: A, dtype: float64
df
     A    B  C
0  1.0  5.0  1
1  2.0  NaN  2
2  NaN  NaN  3
df['A'].fillna(value=df['A'].mean())
0    1.0
1    2.0
2    1.5
Name: A, dtype: float64