35. pandas operations

### Create a dataframe

head()
unique()
count()
len()
nunique()
value_counts()
sum()
apply() with a named function
apply() with lambda functions
sort_values()
isnull()
pivot_table()
import numpy as np
import pandas as pd
# Demo frame: two integer columns (col2 contains a repeated value, 444)
# and one string column.
df = pd.DataFrame(
    data={
        'col1': [1, 2, 3, 4],
        'col2': [444, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    },
)
print(df)
   col1  col2 col3
0     1   444  abc
1     2   555  def
2     3   666  ghi
3     4   444  xyz

df.head()  # first 5 rows
col1 col2 col3
0 1 444 abc
1 2 555 def
2 3 666 ghi
3 4 444 xyz
df.head(3) 
col1 col2 col3
0 1 444 abc
1 2 555 def
2 3 666 ghi
df.tail(2)
col1 col2 col3
2 3 666 ghi
3 4 444 xyz
df
col1 col2 col3
0 1 444 abc
1 2 555 def
2 3 666 ghi
3 4 444 xyz
df['col2'].unique()
array([444, 555, 666])
df['col2'].count()
4
df.count()
col1    4
col2    4
col3    4
dtype: int64
df['col1'].nunique()
4
df['col2'].nunique()
3
df['col2'].value_counts()
444    2
555    1
666    1
Name: col2, dtype: int64
len(df)
4
len(df['col1'])
4
df
col1 col2 col3
0 1 444 abc
1 2 555 def
2 3 666 ghi
3 4 444 xyz
df['col2'].sort_values()
0    444
3    444
1    555
2    666
Name: col2, dtype: int64
df.isnull()
col1 col2 col3
0 False False False
1 False False False
2 False False False
3 False False False
df['col1'].sum()
10
df['col1'].mean()
2.5
def times2(x):
    """Return ``x`` multiplied by 2 (i.e. ``x * 2``)."""
    doubled = x * 2
    return doubled
df['col1'].apply(times2)
0    2
1    4
2    6
3    8
Name: col1, dtype: int64
df['col1'].apply(lambda x:x*2)
0    2
1    4
2    6
3    8
Name: col1, dtype: int64
df.apply(len)
col1    4
col2    4
col3    4
dtype: int64
# Sample records for the pivot-table example: A, B and C are string
# label columns, D holds the integer values.
data = dict(
    A=['foo', 'foo', 'foo', 'bar', 'bar', 'bar'],
    B=['one', 'one', 'two', 'two', 'one', 'one'],
    C=['x', 'y', 'x', 'y', 'x', 'y'],
    D=[1, 3, 2, 5, 4, 1],
)

df = pd.DataFrame(data=data)
df
A B C D
0 foo one x 1
1 foo one y 3
2 foo two x 2
3 bar two y 5
4 bar one x 4
5 bar one y 1
# Spread column C across the header, keyed by (A, B); values come from D.
# pivot_table aggregates with the mean by default, and (A, B, C)
# combinations that have no rows show up as NaN.
df.pivot_table(values='D', index=['A', 'B'], columns=['C'])
C x y
A B
bar one 4.0 1.0
two NaN 5.0
foo one 1.0 3.0
two 2.0 NaN