# Pandas - Intro

import pandas as pd
# Give the dtype explicitly: creating an empty Series without one emits a
# DeprecationWarning because the default is changing from float64 to object.
pd.Series(dtype='float64')
/Users/ram/homebrew/lib/python3.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
  """Entry point for launching an IPython kernel.
Series([], dtype: float64)
import numpy as np
# Wrap a 1-D NumPy array in a Series; with no index given, the values are
# labelled 0..4 by position.
arr = np.array([1, 2, 3, 4, 5])
s = pd.Series(data=arr)
print(s)
0    1
1    2
2    3
3    4
4    5
dtype: int64
# Attach explicit string labels through `index` instead of the default
# positional labels.
arr = np.array([1, 2, 3, 4, 5])
s = pd.Series(arr, index=list('ABCDE'))
print(s)
A    1
B    2
C    3
D    4
E    5
dtype: int64
# `name` labels the Series itself; it is shown in the footer of the printout.
arr = np.array([1, 2, 3, 4, 5])
s = pd.Series(arr, index=list('ABCDE'), name='x')
print(s)
A    1
B    2
C    3
D    4
E    5
Name: x, dtype: int64
# A dict becomes a Series whose index is made of the dict's keys.
x = dict(a=1, b=2, c=3)
s = pd.Series(data=x)
print(s)
a    1
b    2
c    3
dtype: int64
# With a dict plus an explicit `index`, values are looked up by label.
# 'A' is not a key of x (the keys are lowercase), so the entry is NaN and the
# resulting dtype is float64.
x = dict(a=1, b=2, c=3)
s = pd.Series(data=x, index=['A'])
print(s)
A   NaN
dtype: float64
# Only labels listed in `index` are kept: 'a' matches a dict key and keeps its
# value, 'A' has no match and becomes NaN (which turns the dtype into float64).
x = dict(a=1, b=2, c=3)
s = pd.Series(data=x, index=['a', 'A'])
print(s)
a    1.0
A    NaN
dtype: float64
# A scalar value is repeated once for every label in `index`.
s = pd.Series(data=5, index=list(range(1, 6)))
print(s)
1    5
2    5
3    5
4    5
5    5
dtype: int64
# Labelled Series used by the indexing examples that follow.
arr = np.array([1, 2, 3, 4, 5])
s = pd.Series(data=arr, index=['A', 'B', 'C', 'D', 'E'])
print(s)
A    1
B    2
C    3
D    4
E    5
dtype: int64
# Select by position explicitly: s is indexed by string labels, and relying on
# s[0] to fall back to a positional lookup is deprecated in recent pandas.
print(s.iloc[0])
1
print(s['A'])
1
print(s[3:])
D    4
E    5
dtype: int64
print(s[-3:])
C    3
D    4
E    5
dtype: int64
print(s[['A','B','C']])
A    1
B    2
C    3
dtype: int64
print(s[3:])
D    4
E    5
dtype: int64
print(s[['D','E']])
D    4
E    5
dtype: int64
# 'F' is not one of the index labels, so this lookup raises KeyError.
print(s['F'])
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/homebrew/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   4410             try:
-> 4411                 return libindex.get_value_at(s, key)
   4412             except IndexError:

pandas/_libs/index.pyx in pandas._libs.index.get_value_at()

pandas/_libs/index.pyx in pandas._libs.index.get_value_at()

pandas/_libs/util.pxd in pandas._libs.util.get_value_at()

pandas/_libs/util.pxd in pandas._libs.util.validate_indexer()

TypeError: 'str' object cannot be interpreted as an integer

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-26-61ea9712b813> in <module>
----> 1 print(s['F'])

~/homebrew/lib/python3.7/site-packages/pandas/core/series.py in __getitem__(self, key)
    869         key = com.apply_if_callable(key, self)
    870         try:
--> 871             result = self.index.get_value(self, key)
    872 
    873             if not is_scalar(result):

~/homebrew/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   4417                     raise InvalidIndexError(key)
   4418                 else:
-> 4419                     raise e1
   4420             except Exception:
   4421                 raise e1

~/homebrew/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   4403         k = self._convert_scalar_indexer(k, kind="getitem")
   4404         try:
-> 4405             return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
   4406         except KeyError as e1:
   4407             if len(self) > 0 and (self.holds_integer() or self.is_boolean()):

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'F'
## DataFrames
df = pd.DataFrame()
type(df)
pandas.core.frame.DataFrame
print(df)
Empty DataFrame
Columns: []
Index: []
# A flat list becomes a single column (labelled 0) with a default integer
# row index.
df = pd.DataFrame(data=[1, 2, 3, 4, 5])
print(df)
   0
0  1
1  2
2  3
3  4
4  5
# Two rows labelled A and B, five columns labelled c1..c5.
df = pd.DataFrame(
    data=([1, 2, 3, 4, 5], [6, 7, 8, 9, 10]),
    index=['A', 'B'],
    columns=['c1', 'c2', 'c3', 'c4', 'c5'],
)
df
c1 c2 c3 c4 c5
A 1 2 3 4 5
B 6 7 8 9 10
# Each inner list is one row; `columns` names the two fields.
data = [
    ['Bhavya', 10],
    ['Manasa', 12],
    ['Nandhini', 13],
    ['Keerthi', 14],
]
df = pd.DataFrame(data=data, columns=['Name', 'level'])
print(df)
       Name  level
0    Bhavya     10
1    Manasa     12
2  Nandhini     13
3   Keerthi     14
# Passing dtype='float' to the constructor applies it to every column, and the
# string 'Name' column cannot be cast to float (recent pandas raises instead of
# silently skipping it). Build the frame first, then cast only the numeric
# 'level' column.
data = [['Bhavya', 10], ['Manasa', 12], ['Nandhini', 13], ['Keerthi', 14]]
df = pd.DataFrame(data, columns=['Name', 'level']).astype({'level': 'float'})
print(df)
       Name  level
0    Bhavya   10.0
1    Manasa   12.0
2  Nandhini   13.0
3   Keerthi   14.0
# A dict of equal-length lists maps each key to one column.
data = {
    'Name': ['Bhavya', 'Manasa', 'Nandhini', 'Keerthi'],
    'level': [10, 12, 13, 14],
}
df = pd.DataFrame(data=data)
print(df)
       Name  level
0    Bhavya     10
1    Manasa     12
2  Nandhini     13
3   Keerthi     14
# Columns built from Series are aligned on the union of their indexes. 'one'
# has no entry for 'd', so that cell is NaN and the column is float64.
data = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
}

df = pd.DataFrame(data=data)
print(df)
   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4
df['one']
a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64
df['one']['c']
3.0
# Third entry of column 'one' by position. The column is indexed by string
# labels, so .iloc states the positional intent explicitly instead of relying
# on the deprecated integer fallback of [].
df['one'].iloc[2]
3.0
# Assigning a list to a new column label adds that column to the frame.
data = {
    'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
}

df = pd.DataFrame(data=data)
df['three'] = [10, 12, 13, 14]
print(df)
   one  two  three
a  1.0    1     10
b  2.0    2     12
c  3.0    3     13
d  NaN    4     14
del df['three']
print(df)
   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4
df.pop('one')
a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64
print(df)
   two
a    1
b    2
c    3
d    4
# Rebuild the three-column frame used by the row-selection examples below.
data = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)

df = pd.DataFrame(data)
df['three'] = [10, 12, 13, 14]
print(df)
   one  two  three
a  1.0    1     10
b  2.0    2     12
c  3.0    3     13
d  NaN    4     14
df.iloc[1]
one       2.0
two       2.0
three    12.0
Name: b, dtype: float64
df.loc['a']
one       1.0
two       1.0
three    10.0
Name: a, dtype: float64
df['one']
a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64
df.loc['a']
one       1.0
two       1.0
three    10.0
Name: a, dtype: float64
df.iloc[0]
one       1.0
two       1.0
three    10.0
Name: a, dtype: float64
df
one two three
a 1.0 1 10
b 2.0 2 12
c 3.0 3 13
d NaN 4 14
# DataFrame.append was deprecated and later removed from pandas; pd.concat is
# the supported way to stack frames row-wise. ignore_index must be a real
# boolean: False keeps the existing row labels (a-d) and the new row keeps
# its own default label 0.
df2 = pd.DataFrame([[5, 6, 7]], columns=['one', 'two', 'three'])
pd.concat([df, df2], ignore_index=False)