Thsi Post have the complete syntax and code wise details of Pandas two Major concept Series and Data Frame.
Now Lets Start-
Now Lets Start-
# Pandas Series
#Series is a one-dimensional labeled array capable of
#holding data of any type (integer, string, float, python objects, etc.).
# The axis labels are collectively called index
In [1]:
1 import pandas as pd
2 import numpy as np
In [2]:
# A pandas Series can be created using the following constructor −
# pandas.Series( data, index, dtype, copy)
3 df = pd.Series()
4 df
Out[2]:
Series([], dtype: float64)
In [3]:
# A series can be created using various inputs like −
# Array
# Dict
# Scalar value or constant
In [4]:
# Create a Series from ndarray
data = np.array(['D','I','Z','S','W','E','B'])
s = pd.Series(data)
s
Out[4]:
0 D 1 I 2 Z 3 S 4 W 5 E 6 B dtype: object
In [6]:
# Create a Series from ndarray
data = np.array(['D','I','Z','S','W','E','B'])
s = pd.Series(data, index =['001','002','003','004','005','006','007'])
s
Out[6]:
001 D 002 I 003 Z 004 S 005 W 006 E 007 B dtype: object
In [7]:
# Create a Series from dict
data = {'D':1,'I':2,'Z':3,'S':4,'W':5.0,'E':6,'B':7.0}
s = pd.Series(data)
s
Out[7]:
D 1.0 I 2.0 Z 3.0 S 4.0 W 5.0 E 6.0 B 7.0 dtype: float64
In [8]:
# Create a Series from dict
data = {'D':1,'I':2,'Z':3,'S':4,'W':5.0,'E':6,'B':7.0}
s = pd.Series(data, index =['001','002','003','004','005','006','007'])
s
# output will be NaN becuase in case of dictionary
# if the values not match with index it gices NaN
Out[8]:
001 NaN 002 NaN 003 NaN 004 NaN 005 NaN 006 NaN 007 NaN dtype: float64
In [9]:
# Create a Series from dict
data = {'D':1,'I':2,'Z':3,'S':4,'W':5.0,'E':6,'B':7.0}
s = pd.Series(data, index =['D','002','Z','004','W','006','B'])
s
Out[9]:
D 1.0 002 NaN Z 3.0 004 NaN W 5.0 006 NaN B 7.0 dtype: float64
In [12]:
# Create a Series from Scalar
s = pd.Series(3, index = ['1','2','3','4','5','6'])
s
Out[12]:
1 3 2 3 3 3 4 3 5 3 6 3 dtype: int64
In [13]:
# Create a Series from Scalar without index
s = pd.Series(3)
s
Out[13]:
0 3 dtype: int64
In [14]:
# Accessing Data from Series with Position
s = pd.Series([1,2,3,4,5,6,7],index = ['D','I','Z','S','W','E','B'])
#retrieve the first three element
print s[:3]
File "<ipython-input-14-fbdfbded2fb7>", line 2 s = pd.Series([1,2,3,4,5,,6,7],index = ['D','I','Z','S','W','E','B']) ^ SyntaxError: invalid syntax
In [16]:
# Accessing Data from Series with Position
s = pd.Series([1,2,3,4,5,6,7],index = ['D','I','Z','S','W','E','B'])
# Retrieve the first three element
print(s[:3])
D 1 I 2 Z 3 dtype: int64
In [17]:
# Retrieve the last three elements.
print(s[-3:])
W 5 E 6 B 7 dtype: int64
In [18]:
# Retrieve Data Using Label (Index)
print(s['D'])
In [19]:
# Retrieve multiple elements using a list of index label values.
print(s[['D','Z','S','W']])
D 1 Z 3 S 4 W 5 dtype: int64
In [20]:
# If a label is not contained, an exception is raised.
print(s[['D','Z','S','W','Chauhan']])
D 1.0 Z 3.0 S 4.0 W 5.0 Chauhan NaN dtype: float64
c:\users\hp\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\series.py:1152: FutureWarning: Passing list-likes to .loc or [] with any missing label will raise KeyError in the future, you can use .reindex() as an alternative. See the documentation here: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike return self.loc[key]
In [ ]:
# DataFrame
# Defination-A Data frame is a two-dimensional data structure,
# i.e., data is aligned in a tabular fashion in rows and columns
# Features of DataFrame
# Potentially columns are of different types
# Size – Mutable
# Labeled axes (rows and columns)
# Can Perform Arithmetic operations on rows and columns
# A pandas DataFrame can be created using the following constructor −
# pandas.DataFrame( data, index, columns, dtype, copy)
In [21]:
# How to create the Pandas DataFrame?
In [22]:
# Their are following methods to create the Pandas DataFrame.
# Lists
# dict
# Series
# Numpy ndarrays
# Another DataFrame
# Now lets us start the learning this with
In [23]:
import pandas as pd
import numpy as np
# define the dataframe object
df = pd.DataFrame()
# lets print this
df
Out[23]:
In [24]:
print(df)
Empty DataFrame Columns: [] Index: []
In [27]:
1 import pandas as pd
2 import numpy as np
3
4 # define the dataframe object
5 data = [1,2,3,4,5,6,7]
6 df = pd.DataFrame(data)
7
8 # lets print this
9 print(df)
0 0 1 1 2 2 3 3 4 4 5 5 6 6 7
In [28]:
1 df
Out[28]:
0
0 1
1 2
2 3
3 4
4 5
5 6
6 7
In [29]:
1 import pandas as pd
2 import numpy as np
3
4 # define the dataframe object
5 data = [['Sawan',26],['Sandeep',25],['Shiv',24]]
6 df = pd.DataFrame(data,columns=['Name','Age'])
7
8 # lets print this
9 print(df)
Name Age 0 Sawan 26 1 Sandeep 25 2 Shiv 24
In [31]:
1 import pandas as pd
2 import numpy as np
3
4 # define the dataframe object
5 data = [['Sawan',26],['Sandeep',25],['Shiv',24]]
6 df = pd.DataFrame(data,columns=['Name','Age'], dtype=int)
7
8 # lets print this
9 print(df)
Name Age 0 Sawan 26 1 Sandeep 25 2 Shiv 24
In [36]:
1 # Create a DataFrame from Dict of ndarrays / Lists
2 import pandas as pd
3 import numpy as np
4
5 # define the dataframe object
6 data = {'Name':['Dizsweb','Sawan','Chauhan'] ,'Age' :[ 123, 26, 35]}
7 df = pd.DataFrame(data)
8
9 # lets print this
10 print(df)
Name Age 0 Dizsweb 123 1 Sawan 26 2 Chauhan 35
In [37]:
1 # Create a DataFrame from Dict of ndarrays / Lists
2 import pandas as pd
3 import numpy as np
4
5 # define the dataframe object
6 data = {'Name':['Dizsweb','Sawan','Chauhan'] ,'Age' :[ 123, 26, 35]}
7 df = pd.DataFrame(data, index =['R1','R2','R3'])
8
9 # lets print this
10 print(df)
Name Age R1 Dizsweb 123 R2 Sawan 26 R3 Chauhan 35
In [39]:
1 # Create a DataFrame from List of Dicts
2 import pandas as pd
3 import numpy as np
4 data = [{'a':3,'b':4,'c':8},{'a':5,'d':6,'f':9},{'b':3,'c':8,'d':9}]
5 df = pd.DataFrame(data)
6 print(df)
a b c d f 0 3.0 4.0 8.0 NaN NaN 1 5.0 NaN NaN 6.0 9.0 2 NaN 3.0 8.0 9.0 NaN
In [44]:
1 # Creation of two Dataframe from the same data
2 import pandas as pd
3 data = [{'a': 1, 'b': 2},{'a': 5, 'b': 10, 'c': 20}]
4
5 #With two column indices, values same as dictionary keys
6 df1 = pd.DataFrame(data, index=['first', 'second'], columns=['a', 'b'])
7
8 #With two column indices with one index with other name
9 df2 = pd.DataFrame(data, index=['first', 'second'], columns=['a', 'b1'])
10 print(df1)
11
12 print(df2)
a b first 1 2 second 5 10 a b1 first 1 NaN second 5 NaN
In [47]:
1 # Create a DataFrame from Dict of Series
2 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
3 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
4
5 df = pd.DataFrame(d)
6 print(df)
7
one two a 1.0 1 b 2.0 2 c 3.0 3 d NaN 4
In [48]:
1 # perticular column Selection
2 print(df['one'])
a 1.0 b 2.0 c 3.0 d NaN Name: one, dtype: float64
In [50]:
1 # Column Addition
2 import pandas as pd
3
4 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
5 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
6
7 df = pd.DataFrame(d)
8
9 # Adding a new column to an existing DataFrame object with column label by passing new series
10
11 print ("Adding a new column by passing as Series:")
12 df['three']=pd.Series([10,20,30],index=['a','b','c'])
13 print(df)
14
15 print("Adding a new column using the existing columns in DataFrame:")
16 df['four']=df['one']+df['three']
17
18 print(df)
Adding a new column by passing as Series: one two three a 1.0 1 10.0 b 2.0 2 20.0 c 3.0 3 30.0 d NaN 4 NaN Adding a new column using the existing columns in DataFrame: one two three four a 1.0 1 10.0 11.0 b 2.0 2 20.0 22.0 c 3.0 3 30.0 33.0 d NaN 4 NaN NaN
In [51]:
1 # Column Deletion
2 # Using the previous DataFrame, we will delete a column
3 # using del function
4 import pandas as pd
5
6 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
7 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
8 'three' : pd.Series([10,20,30], index=['a','b','c'])}
9
10 df = pd.DataFrame(d)
11 print("Our dataframe is:")
12 print(df)
13
14 # using del function
15 print("Deleting the first column using DEL function:")
16 del(df['one'])
17 print(df)
18
19 # using pop function
20 print("Deleting another column using POP function:")
21 df.pop('two')
22 print(df)
Our dataframe is: one two three a 1.0 1 10.0 b 2.0 2 20.0 c 3.0 3 30.0 d NaN 4 NaN Deleting the first column using DEL function: two three a 1 10.0 b 2 20.0 c 3 30.0 d 4 NaN Deleting another column using POP function: three a 10.0 b 20.0 c 30.0 d NaN
In [55]:
1 # Row Selection, Addition, and Deletion with loc()
2 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
3 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
4
5 df = pd.DataFrame(d, dtype=int)
6 print(df.loc['b'])
one 2.0 two 2.0 Name: b, dtype: float64
In [57]:
1 # Row Selection, Addition, and Deletion with iloc()
2 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
3 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
4
5 df = pd.DataFrame(d, dtype=int)
6 print(df.iloc[2])
one 3.0 two 3.0 Name: c, dtype: float64
In [58]:
1 # Slice Rows
2 import pandas as pd
3
4 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
5 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
6
7 df = pd.DataFrame(d)
8 print(df[2:4])
one two c 3.0 3 d NaN 4
In [63]:
1 # Addition of Rows using append funtion
2
3 import pandas as pd
4
5 df = pd.DataFrame([[1, 2], [3, 4]], columns = ['a','b'])
6 df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])
7
8 df = df.append(df2)
9 print(df)
a b 0 1 2 1 3 4 0 5 6 1 7 8
In [64]:
1 # Deletion of Rows
2
3 import pandas as pd
4
5 df = pd.DataFrame([[1, 2], [3, 4]], columns = ['a','b'])
6 df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])
7
8 df = df.append(df2)
9
10 # Drop rows with label 0
11 df = df.drop(0)
12
13 print(df)
a b 1 3 4 1 7 8
In [65]:
1 # Python Pandas - Panel
2 # A panel is a 3D container of data.
3 # The term Panel data is derived from econometrics and
4 # is partially responsible for the name pandas − pan(el)-da(ta)-s.
5
6 # The names for the 3 axes are intended to give
7 # some semantic meaning to describing operations
8 # involving panel data. They are −
9
10 # items − axis 0, each item corresponds to a DataFrame contained inside.
11 # major_axis − axis 1, it is the index (rows) of each
of the DataFrames.
12
# minor_axis − axis 2, it is the columns of each of the DataFrames.
13
14
# pandas.Panel()
15
# A Panel can be created using the following constructor −
16
# pandas.Panel(data, items, major_axis, minor_axis, dtype, copy)
17
# This class is removed from pandas now