Search the Blog

Showing posts with label Python. Show all posts
Showing posts with label Python. Show all posts

Wednesday, October 16, 2019

Data Science - Pandas- Series and Dataframe

Thsi Post have the complete syntax and code wise details of Pandas two Major concept Series and Data Frame.

Now Lets Start-





Thsi Post have the complete syntax and code wise details of Pandas two Major concept Series and Data Frame.
Now Lets Start-
# Pandas Series
#Series is a one-dimensional labeled array capable of
#holding data of any type (integer, string, float, python objects, etc.).
# The axis labels are collectively called index
In [1]:
1 import pandas as pd
2 import numpy as np
In [2]:
# A pandas Series can be created using the following constructor −
# pandas.Series( data, index, dtype, copy)
3 df = pd.Series()
4 df
Out[2]:
Series([], dtype: float64)
In [3]:
# A series can be created using various inputs like −
# Array
# Dict
# Scalar value or constant
In [4]:
# Create a Series from ndarray
    data = np.array(['D','I','Z','S','W','E','B'])
    s = pd.Series(data)
    s
Out[4]:
0 D 1 I 2 Z 3 S 4 W 5 E 6 B dtype: object
In [6]:
# Create a Series from ndarray
    data = np.array(['D','I','Z','S','W','E','B'])
    s = pd.Series(data, index =['001','002','003','004','005','006','007'])
    s
Out[6]:
    001 D 002 I 003 Z 004 S 005 W 006 E 007 B dtype: object
In [7]:
# Create a Series from dict
    data = {'D':1,'I':2,'Z':3,'S':4,'W':5.0,'E':6,'B':7.0}
    s = pd.Series(data)
    s
Out[7]:
D 1.0 I 2.0 Z 3.0 S 4.0 W 5.0 E 6.0 B 7.0 dtype: float64
In [8]:
# Create a Series from dict
    data = {'D':1,'I':2,'Z':3,'S':4,'W':5.0,'E':6,'B':7.0}
    s = pd.Series(data, index =['001','002','003','004','005','006','007'])
    s
# output will be NaN becuase in case of dictionary
# if the values not match with index it gices NaN
Out[8]:
001 NaN 002 NaN 003 NaN 004 NaN 005 NaN 006 NaN 007 NaN dtype: float64
In [9]:
# Create a Series from dict
    data = {'D':1,'I':2,'Z':3,'S':4,'W':5.0,'E':6,'B':7.0}
    s = pd.Series(data, index =['D','002','Z','004','W','006','B'])
    s
Out[9]:
    D 1.0 002 NaN Z 3.0 004 NaN W 5.0 006 NaN B 7.0 dtype: float64
In [12]:
# Create a Series from Scalar
    s = pd.Series(3, index = ['1','2','3','4','5','6'])
    s
Out[12]:
1 3 2 3 3 3 4 3 5 3 6 3 dtype: int64
In [13]:
# Create a Series from Scalar without index
    s = pd.Series(3)
    s
Out[13]:
0 3 dtype: int64
In [14]:
# Accessing Data from Series with Position
    s = pd.Series([1,2,3,4,5,6,7],index = ['D','I','Z','S','W','E','B'])
#retrieve the first three element
    print s[:3]
File "<ipython-input-14-fbdfbded2fb7>", line 2 s = pd.Series([1,2,3,4,5,,6,7],index = ['D','I','Z','S','W','E','B']) ^ SyntaxError: invalid syntax
In [16]:
# Accessing Data from Series with Position
    s = pd.Series([1,2,3,4,5,6,7],index = ['D','I','Z','S','W','E','B'])
# Retrieve the first three element
    print(s[:3])
    D 1 I 2 Z 3 dtype: int64
In [17]:
# Retrieve the last three elements.
    print(s[-3:])
    W 5 E 6 B 7 dtype: int64
In [18]:
# Retrieve Data Using Label (Index)
    print(s['D'])
In [19]:
# Retrieve multiple elements using a list of index label values.
    print(s[['D','Z','S','W']])
    D 1 Z 3 S 4 W 5 dtype: int64
In [20]:
# If a label is not contained, an exception is raised.
    print(s[['D','Z','S','W','Chauhan']])
    D 1.0 Z 3.0 S 4.0 W 5.0 Chauhan NaN dtype: float64
    c:\users\hp\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\series.py:1152: FutureWarning: Passing list-likes to .loc or [] with any missing label will raise KeyError in the future, you can use .reindex() as an alternative. See the documentation here: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike return self.loc[key]
In [ ]:
# DataFrame
# Defination-A Data frame is a two-dimensional data structure,
# i.e., data is aligned in a tabular fashion in rows and columns ​
# Features of DataFrame
# Potentially columns are of different types
# Size – Mutable
# Labeled axes (rows and columns)
# Can Perform Arithmetic operations on rows and columns ​
# A pandas DataFrame can be created using the following constructor −
# pandas.DataFrame( data, index, columns, dtype, copy)

In [21]:
# How to create the Pandas DataFrame?
In [22]:
 ​
# Their are following methods to create the Pandas DataFrame.
# Lists
# dict
# Series
# Numpy ndarrays
# Another DataFrame
# Now lets us start the learning this with

In [23]:
    import pandas as pd
    import numpy as np​

# define the dataframe object
 ​    df = pd.DataFrame()​
# lets print this
    df
Out[23]:
In [24]:
    print(df)
Empty DataFrame Columns: [] Index: []
In [27]:
1 import pandas as pd
2 import numpy as np
3 ​
4 # define the dataframe object
5 data = [1,2,3,4,5,6,7]
6 df = pd.DataFrame(data)
7 ​
8 # lets print this
9 print(df)
0 0 1 1 2 2 3 3 4 4 5 5 6 6 7
In [28]:
1 df
Out[28]:
0
0 1
1 2
2 3
3 4
4 5
5 6
6 7
In [29]:
1 import pandas as pd
2 import numpy as np
3 ​
4 # define the dataframe object
5 data = [['Sawan',26],['Sandeep',25],['Shiv',24]]
6 df = pd.DataFrame(data,columns=['Name','Age'])
7 ​
8 # lets print this
9 print(df)
Name Age 0 Sawan 26 1 Sandeep 25 2 Shiv 24
In [31]:
1 import pandas as pd
2 import numpy as np
3 ​
4 # define the dataframe object
5 data = [['Sawan',26],['Sandeep',25],['Shiv',24]]
6 df = pd.DataFrame(data,columns=['Name','Age'], dtype=int)
7 ​
8 # lets print this
9 print(df)
Name Age 0 Sawan 26 1 Sandeep 25 2 Shiv 24
In [36]:
1 # Create a DataFrame from Dict of ndarrays / Lists
2 import pandas as pd
3 import numpy as np
4 ​
5 # define the dataframe object
6 data = {'Name':['Dizsweb','Sawan','Chauhan'] ,'Age' :[ 123, 26, 35]}
7 df = pd.DataFrame(data)
8 ​
9 # lets print this
10 print(df)
Name Age 0 Dizsweb 123 1 Sawan 26 2 Chauhan 35
In [37]:
1 # Create a DataFrame from Dict of ndarrays / Lists
2 import pandas as pd
3 import numpy as np
4 ​
5 # define the dataframe object
6 data = {'Name':['Dizsweb','Sawan','Chauhan'] ,'Age' :[ 123, 26, 35]}
7 df = pd.DataFrame(data, index =['R1','R2','R3'])
8 ​
9 # lets print this
10 print(df)
Name Age R1 Dizsweb 123 R2 Sawan 26 R3 Chauhan 35
In [39]:
1 # Create a DataFrame from List of Dicts
2 import pandas as pd
3 import numpy as np
4 data = [{'a':3,'b':4,'c':8},{'a':5,'d':6,'f':9},{'b':3,'c':8,'d':9}]
5 df = pd.DataFrame(data)
6 print(df)
a b c d f 0 3.0 4.0 8.0 NaN NaN 1 5.0 NaN NaN 6.0 9.0 2 NaN 3.0 8.0 9.0 NaN
In [44]:
1 # Creation of two Dataframe from the same data
2 import pandas as pd
3 data = [{'a': 1, 'b': 2},{'a': 5, 'b': 10, 'c': 20}]
4 ​
5 #With two column indices, values same as dictionary keys
6 df1 = pd.DataFrame(data, index=['first', 'second'], columns=['a', 'b'])
7 ​
8 #With two column indices with one index with other name
9 df2 = pd.DataFrame(data, index=['first', 'second'], columns=['a', 'b1'])
10 print(df1)
11 ​
12 print(df2)
a b first 1 2 second 5 10 a b1 first 1 NaN second 5 NaN
In [47]:
1 # Create a DataFrame from Dict of Series
2 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
3 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
4 ​
5 df = pd.DataFrame(d)
6 print(df)
7 ​
one two a 1.0 1 b 2.0 2 c 3.0 3 d NaN 4
In [48]:
1 # perticular column Selection
2 print(df['one'])
a 1.0 b 2.0 c 3.0 d NaN Name: one, dtype: float64
In [50]:
1 # Column Addition
2 import pandas as pd
3 ​
4 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
5 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
6 ​
7 df = pd.DataFrame(d)
8 ​
9 # Adding a new column to an existing DataFrame object with column label by passing new series
10 ​
11 print ("Adding a new column by passing as Series:")
12 df['three']=pd.Series([10,20,30],index=['a','b','c'])
13 print(df)
14 ​
15 print("Adding a new column using the existing columns in DataFrame:")
16 df['four']=df['one']+df['three']
17 ​
18 print(df)
Adding a new column by passing as Series: one two three a 1.0 1 10.0 b 2.0 2 20.0 c 3.0 3 30.0 d NaN 4 NaN Adding a new column using the existing columns in DataFrame: one two three four a 1.0 1 10.0 11.0 b 2.0 2 20.0 22.0 c 3.0 3 30.0 33.0 d NaN 4 NaN NaN
In [51]:
1 # Column Deletion
2 # Using the previous DataFrame, we will delete a column
3 # using del function
4 import pandas as pd
5 ​
6 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
7 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
8 'three' : pd.Series([10,20,30], index=['a','b','c'])}
9 ​
10 df = pd.DataFrame(d)
11 print("Our dataframe is:")
12 print(df)
13 ​
14 # using del function
15 print("Deleting the first column using DEL function:")
16 del(df['one'])
17 print(df)
18 ​
19 # using pop function
20 print("Deleting another column using POP function:")
21 df.pop('two')
22 print(df)
Our dataframe is: one two three a 1.0 1 10.0 b 2.0 2 20.0 c 3.0 3 30.0 d NaN 4 NaN Deleting the first column using DEL function: two three a 1 10.0 b 2 20.0 c 3 30.0 d 4 NaN Deleting another column using POP function: three a 10.0 b 20.0 c 30.0 d NaN
In [55]:
1 # Row Selection, Addition, and Deletion with loc()
2 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
3 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
4 ​
5 df = pd.DataFrame(d, dtype=int)
6 print(df.loc['b'])
one 2.0 two 2.0 Name: b, dtype: float64
In [57]:
1 # Row Selection, Addition, and Deletion with iloc()
2 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
3 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
4 ​
5 df = pd.DataFrame(d, dtype=int)
6 print(df.iloc[2])
one 3.0 two 3.0 Name: c, dtype: float64
In [58]:
1 # Slice Rows
2 import pandas as pd
3 ​
4 d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
5 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
6 ​
7 df = pd.DataFrame(d)
8 print(df[2:4])
one two c 3.0 3 d NaN 4
In [63]:
1 # Addition of Rows using append funtion
2 ​
3 import pandas as pd
4 ​
5 df = pd.DataFrame([[1, 2], [3, 4]], columns = ['a','b'])
6 df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])
7 ​
8 df = df.append(df2)
9 print(df)
a b 0 1 2 1 3 4 0 5 6 1 7 8
In [64]:
1 # Deletion of Rows
2 ​
3 import pandas as pd
4 ​
5 df = pd.DataFrame([[1, 2], [3, 4]], columns = ['a','b'])
6 df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])
7 ​
8 df = df.append(df2)
9 ​
10 # Drop rows with label 0
11 df = df.drop(0)
12 ​
13 print(df)
a b 1 3 4 1 7 8
In [65]:
1 # Python Pandas - Panel
2 # A panel is a 3D container of data.
3 # The term Panel data is derived from econometrics and
4 # is partially responsible for the name pandas − pan(el)-da(ta)-s.
5 ​
6 # The names for the 3 axes are intended to give
7 # some semantic meaning to describing operations
8 # involving panel data. They are −
9 ​
10 # items − axis 0, each item corresponds to a DataFrame contained inside.
11 # major_axis − axis 1, it is the index (rows) of each
of the DataFrames.
12
#     minor_axis − axis 2, it is the columns of each of the DataFrames.
13

14
# pandas.Panel()
15
# A Panel can be created using the following constructor −
16
# pandas.Panel(data, items, major_axis, minor_axis, dtype, copy)
17
# This class is removed from pandas now

Wednesday, October 9, 2019

Indexes in Python

Indexing is a standard feature of most programming languages. Java, Python, C#, C++, and C all have syntax like E[i] whereby an item with index i can be reference in E (which might be a character string, an array of numbers etc). 
Python Indexes


Python goes further beyond, it is possible to refer to chunks  of a sequence instead of indexing items one at a time. This feature in python is called slicing a sequence. Other techniques, like split and join, make it easy to take apart strings and assemble lists of strings into a new strings . In the same way that hand tools allow people to cut, shape, reshape, and put together wood into furniture, slicing, split, and join operations form the “carpentry” of Python’s text processing tasks. Even more powerful methods, notabe regular expressions, could be likened to the power tools of text and string processing. This post only covers the basic techniques, leaving regular expressions and other more advance libraries of software as later topics to explore. Working with slices particularly in Python’s syntax for slices, takes some getting used to. With regular practice, it becomes easy to write expressions that take apart sequences in creative ways and use operators to assemble slices into a new sequences. For strings slicing is only the beginning. String methods provide ways to search, replace, trim and form new strings from sequences of strings.

Following three methods are used to get the data in python via indexing
  1    loc() Only for lables
  2    iloc()  only for integer
  3    ix()  for both label and and integer.


Pandas Indexing with jupyter complete tutorialk to underatnd the concept and learning

Pandas Indexing with jupyter complete tutorial to understnd the concept and learning of data science

Pandas Indexing with jupyter complete tutorial to understnd the concept and learning of data science  for beginner

Pandas Indexing with jupyter complete tutorial to understnd the concept and learning of data science  for beginner and fresher

Pythan index with jupyter complete tutorial with example





Tuesday, October 1, 2019

LPP Problem With Python and Jupyter

Linear Programmming (LP) is a widely used mathematical modeling technique developed to help the decisions makers in planning and decision - making according to resource allocation is concerned.
LPP Programming Concept and other documentation for starting learning  Data Science

Structure of LP Model are

1- Decision Variables (Activities)

    We need to evalute various alternative for getting the optimum solution of desired Objective Function.
    The values of certain varibles is not under decision-makers. Then such varibles is called Controllable.
   The values of certain varibles is under decision-makers. Then such varibles is called uncontrollable.

2- Objective function

   This is the funtion which have to either maximized or minimized.
   Otimize ( Minimize or Maximize) 
      Z = C1X1 + C2X2 + C3X3 + ...... + CnXn
     where C1, C2, C3, ..., Cn are constant 
     X1, X2, X3,  .... Xn are varibles.

3- Constraints

There are always certain limitations on the use of resourses, example like lobour, machinery, raw material, space, money , etc . the limit the degree to which objective can be achived. Such constraints must be expressed as linear equalities i terms of decision vatriables.  


Assumptions of an LP Model-

1- Certainty

In all LPP models is assumed that all model parameters contribute to make unit decision.

2- Additivity

   The value of the objective function and the totaln amount of each resourses used muct be eqaul to the sum of the respective indivdual and consumtpion of resourses by a unit of decision varible.

3- Linearity or proportionality

   The amount of each resourse used and ists contibution to the profit in objective function must be proportional to the values of each decision varibles

4- Divisiblity or Continuity

   The solution of each resourses values of decision variables  are allowed to assume continuous values.

Advantages of LPP-

1- Optimum uses of productive Resources.

2- Improve the Quality of decesion making.

3- Fesible and possible Solution. 

4- Identifing the bottlenecks.

5- Re- evalution of decesion by changeing the varibles.


Limitation of LPP-

1- All relationship are considered as Linear.

2- No gaurentee to have integer Solution.

3- No consideration of time and uncertainity.

4- Large - Scale problem solution is very complicated.

5- Parameteres is assumed as constant.

6- Deals with only single objective.


Application Ares of Linear Programming Problem(LPP)

1- Agriculture

2- Military Applications

3- Production Management

    1- Mix Product    

   2- Production Planning    

   3- Assembly Line Balancing    

   4- Trim Loss


4- Financial Management

    1- Portfolio Selection    

    2- Profit Management

5- Marketing Management

    1- Media Selection    

    2- Travellling Salesman Problem    

    3- Physical Distribution


6- Personal Management.

   1- Staffing Problem   

   2- Determination of Equitable Salaries   

   3- Job Evaluation and Selection



A linear programming problem may be defined as the problem for maximizing or minimizing a linear function subject to the linear constraints. The constraints could be in equalities or inequalities.

Here is a simple example: find numbers x1 and x2 that maximizes the sum x1 + x2 subject to the constraints(conditions)

(i)    X1 ≥ 0
(ii)   X2 ≥ 0
(ii)   X1 + 2X2  40
(iii)  4X1 + 2X2  120
(iv)  X1 + X2  1

The first two constraints,X1 ≥ 0 and X2 ≥ 0 are called nonnegativity constraints  and will always be postive in case of maximum and mininum problems becauase in real world if something exist then it could calculted and formulated.


The other constraints shown above are called the main constraints. The function that have to be maximized (or minimized) is called the objective function. Here, the objective function is X+ X2.




We usually denotes this function with Z.
    Z =  X1 +  X2

Lets understand the complete concept with Real Life Example

We want to find the maximum solution of this linear polymer to:
 Constraints are following:
   i       x ≥ 0,   
   ii      ≥ 3,     
   iii     2≤ 20 – x,     
   iv     4≥ 2– 8,
   V       ≤ 2− 10



# LPP Problem Sample Example 1
# LPP Problem Sample Example 1
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline

# Construct lines
# x > 0
x = np.linspace(0, 20, 2000)
# y >= 3
y1 = (x*0) + 3
# 2y <= 50 - x
y2 = (20-x)/2.0
# 4y >= 2x - 8 
y3 = (2*x-8)/4.0
# y <= 2x - 10 
y4 = 2 * x -10

# Make plot
plt.plot(x, y1, label=r'$y\geq3$')
plt.plot(x, y2, label=r'$2y\leq20-x$')
plt.plot(x, y3, label=r'$4y\geq 2x - 8$')
plt.plot(x, y4, label=r'$y\leq 2x-10$')
plt.xlim((0, 16))
plt.ylim((0, 11))
plt.xlabel(r'$x$')
plt.ylabel(r'$y$')

# Fill feasible region
y5 = np.minimum(y2, y4)
y6 = np.maximum(y1, y3)
plt.fill_between(x, y5, y6, where=y5>y6, color='grey', alpha=0.5)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Our solution lies somewhere in the grey feasible region in the graph below.
LPP Problem With Python and Jupyter with graph


It has been proven that the minima and maxima of linear programming problems are at the vertices of the feasible region. In this, there are 4 corners to our feasible region, So we can find the solutions for putting the value of this corner into the Objective Function each corner to find our maximum.
Four Courner X Y Cordinates Are:
Maximun 
Funtion             X           Y             Value
Z= 5x+ 4y                 8                  6                  64

 
Z= 5x+ 4y                 6.25             3                  43.25  

Z= 5x+ 4y                 10                3                  62                                  
Z= 5x+ 4y                 12                4                  76


So Last Cordinate ( 12 , 4) is giveing us the highest value.
So these are the  best for maximum. 

So Second Cordinate ( 6.25 , 3) is giveing us the lost value.
   iv     4y2x8
   v      
y2x5

Sunday, September 29, 2019

Data Analysis of Purchase Order of an Organization with AI

In this post, I will explain the  Python data Analysis Concept with AI Question with Live Data and Code and Data.

Topic Main Points


 1- Python
 2- Pandas
 3- Numpy
 4- MatplotLib
 5- PO of Industrial Organization.
 6- AI of Industrial Purchase Question of a perticular Session.

Now lets Start, I will explain the AI Question which could be raised against a perticular data set  PO against a perticular session.
  
I tried top explain  all the Topics Sequenctillay, what i bignner Data Scientiest need to know.

Step 1:- Creating the first Data Frame of  Main PO Details 

# query Master Reading



Pur_order_mast = "SELECT  
                               URN_NO,                                                                  convert(nvarchar,pur_order_date,103)as Date,
 Pur_Order_No as [Order_No],
supp_code as [Sup_Code] 
from purchase_order_mast  
where (PO_Status='Approved' or PO_Status='Not Approved')  
and co_code like '%a%'" 

Pur_df_1 = pd.read_sql(Pur_order_mast, conn)


# Testing of Data

Pur_df_1.head(5)
Step 2:- Create Second Data Frame for PO Details Part

# Query for Details Reading
Pur_order_det = "SELECT URN_NO,it_code,alt_qty as [Item_qty],alt_rate as Rate from purchase_order_det  where (PO_Status='Approved' or PO_Status='Not Approved')and co_code like '%a%'" 

Pur_df_2 = pd.read_sql(Pur_order_det, conn)

# Testing of Data
Pur_df_2.head(5)

Create Second Data Frame for PO Details Part Data Science

Step 3:- Create Third Data Frame of Itemmaster

# Query for Itemmaster Reading
Itemmaster = "SELECT it_code,alt_name as [Item_Name] from itemmaster  where (item_Status='Approved' or item_Status='Not Approved')and co_code like '%a%'" 
Itemmaster = pd.read_sql(Itemmaster, conn)


Create Third Data Frame of Itemmaster Data Science

Step 4:- Create Fourth Data Frame of Supplier Master
# Query for Itemmaster Reading
AccountMaster = "SELECT Sr_no,ac_name from acct_mast  where (acc_Status='Approved' or acc_Status='Not Approved')and co_code like '%a%'" 
AccountMaster = pd.read_sql(AccountMaster, conn)

#Testing of DATA
AccountMaster.head(5)

Create Fourth Data Frame of Supplier Master Data Science

Step 5:- Merging of all dataframe object into single Data Frame


# Merging of data Sequentially and  Making a useful data
# First we will create master Data Frame of from the available 
# dataframe objects
# first we will merge details and master part
Final_order_details = pd.merge(Pur_df_2,Pur_df_1)

# Purchase order main data and Purchase order details data merging
Final_order_details.head(5)
Pandas Merging of all dataframe object into single Data Frame

# Now we will merge the data of accounts name to the current frame

Final_order_details = pd.merge(Final_order_details,AccountMaster, left_on ='Sup_Code', right_on='Sr_no')

# Now we will merge the item name to the data set

Final_order_details = pd.merge(Final_order_details,Itemmaster, on ='it_code')

Final_order_details.head(5)

Step 6:- Function to remove duplicate rows 

# function to remove duplicates key from a DataFrame 
def getDuplicateColumns(df):
    '''
    Get a list of duplicate columns.
    It will iterate over all the columns in dataframe and find the columns whose content are duplicate.
    :param df: Dataframe object
    :return: List of columns whose contents are duplicates.
    '''
    duplicateColumnNames = set()
    # Iterate over all the column in DataFrame
    for x in range(df.shape[1]):
        # Select column at xth index.
        col = df.iloc[:, x]
        # Iterate over all the columns in DataFrame from (x+1)th index till end
        for y in range(x + 1, df.shape[1]):
            # Select column at yth index.
            otherCol = df.iloc[:, y]
            # Check if two columns at x 7 y index are equal
            if col.equals(otherCol):
                duplicateColumnNames.add(df.columns.values[y])
    return list(duplicateColumnNames)


#Final_order_details.drop (["it_code","Sup_Code","Sr_no","Item_Name_x"], axis = 1)
Final_order_details = Final_order_details.drop
(columns=getDuplicateColumns(Final_order_details))


# Finally print the Usable DataFrame and Test 
#and check that only useful data is ready for anylysis.
Final_order_details.head(2)
Finally print the Usable DataFrame and Test  Data Science


Now from the available data we have to answer the following Question

# Question 1:- What is the Monthely Costing on Purchase.


Complete Code 

# parameters
import pandas as pd
import pyodbc
from pandas import Series

DB = {'servername': '103.27.100.100',
      'database': 'dizsweb'
     }
# create the connection
conn = pyodbc.connect('DRIVER={SQL Server};
SERVER=' + DB['servername'] + ';
DATABASE=' + DB['database'] + ';
UID= dizsweb;PWD= dizsweb
;Trusted_Connection=no')

# function to remove duplicates key from a DataFrame
def getDuplicateColumns(df):
    '''
    Get a list of duplicate columns.
    It will iterate over all the columns in dataframe and find the columns whose content are duplicate.
    :param df: Dataframe object
    :return: List of columns whose contents are duplicates.
    '''
    duplicateColumnNames = set()
    for x in range(df.shape[1]):
        # Select column at xth index.
        col = df.iloc[:, x]
        # Iterate over all the columns in DataFrame from (x+1)th index till end
        for y in range(x + 1, df.shape[1]):
            otherCol = df.iloc[:, y]
            if col.equals(otherCol):
                duplicateColumnNames.add(df.columns.values[y])
    return list(duplicateColumnNames)
Pur_order_mast = "SELECT URN_NO,pur_order_date as Date,Pur_Order_No as [Order_No],supp_code as [Sup_Code] from purchase_order_mast  where (PO_Status='Approved' or PO_Status='Not Approved')  and co_code like '%a%'"
Pur_df_1 = pd.read_sql(Pur_order_mast, conn)
Pur_df_1.head(5)
Pur_order_det = "SELECT URN_NO,it_code,alt_qty as [Item_qty],alt_rate as Rate from purchase_order_det  where (PO_Status='Approved' or PO_Status='Not Approved')and co_code like '%a%'"
Pur_df_2 = pd.read_sql(Pur_order_det, conn)
Pur_df_2.head(5)
Itemmaster = "SELECT it_code,alt_name as [Item_Name] from itemmaster  where (item_Status='Approved' or item_Status='Not Approved')and co_code like '%a%'"
Itemmaster = pd.read_sql(Itemmaster, conn)
Itemmaster.head(5)
AccountMaster = "SELECT Sr_no,ac_name from acct_mast  where (acc_Status='Approved' or acc_Status='Not Approved')and co_code like '%a%'"
AccountMaster = pd.read_sql(AccountMaster, conn)
Final_order_details = pd.merge(Pur_df_2,Pur_df_1)
Final_order_details.head(5)
Final_order_details = pd.merge(Final_order_details,AccountMaster, left_on ='Sup_Code', right_on='Sr_no')
Final_order_details = pd.merge(Final_order_details,Itemmaster, on ='it_code')
Final_order_details['Amount'] =  Final_order_details['Item_qty']* Final_order_details['Rate']
Final_order_details.head(5)
#Final_order_details.drop(["it_code","Sup_Code","Sr_no","Item_Name_x"], axis = 1)
Final_order_details = Final_order_details.drop(columns=getDuplicateColumns(Final_order_details))
Final_order_details.head(5)
Final_order_details.head(5)
Final_order_details.head(2)
Monthely_purchase_costing = Final_order_details[['Date','Amount']]
Monthely_purchase_costing.info()
Monthely_purchase_costing['Date'] = pd.to_datetime(Monthely_purchase_costing['Date'],format='%y-%m-%d')
Monthely_purchase_costing.head(8)
Monthely_purchase_costing.info()
Monthely_purchase_costing2 = Monthely_purchase_costing.groupby([pd.Grouper(key='Date', freq='M')]).agg({'Amount':sum})
Monthely_purchase_costing2
Monthely_purchase_costing2.info()
Monthely_purchase_costing["Year"],Monthely_purchase_costing["Month"],Monthely_purchase_costing["Day"] = Monthely_purchase_costing.Date.dt.year,Monthely_purchase_costing.Date.dt.month,Monthely_purchase_costing.Date.dt.day
Monthely_purchase_costing_OK=Monthely_purchase_costing
Monthely_purchase_costing_OK
Month_costing = Monthely_purchase_costing_OK.groupby(['Month']).sum()
import matplotlib.dates as dates
%matplotlib inline
df=pd.DataFrame(
    { 'x': Month_costing1.index,
     'y1': Month_costing1.Amount
    })


plt.plot( 'x', 'y1', data=df, marker='o', markerfacecolor='blue', markersize=1, color='skyblue', linewidth=1)
plt.legend()



Translate