In this post, I will explain the Python data Analysis Concept with AI Question with Live Data and Code and Data.
Topic Main Points
1- Python
2- Pandas
3- Numpy
4- MatplotLib
5- PO of Industrial Organization.
6- AI of Industrial Purchase Question of a perticular Session.
Now lets Start, I will explain the AI Question which could be raised against a perticular data set PO against a perticular session.
I tried top explain all the Topics Sequenctillay, what i bignner Data Scientiest need to know.
Step 1:- Creating the first Data Frame of Main PO Details
# query Master Reading
Pur_order_mast = "SELECT
URN_NO, convert(nvarchar,pur_order_date,103)as Date,
Pur_Order_No as [Order_No],
supp_code as [Sup_Code]
from purchase_order_mast
where (PO_Status='Approved' or PO_Status='Not Approved')
and co_code like '%a%'"
Pur_df_1 = pd.read_sql(Pur_order_mast, conn)
# Testing of Data
Pur_df_1.head(5)
Step 2:- Create Second Data Frame for PO Details Part
# Query for Details Reading
Pur_order_det = "SELECT URN_NO,it_code,alt_qty as [Item_qty],alt_rate as Rate from purchase_order_det where (PO_Status='Approved' or PO_Status='Not Approved')and co_code like '%a%'"
Pur_df_2 = pd.read_sql(Pur_order_det, conn)
# Testing of Data
Pur_df_2.head(5)
Step 3:- Create Third Data Frame of Itemmaster
# Query for Itemmaster Reading
Itemmaster = "SELECT it_code,alt_name as [Item_Name] from itemmaster where (item_Status='Approved' or item_Status='Not Approved')and co_code like '%a%'"
Itemmaster = pd.read_sql(Itemmaster, conn)
Step 4:- Create Fourth Data Frame of Supplier Master
# Query for Itemmaster Reading
AccountMaster = "SELECT Sr_no,ac_name from acct_mast where (acc_Status='Approved' or acc_Status='Not Approved')and co_code like '%a%'"
AccountMaster = pd.read_sql(AccountMaster, conn)
#Testing of DATA
AccountMaster.head(5)
Step 5:- Merging of all dataframe object into single Data Frame
# Merging of data Sequentially and Making a useful data
# First we will create master Data Frame of from the available
# dataframe objects
# first we will merge details and master part
Final_order_details = pd.merge(Pur_df_2,Pur_df_1)
# Purchase order main data and Purchase order details data merging
Final_order_details.head(5)
# Now we will merge the data of accounts name to the current frame
Final_order_details = pd.merge(Final_order_details,AccountMaster, left_on ='Sup_Code', right_on='Sr_no')
# Now we will merge the item name to the data set
Final_order_details = pd.merge(Final_order_details,Itemmaster, on ='it_code')
Final_order_details.head(5)
Step 6:- Function to remove duplicate rows
# function to remove duplicates key from a DataFrame
def getDuplicateColumns(df):
'''
Get a list of duplicate columns.
It will iterate over all the columns in dataframe and find the columns whose content are duplicate.
:param df: Dataframe object
:return: List of columns whose contents are duplicates.
'''
duplicateColumnNames = set()
# Iterate over all the column in DataFrame
for x in range(df.shape[1]):
# Select column at xth index.
col = df.iloc[:, x]
# Iterate over all the columns in DataFrame from (x+1)th index till end
for y in range(x + 1, df.shape[1]):
# Select column at yth index.
otherCol = df.iloc[:, y]
# Check if two columns at x 7 y index are equal
if col.equals(otherCol):
duplicateColumnNames.add(df.columns.values[y])
return list(duplicateColumnNames)
#Final_order_details.drop (["it_code","Sup_Code","Sr_no","Item_Name_x"], axis = 1)
Final_order_details = Final_order_details.drop
(columns=getDuplicateColumns(Final_order_details))
# Finally print the Usable DataFrame and Test
#and check that only useful data is ready for anylysis.
Final_order_details.head(2)
Now from the available data we have to answer the following Question
# Question 1:- What is the Monthely Costing on Purchase.
Complete Code
# parameters
import pandas as pd
import pyodbc
from pandas import Series
DB = {'servername': '103.27.100.100',
'database': 'dizsweb'
}
# create the connection
conn = pyodbc.connect('DRIVER={SQL Server};
SERVER=' + DB['servername'] + ';
DATABASE=' + DB['database'] + ';
UID= dizsweb;PWD= dizsweb
;Trusted_Connection=no')
# function to remove duplicates key from a DataFrame
def getDuplicateColumns(df):
'''
Get a list of
duplicate columns.
It will iterate
over all the columns in dataframe and find the columns whose content are
duplicate.
:param df:
Dataframe object
:return: List of
columns whose contents are duplicates.
'''
duplicateColumnNames = set()
for x in
range(df.shape[1]):
# Select
column at xth index.
col =
df.iloc[:, x]
# Iterate
over all the columns in DataFrame from (x+1)th index till end
for y in
range(x + 1, df.shape[1]):
otherCol
= df.iloc[:, y]
if
col.equals(otherCol):
duplicateColumnNames.add(df.columns.values[y])
return list(duplicateColumnNames)
Pur_order_mast = "SELECT URN_NO,pur_order_date as
Date,Pur_Order_No as [Order_No],supp_code as [Sup_Code] from
purchase_order_mast where
(PO_Status='Approved' or PO_Status='Not Approved') and co_code like '%a%'"
Pur_df_1 = pd.read_sql(Pur_order_mast, conn)
Pur_df_1.head(5)
Pur_order_det = "SELECT URN_NO,it_code,alt_qty as
[Item_qty],alt_rate as Rate from purchase_order_det where (PO_Status='Approved' or PO_Status='Not
Approved')and co_code like '%a%'"
Pur_df_2 = pd.read_sql(Pur_order_det, conn)
Pur_df_2.head(5)
Itemmaster = "SELECT it_code,alt_name as [Item_Name]
from itemmaster where
(item_Status='Approved' or item_Status='Not Approved')and co_code like
'%a%'"
Itemmaster = pd.read_sql(Itemmaster, conn)
Itemmaster.head(5)
AccountMaster = "SELECT Sr_no,ac_name from
acct_mast where (acc_Status='Approved'
or acc_Status='Not Approved')and co_code like '%a%'"
AccountMaster = pd.read_sql(AccountMaster, conn)
Final_order_details = pd.merge(Pur_df_2,Pur_df_1)
Final_order_details.head(5)
Final_order_details = pd.merge(Final_order_details,AccountMaster,
left_on ='Sup_Code', right_on='Sr_no')
Final_order_details =
pd.merge(Final_order_details,Itemmaster, on ='it_code')
Final_order_details['Amount'] = Final_order_details['Item_qty']*
Final_order_details['Rate']
Final_order_details.head(5)
#Final_order_details.drop(["it_code","Sup_Code","Sr_no","Item_Name_x"],
axis = 1)
Final_order_details = Final_order_details.drop(columns=getDuplicateColumns(Final_order_details))
Final_order_details.head(5)
Final_order_details.head(5)
Final_order_details.head(2)
Monthely_purchase_costing =
Final_order_details[['Date','Amount']]
Monthely_purchase_costing.info()
Monthely_purchase_costing['Date'] =
pd.to_datetime(Monthely_purchase_costing['Date'],format='%y-%m-%d')
Monthely_purchase_costing.head(8)
Monthely_purchase_costing.info()
Monthely_purchase_costing2 = Monthely_purchase_costing.groupby([pd.Grouper(key='Date',
freq='M')]).agg({'Amount':sum})
Monthely_purchase_costing2
Monthely_purchase_costing2.info()
Monthely_purchase_costing["Year"],Monthely_purchase_costing["Month"],Monthely_purchase_costing["Day"]
= Monthely_purchase_costing.Date.dt.year,Monthely_purchase_costing.Date.dt.month,Monthely_purchase_costing.Date.dt.day
Monthely_purchase_costing_OK=Monthely_purchase_costing
Monthely_purchase_costing_OK
Month_costing =
Monthely_purchase_costing_OK.groupby(['Month']).sum()
import matplotlib.dates as dates
%matplotlib inline
df=pd.DataFrame(
{ 'x':
Month_costing1.index,
'y1':
Month_costing1.Amount
})
plt.plot( 'x', 'y1', data=df, marker='o',
markerfacecolor='blue', markersize=1, color='skyblue', linewidth=1)
plt.legend()