|
#!/usr/bin/env python
# coding: utf-8
# Author : Mohamed Noordeen
# Date : 22-04-2019
"""Explore the wholesale-customers CSV and standardise its columns.

Steps, in order:
  1. Load "Wholesale customers data.csv" from the current working directory.
  2. Print the shape, the column summary and the descriptive statistics.
  3. Print, per column, whether it contains nulls and what its dtype is.
  4. Re-print that null/dtype table for the missing-value step.
  5. Standardise every column with scikit-learn's StandardScaler and print
     the resulting frame.
"""

# Import libraries
import pandas as pd
from sklearn import preprocessing


# 1. Load the data

# The path is relative, so the script must be run from the directory that
# holds the CSV file.
df = pd.read_csv("Wholesale customers data.csv")


# 2. Understand the data features

# Number of rows and columns
print("Number of rows and columns : ", df.shape)

# Basic information about all the columns.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
print("Basic Information about all the column : ")
df.info()

# Basic statistics about all the columns (one row per column after transpose)
print("Basic Statistics about all the column : ")
print(df.describe().transpose())


# 3. Check for NULL values and their datatypes

# Per-column flag: True when the column holds at least one missing value
null_ = df.isna().any()
print(null_)

# Finding the datatype of each column
dtypes = df.dtypes
print(dtypes)

# Combine the null flags and the datatypes into one table, side by side
info = pd.concat([null_, dtypes], axis=1, keys=['Null', 'type'])
print(info)


# 4. Remove the missing values (if any)
#
# The original author notes that this data set has no missing values, so
# nothing is removed here. `df` has not been modified since step 3, so the
# table built there is still current and is simply shown again.
print(info)


# 5. Perform feature scaling

# NOTE(review): the scaler is fitted on every column of `df`; this assumes all
# columns are numeric and that all of them are meant to be scaled - confirm.
std_scale = preprocessing.StandardScaler().fit(df)

# transform() returns a plain array, so the column labels are re-attached.
df_std = pd.DataFrame(std_scale.transform(df), columns=df.columns)

# Label and frame are printed separately so that no separator space is
# inserted in front of the frame's header row.
print("Standardised data : ")
print(df_std)
0 commit comments