Skip to content

Commit 3d84bb0

Browse files
committed
Activity
1 parent 03b0d25 commit 3d84bb0

File tree

8 files changed

+41861
-0
lines changed

8 files changed

+41861
-0
lines changed

Chapter 3/.DS_Store

0 Bytes
Binary file not shown.

Chapter 3/Activity/.DS_Store

6 KB
Binary file not shown.
6 KB
Binary file not shown.
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
# Author : Mohamed Noordeen
4+
# Date : 22-04-2019
5+
6+
7+
#import Library
8+
import pandas as pd
9+
10+
11+
# 1.Load the Data
12+
13+
14+
df = pd.read_csv("Wholesale customers data.csv")
15+
16+
17+
# 2.Understand the Data Features
18+
19+
20+
21+
#Finding number of rows and columns
22+
print("Number of rows and columns : ",df.shape)
23+
24+
25+
26+
#Basic Information about all the columns
27+
print("Basic Information about all the column : ")
28+
# DataFrame.info() writes its summary straight to stdout and returns None,
# so call it directly; wrapping it in print() would emit a stray "None" line.
df.info()
29+
30+
31+
32+
#Basic Statistics about all the columns
33+
print("Basic Statistics about all the column : ")
34+
print(df.describe().transpose())
35+
36+
37+
# 3.Check for NULL values and their datatypes
38+
39+
40+
41+
#checking for any null in each column
42+
null_ = df.isna().any()
43+
print(null_)
44+
45+
46+
47+
#finding datatypes of each column
48+
dtypes = df.dtypes
49+
print(dtypes)
50+
51+
52+
53+
#Combining both null and datatypes of each column
54+
info = pd.concat([null_,dtypes],axis = 1,keys = ['Null','type'])
55+
print(info)
56+
57+
58+
# 4.Remove the missing values (if any)
59+
#
60+
# Since there are no missing values in the data set we have nothing to handle
61+
62+
63+
64+
#finding the data types of each column and checking for null
65+
null_ = df.isna().any()
66+
dtypes = df.dtypes
67+
info = pd.concat([null_,dtypes],axis = 1,keys = ['Null','type'])
68+
print(info)
69+
70+
71+
# 5.Perform Feature Scaling
72+
73+
from sklearn import preprocessing
74+
75+
std_scale = preprocessing.StandardScaler().fit(df)
76+
df_std = pd.DataFrame(std_scale.transform(df),columns=df.columns)
77+
print("Standardised data : \n",df_std)
78+

0 commit comments

Comments
 (0)