Exercise - 2

nursnaaz · nursnaaz · commit 495d816fecd7 · 2019-06-20T22:19:40.000+05:30
diff --git a/Chapter 1/Excercises/Excercise_2_remove_missing_data.ipynb b/Chapter 1/Excercises/Excercise_2_remove_missing_data.ipynb
@@ -0,0 +1,192 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Load the dataset into a pandas DataFrame. To do so, first import the pandas library, and then use the function `pd.read_csv()`, as shown below:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Read the CSV file into the DataFrame object df\n",
+    "df = pd.read_csv('../Data/Banking_Marketing.csv', header=0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Print the data type of each column. To do so, use the `dtypes` attribute of the pandas DataFrame."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "age               float64\n",
+       "job                object\n",
+       "marital            object\n",
+       "education          object\n",
+       "default            object\n",
+       "housing            object\n",
+       "loan               object\n",
+       "contact            object\n",
+       "month              object\n",
+       "day_of_week        object\n",
+       "duration          float64\n",
+       "campaign            int64\n",
+       "pdays               int64\n",
+       "previous            int64\n",
+       "poutcome           object\n",
+       "emp_var_rate      float64\n",
+       "cons_price_idx    float64\n",
+       "cons_conf_idx     float64\n",
+       "euribor3m         float64\n",
+       "nr_employed       float64\n",
+       "y                   int64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Find the data type of each column\n",
+    "df.dtypes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Print the number of missing values in each column. To do so, use the `isna()` function of the pandas DataFrame and sum the result per column with `sum()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "age               2\n",
+       "job               0\n",
+       "marital           0\n",
+       "education         0\n",
+       "default           0\n",
+       "housing           0\n",
+       "loan              0\n",
+       "contact           6\n",
+       "month             0\n",
+       "day_of_week       0\n",
+       "duration          7\n",
+       "campaign          0\n",
+       "pdays             0\n",
+       "previous          0\n",
+       "poutcome          0\n",
+       "emp_var_rate      0\n",
+       "cons_price_idx    0\n",
+       "cons_conf_idx     0\n",
+       "euribor3m         0\n",
+       "nr_employed       0\n",
+       "y                 0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.isna().sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Remove all rows that contain missing values from the DataFrame. To do so, use the function `dropna()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "age               0\n",
+       "job               0\n",
+       "marital           0\n",
+       "education         0\n",
+       "default           0\n",
+       "housing           0\n",
+       "loan              0\n",
+       "contact           0\n",
+       "month             0\n",
+       "day_of_week       0\n",
+       "duration          0\n",
+       "campaign          0\n",
+       "pdays             0\n",
+       "previous          0\n",
+       "poutcome          0\n",
+       "emp_var_rate      0\n",
+       "cons_price_idx    0\n",
+       "cons_conf_idx     0\n",
+       "euribor3m         0\n",
+       "nr_employed       0\n",
+       "y                 0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Remove the rows that contain null values\n",
+    "df = df.dropna()\n",
+    "# Check again whether any NA values remain\n",
+    "df.isna().sum()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}