Skip to content

Commit a1e72cb

Browse files
author
Michael Galarnyk
committed
Added the start of the machine learning from scratch folder (linear regression for now)
1 parent 79853b4 commit a1e72cb

14 files changed

+3697
-0
lines changed
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"<h1 align=\"center\"> Linear Regression </h1>"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 2,
13+
"metadata": {
14+
"collapsed": false
15+
},
16+
"outputs": [],
17+
"source": [
18+
"import numpy as np \n",
19+
"import matplotlib.pyplot as plt\n",
20+
"from mpl_toolkits.mplot3d import Axes3D\n",
21+
"from matplotlib import cm\n",
22+
"# import warmUpExercise as wue\n",
23+
"#import computeCost as cc\n",
24+
"#import gradientDescent as gd"
25+
]
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": 14,
30+
"metadata": {
31+
"collapsed": true
32+
},
33+
"outputs": [],
34+
"source": [
35+
"data = np.loadtxt('ex1data1.txt', delimiter=\",\")\n",
36+
"X = data[:,0]\n",
37+
"y = data[:,1]\n",
38+
"m = len(y) # number of training examples"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": 15,
44+
"metadata": {
45+
"collapsed": false
46+
},
47+
"outputs": [
48+
{
49+
"data": {
50+
"text/plain": [
51+
"(97L,)"
52+
]
53+
},
54+
"execution_count": 15,
55+
"metadata": {},
56+
"output_type": "execute_result"
57+
}
58+
],
59+
"source": [
60+
"X.shape"
61+
]
62+
},
63+
{
64+
"cell_type": "code",
65+
"execution_count": 16,
66+
"metadata": {
67+
"collapsed": false
68+
},
69+
"outputs": [
70+
{
71+
"data": {
72+
"text/plain": [
73+
"(97L, 2L)"
74+
]
75+
},
76+
"execution_count": 16,
77+
"metadata": {},
78+
"output_type": "execute_result"
79+
}
80+
],
81+
"source": [
82+
"np.column_stack((np.ones((m,1)), X)).shape"
83+
]
84+
},
85+
{
86+
"cell_type": "markdown",
87+
"metadata": {},
88+
"source": [
89+
"Add a column of ones to X so that theta<sub>0</sub> acts as the intercept term."
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": 17,
95+
"metadata": {
96+
"collapsed": false
97+
},
98+
"outputs": [],
99+
"source": [
100+
"X_padded = np.column_stack((np.ones((m,1)), X)) \n",
101+
"\n",
102+
"theta = np.zeros((2, 1)) # initialize fitting parameters"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": 26,
108+
"metadata": {
109+
"collapsed": false
110+
},
111+
"outputs": [
112+
{
113+
"data": {
114+
"text/plain": [
115+
"(97L,)"
116+
]
117+
},
118+
"execution_count": 26,
119+
"metadata": {},
120+
"output_type": "execute_result"
121+
}
122+
],
123+
"source": [
124+
"np.transpose(y).shape"
125+
]
126+
},
127+
{
128+
"cell_type": "code",
129+
"execution_count": 36,
130+
"metadata": {
131+
"collapsed": false
132+
},
133+
"outputs": [
134+
{
135+
"data": {
136+
"text/plain": [
137+
"(97L, 1L)"
138+
]
139+
},
140+
"execution_count": 36,
141+
"metadata": {},
142+
"output_type": "execute_result"
143+
}
144+
],
145+
"source": [
146+
"# (97L, 1L)\n",
147+
"np.power(( X_padded.dot(theta) - np.transpose([y]) ), 2)"
148+
]
149+
},
150+
{
151+
"cell_type": "code",
152+
"execution_count": 37,
153+
"metadata": {
154+
"collapsed": true
155+
},
156+
"outputs": [],
157+
"source": [
158+
"?np.sum()"
159+
]
160+
},
161+
{
162+
"cell_type": "code",
163+
"execution_count": null,
164+
"metadata": {
165+
"collapsed": true
166+
},
167+
"outputs": [],
168+
"source": [
def computeCost(X, y, theta):
    """Compute the squared-error cost of a linear-regression fit.

    Evaluates J(theta) = 1 / (2 * m) * sum((X.dot(theta) - y) ** 2), where m
    is the number of training examples (``len(y)``).

    Parameters
    ----------
    X : array-like, shape (m, n + 1)
        Design matrix, one row per training example.
    y : array-like holding m target values
        Accepted either as a 1-D vector of length m or as an (m, 1) column.
    theta : array-like holding n + 1 parameters
        Accepted either as a 1-D vector or as an (n + 1, 1) column.

    Returns
    -------
    numpy.ndarray, shape (1,)
        The cost J. It is a one-element array (not a Python float) because
        the squared residuals are summed along axis 0 of an (m, 1) column.
    """
    m = len(y)  # number of training examples

    # Force both operands into column form. Without this, a column-shaped y
    # or a 1-D theta would broadcast against the (m, 1) predictions and the
    # sum below would no longer collapse to a single cost value.
    y_col = np.reshape(y, (-1, 1))
    theta_col = np.reshape(theta, (-1, 1))

    residuals = np.dot(X, theta_col) - y_col      # (m, 1) prediction errors
    squared_errors = np.power(residuals, 2)

    J = (1.0 / (2 * m)) * squared_errors.sum(axis=0)
    return J
195+
]
196+
},
197+
{
198+
"cell_type": "code",
199+
"execution_count": null,
200+
"metadata": {
201+
"collapsed": true
202+
},
203+
"outputs": [],
204+
"source": [
def gradientDescent(X, y, theta, alpha, num_iters):
    """Run batch gradient descent to learn theta for linear regression.

    Takes ``num_iters`` gradient steps with learning rate ``alpha``. Each
    step applies

        theta := theta - alpha * (1 / m) * X^T (X.dot(theta) - y)

    and then records the cost of the updated theta.

    Parameters
    ----------
    X : numpy.ndarray, shape (m, n + 1)
        Design matrix, one row per training example.
    y : array-like holding m target values
        Accepted either as a 1-D vector of length m or as an (m, 1) column.
    theta : array-like holding n + 1 parameters
        Starting parameters; the caller's array is not modified.
    alpha : float
        Learning rate.
    num_iters : int
        Number of gradient steps to take.

    Returns
    -------
    theta : numpy.ndarray, shape (n + 1, 1)
        Parameters after the final step.
    J_history : numpy.ndarray, shape (num_iters, 1)
        Cost after each step, as returned by ``computeCost``.
    """
    m = len(y)  # number of training examples

    # Loop-invariant: build the (m, 1) target column once instead of on
    # every iteration, and accept y in either 1-D or column form.
    y_col = np.reshape(y, (-1, 1))
    y_flat = y_col[:, 0]
    theta = np.reshape(theta, (-1, 1))

    J_history = np.zeros((num_iters, 1))

    # range (not xrange) so the cell runs on both Python 2 and Python 3.
    for i in range(num_iters):
        errors = X.dot(theta) - y_col
        theta = theta - alpha * (1.0 / m) * np.transpose(X).dot(errors)

        # Save the cost J in every iteration. computeCost is the function
        # defined in this notebook, so no per-iteration module import is
        # needed; it is handed the 1-D target vector.
        J_history[i] = computeCost(X, y_flat, theta)

    return theta, J_history
250+
]
251+
}
252+
],
253+
"metadata": {
254+
"anaconda-cloud": {},
255+
"kernelspec": {
256+
"display_name": "Python [conda root]",
257+
"language": "python",
258+
"name": "conda-root-py"
259+
},
260+
"language_info": {
261+
"codemirror_mode": {
262+
"name": "ipython",
263+
"version": 2
264+
},
265+
"file_extension": ".py",
266+
"mimetype": "text/x-python",
267+
"name": "python",
268+
"nbconvert_exporter": "python",
269+
"pygments_lexer": "ipython2",
270+
"version": "2.7.12"
271+
}
272+
},
273+
"nbformat": 4,
274+
"nbformat_minor": 1
275+
}

0 commit comments

Comments
 (0)