|
33 | 33 | "url = \"https://dl.dropbox.com/s/lnly9gw8pb1xhir/overfitting.zip\"\n",
|
34 | 34 | "\n",
|
35 | 35 | "\n",
|
36 |
| - "results = requests.get(url)\n", |
37 |
| - "\n" |
| 36 | + "results = requests.get(url)" |
38 | 37 | ],
|
39 | 38 | "language": "python",
|
40 | 39 | "metadata": {},
|
|
47 | 46 | "input": [
|
48 | 47 | "import StringIO\n",
|
49 | 48 | "z = zipfile.ZipFile(StringIO.StringIO(results.content))\n",
|
50 |
| - "#z.extractall()\n", |
51 |
| - "\n" |
| 49 | + "# z.extractall()" |
52 | 50 | ],
|
53 | 51 | "language": "python",
|
54 | 52 | "metadata": {},
|
|
120 | 118 | "cell_type": "code",
|
121 | 119 | "collapsed": false,
|
122 | 120 | "input": [
|
123 |
| - "M = np.fromstring(d.read(), sep=\",\" )" |
| 121 | + "M = np.fromstring(d.read(), sep=\",\")" |
124 | 122 | ],
|
125 | 123 | "language": "python",
|
126 | 124 | "metadata": {},
|
|
234 | 232 | "cell_type": "code",
|
235 | 233 | "collapsed": false,
|
236 | 234 | "input": [
|
237 |
| - "ix_training = data[:,1] == 1\n", |
238 |
| - "ix_testing = data[:,1] == 0\n", |
| 235 | + "ix_training = data[:, 1] == 1\n", |
| 236 | + "ix_testing = data[:, 1] == 0\n", |
239 | 237 | "\n",
|
240 |
| - "training_data = data[ ix_training, 5: ]\n", |
241 |
| - "testing_data = data[ ix_testing, 5: ]\n", |
| 238 | + "training_data = data[ix_training, 5:]\n", |
| 239 | + "testing_data = data[ix_testing, 5:]\n", |
242 | 240 | "\n",
|
243 |
| - "training_labels = data[ ix_training, 2]\n", |
244 |
| - "testing_labels = data[ ix_testing, 2]\n", |
| 241 | + "training_labels = data[ix_training, 2]\n", |
| 242 | + "testing_labels = data[ix_testing, 2]\n", |
245 | 243 | "\n",
|
246 | 244 | "print \"training:\", training_data.shape, training_labels.shape\n",
|
247 | 245 | "print \"testing: \", testing_data.shape, testing_labels.shape"
|
|
280 | 278 | "cell_type": "code",
|
281 | 279 | "collapsed": false,
|
282 | 280 | "input": [
|
283 |
| - "figsize( 12, 4 )" |
| 281 | + "figsize(12, 4)" |
284 | 282 | ],
|
285 | 283 | "language": "python",
|
286 | 284 | "metadata": {},
|
|
291 | 289 | "cell_type": "code",
|
292 | 290 | "collapsed": false,
|
293 | 291 | "input": [
|
294 |
| - "hist( training_data.flatten() )\n", |
295 |
| - "print training_data.shape[0]*training_data.shape[1]" |
| 292 | + "hist(training_data.flatten())\n", |
| 293 | + "print training_data.shape[0] * training_data.shape[1]" |
296 | 294 | ],
|
297 | 295 | "language": "python",
|
298 | 296 | "metadata": {},
|
|
324 | 322 | "input": [
|
325 | 323 | "import pymc as pm\n",
|
326 | 324 | "\n",
|
327 |
| - "to_include = pm.Bernoulli( \"to_include\", 0.5, size= 200 )" |
| 325 | + "to_include = pm.Bernoulli(\"to_include\", 0.5, size=200)" |
328 | 326 | ],
|
329 | 327 | "language": "python",
|
330 | 328 | "metadata": {},
|
|
335 | 333 | "cell_type": "code",
|
336 | 334 | "collapsed": false,
|
337 | 335 | "input": [
|
338 |
| - "coef = pm.Uniform( \"coefs\", 0, 1, size = 200 )" |
| 336 | + "coef = pm.Uniform(\"coefs\", 0, 1, size=200)" |
339 | 337 | ],
|
340 | 338 | "language": "python",
|
341 | 339 | "metadata": {},
|
|
347 | 345 | "collapsed": false,
|
348 | 346 | "input": [
|
349 | 347 | "@pm.deterministic\n",
|
350 |
| - "def Z( coef = coef, to_include = to_include, data = training_data ):\n", |
351 |
| - " ym = np.dot( to_include*training_data, coef )\n", |
| 348 | + "def Z(coef=coef, to_include=to_include, data=training_data):\n", |
| 349 | + " ym = np.dot(to_include * training_data, coef)\n", |
352 | 350 | " return ym - ym.mean()"
|
353 | 351 | ],
|
354 | 352 | "language": "python",
|
|
361 | 359 | "collapsed": false,
|
362 | 360 | "input": [
|
363 | 361 | "@pm.deterministic\n",
|
364 |
| - "def T( z = Z ):\n", |
365 |
| - " return 0.45*(np.sign(z) + 1.1)" |
| 362 | + "def T(z=Z):\n", |
| 363 | + " return 0.45 * (np.sign(z) + 1.1)" |
366 | 364 | ],
|
367 | 365 | "language": "python",
|
368 | 366 | "metadata": {},
|
|
373 | 371 | "cell_type": "code",
|
374 | 372 | "collapsed": false,
|
375 | 373 | "input": [
|
376 |
| - "obs = pm.Bernoulli( \"obs\", T, value = training_labels, observed = True)\n", |
| 374 | + "obs = pm.Bernoulli(\"obs\", T, value=training_labels, observed=True)\n", |
377 | 375 | "\n",
|
378 |
| - "model = pm.Model( [to_include, coef, Z, T, obs] )\n", |
379 |
| - "map_ = pm.MAP( model )\n", |
| 376 | + "model = pm.Model([to_include, coef, Z, T, obs])\n", |
| 377 | + "map_ = pm.MAP(model)\n", |
380 | 378 | "map_.fit()"
|
381 | 379 | ],
|
382 | 380 | "language": "python",
|
|
396 | 394 | "cell_type": "code",
|
397 | 395 | "collapsed": false,
|
398 | 396 | "input": [
|
399 |
| - "mcmc = pm.MCMC( model )" |
| 397 | + "mcmc = pm.MCMC(model)" |
400 | 398 | ],
|
401 | 399 | "language": "python",
|
402 | 400 | "metadata": {},
|
|
407 | 405 | "cell_type": "code",
|
408 | 406 | "collapsed": false,
|
409 | 407 | "input": [
|
410 |
| - "mcmc.sample(100000, 90000,1) " |
| 408 | + "mcmc.sample(100000, 90000, 1)" |
411 | 409 | ],
|
412 | 410 | "language": "python",
|
413 | 411 | "metadata": {},
|
|
434 | 432 | "cell_type": "code",
|
435 | 433 | "collapsed": false,
|
436 | 434 | "input": [
|
437 |
| - "(np.round(T.value) == training_labels ).mean()" |
| 435 | + "(np.round(T.value) == training_labels).mean()" |
438 | 436 | ],
|
439 | 437 | "language": "python",
|
440 | 438 | "metadata": {},
|
|
454 | 452 | "collapsed": false,
|
455 | 453 | "input": [
|
456 | 454 | "t_trace = mcmc.trace(\"T\")[:]\n",
|
457 |
| - "(np.round( t_trace[-500:-400,:]).mean(axis=0) == training_labels ).mean()" |
| 455 | + "(np.round(t_trace[-500:-400, :]).mean(axis=0) == training_labels).mean()" |
458 | 456 | ],
|
459 | 457 | "language": "python",
|
460 | 458 | "metadata": {},
|
|
473 | 471 | "cell_type": "code",
|
474 | 472 | "collapsed": false,
|
475 | 473 | "input": [
|
476 |
| - "t_mean = np.round( t_trace).mean(axis=1)\n" |
| 474 | + "t_mean = np.round(t_trace).mean(axis=1)" |
477 | 475 | ],
|
478 | 476 | "language": "python",
|
479 | 477 | "metadata": {},
|
|
484 | 482 | "cell_type": "code",
|
485 | 483 | "collapsed": false,
|
486 | 484 | "input": [
|
487 |
| - "imshow(t_trace[-10000:,:], aspect=\"auto\")\n", |
| 485 | + "imshow(t_trace[-10000:, :], aspect=\"auto\")\n", |
488 | 486 | "colorbar()"
|
489 | 487 | ],
|
490 | 488 | "language": "python",
|
|
508 | 506 | "cell_type": "code",
|
509 | 507 | "collapsed": false,
|
510 | 508 | "input": [
|
511 |
| - "figsize( 23, 8)\n", |
| 509 | + "figsize(23, 8)\n", |
512 | 510 | "coef_trace = mcmc.trace(\"coefs\")[:]\n",
|
513 |
| - "imshow(coef_trace[-10000:,:], aspect=\"auto\", cmap=pyplot.cm.RdBu, interpolation=\"none\")" |
| 511 | + "imshow(coef_trace[-10000:, :], aspect=\"auto\", cmap=pyplot.cm.RdBu, interpolation=\"none\")" |
514 | 512 | ],
|
515 | 513 | "language": "python",
|
516 | 514 | "metadata": {},
|
|
544 | 542 | "cell_type": "code",
|
545 | 543 | "collapsed": false,
|
546 | 544 | "input": [
|
547 |
| - "figsize( 23, 8)\n", |
548 |
| - "imshow(include_trace[-10000:,:], aspect=\"auto\", interpolation=\"none\")" |
| 545 | + "figsize(23, 8)\n", |
| 546 | + "imshow(include_trace[-10000:, :], aspect=\"auto\", interpolation=\"none\")" |
549 | 547 | ],
|
550 | 548 | "language": "python",
|
551 | 549 | "metadata": {},
|
|
0 commit comments