|
146 | 146 | "cell_type": "code", |
147 | 147 | "collapsed": false, |
148 | 148 | "input": [ |
149 | | - "### create the observed data\n", |
| 149 | + "# create the observed data\n", |
150 | 150 | "\n", |
151 | | - "#sample size of data we observe, trying varying this (keep it less than 100 ;)\n", |
| 151 | + "# sample size of data we observe, trying varying this (keep it less than 100 ;)\n", |
152 | 152 | "N = 1\n", |
153 | 153 | "\n", |
154 | | - "#the true parameters, but of course we do not see these values...\n", |
| 154 | + "# the true parameters, but of course we do not see these values...\n", |
155 | 155 | "lambda_1_true = 1\n", |
156 | 156 | "lambda_2_true = 3\n", |
157 | 157 | "\n", |
158 | 158 | "#...we see the data generated, dependent on the above two values.\n", |
159 | 159 | "data = np.concatenate([\n", |
160 | 160 | " stats.poisson.rvs(lambda_1_true, size=(N, 1)),\n", |
161 | 161 | " stats.poisson.rvs(lambda_2_true, size=(N, 1))\n", |
162 | | - " ], axis=1)\n", |
| 162 | + "], axis=1)\n", |
163 | 163 | "print \"observed (2-dimensional,sample size = %d):\" % N, data\n", |
164 | 164 | "\n", |
165 | | - "#plotting details.\n", |
| 165 | + "# plotting details.\n", |
166 | 166 | "x = y = np.linspace(.01, 5, 100)\n", |
167 | 167 | "likelihood_x = np.array([stats.poisson.pmf(data[:, 0], _x)\n", |
168 | 168 | " for _x in x]).prod(axis=1)\n", |
169 | 169 | "likelihood_y = np.array([stats.poisson.pmf(data[:, 1], _y)\n", |
170 | 170 | " for _y in y]).prod(axis=1)\n", |
171 | | - "L = np.dot(likelihood_x[:, None], likelihood_y[None, :])" |
| 171 | + "L = np.dot(likelihood_x[:, None], likelihood_y[None, :]);" |
172 | 172 | ], |
173 | 173 | "language": "python", |
174 | 174 | "metadata": {}, |
|
188 | 188 | "collapsed": false, |
189 | 189 | "input": [ |
190 | 190 | "figsize(12.5, 12)\n", |
191 | | - "#matplotlib heavy lifting below, beware!\n", |
| 191 | + "# matplotlib heavy lifting below, beware!\n", |
192 | 192 | "plt.subplot(221)\n", |
193 | 193 | "uni_x = stats.uniform.pdf(x, loc=0, scale=5)\n", |
194 | 194 | "uni_y = stats.uniform.pdf(x, loc=0, scale=5)\n", |
|
319 | 319 | "collapsed": false, |
320 | 320 | "input": [ |
321 | 321 | "figsize(12.5, 4)\n", |
322 | | - "data = np.loadtxt(\"data/mixture_data.csv\", delimiter=\",\")\n", |
| 322 | + "data = np.loadtxt(\"data/mixture_data.csv\", delimiter=\",\")\n", |
323 | 323 | "\n", |
324 | | - "plt.hist(data, bins=20, color=\"k\", histtype=\"stepfilled\", alpha=0.8)\n", |
| 324 | + "plt.hist(data, bins=20, color=\"k\", histtype=\"stepfilled\", alpha=0.8)\n", |
325 | 325 | "plt.title(\"Histogram of the dataset\")\n", |
326 | 326 | "plt.ylim([0, None])\n", |
327 | | - "print data[:10], \"...\"" |
| 327 | + "print data[:10], \"...\";" |
328 | 328 | ], |
329 | 329 | "language": "python", |
330 | 330 | "metadata": {}, |
|
376 | 376 | "\n", |
377 | 377 | "p = pm.Uniform(\"p\", 0, 1)\n", |
378 | 378 | "\n", |
379 | | - "assignment = pm.Categorical(\"assignment\", [p, 1-p], size=data.shape[0])\n", |
| 379 | + "assignment = pm.Categorical(\"assignment\", [p, 1 - p], size=data.shape[0])\n", |
380 | 380 | "print \"prior assignment, with p = %.2f:\" % p.value\n", |
381 | | - "print assignment.value[:10], \"...\"" |
| 381 | + "print assignment.value[:10], \"...\";" |
382 | 382 | ], |
383 | 383 | "language": "python", |
384 | 384 | "metadata": {}, |
|
415 | 415 | "cell_type": "code", |
416 | 416 | "collapsed": false, |
417 | 417 | "input": [ |
418 | | - "taus = 1.0/pm.Uniform(\"stds\", 0, 100, size=2) ** 2\n", |
| 418 | + "taus = 1.0 / pm.Uniform(\"stds\", 0, 100, size=2) ** 2\n", |
419 | 419 | "centers = pm.Normal(\"centers\", [120, 190], [0.01, 0.01], size=2)\n", |
420 | 420 | "\n", |
421 | 421 | "\"\"\"\n", |
422 | 422 | "The below deterministic functions map an assignment, in this case 0 or 1,\n", |
423 | 423 | "to a set of parameters, located in the (1,2) arrays `taus` and `centers`.\n", |
424 | 424 | "\"\"\"\n", |
425 | 425 | "\n", |
| 426 | + "\n", |
426 | 427 | "@pm.deterministic\n", |
427 | 428 | "def center_i(assignment=assignment, centers=centers):\n", |
428 | 429 | " return centers[assignment]\n", |
429 | 430 | "\n", |
| 431 | + "\n", |
430 | 432 | "@pm.deterministic\n", |
431 | 433 | "def tau_i(assignment=assignment, taus=taus):\n", |
432 | 434 | " return taus[assignment]\n", |
433 | 435 | "\n", |
434 | 436 | "print \"Random assignments: \", assignment.value[:4], \"...\"\n", |
435 | 437 | "print \"Assigned center: \", center_i.value[:4], \"...\"\n", |
436 | | - "print \"Assigned precision: \", tau_i.value[:4], \"...\"" |
| 438 | + "print \"Assigned precision: \", tau_i.value[:4], \"...\";" |
437 | 439 | ], |
438 | 440 | "language": "python", |
439 | 441 | "metadata": {}, |
|
454 | 456 | "cell_type": "code", |
455 | 457 | "collapsed": false, |
456 | 458 | "input": [ |
457 | | - "#and to combine it with the observations:\n", |
| 459 | + "# and to combine it with the observations:\n", |
458 | 460 | "observations = pm.Normal(\"obs\", center_i, tau_i, value=data, observed=True)\n", |
459 | 461 | "\n", |
460 | | - "#below we create a model class\n", |
461 | | - "model = pm.Model([p, assignment, taus, centers])" |
| 462 | + "# below we create a model class\n", |
| 463 | + "model = pm.Model([p, assignment, taus, centers]);" |
462 | 464 | ], |
463 | 465 | "language": "python", |
464 | 466 | "metadata": {}, |
|
481 | 483 | "collapsed": false, |
482 | 484 | "input": [ |
483 | 485 | "mcmc = pm.MCMC(model)\n", |
484 | | - "mcmc.sample(50000)" |
| 486 | + "mcmc.sample(50000);" |
485 | 487 | ], |
486 | 488 | "language": "python", |
487 | 489 | "metadata": {}, |
|
520 | 522 | "lw = 1\n", |
521 | 523 | "center_trace = mcmc.trace(\"centers\")[:]\n", |
522 | 524 | "\n", |
523 | | - "#for pretty colors later in the book.\n", |
| 525 | + "# for pretty colors later in the book.\n", |
524 | 526 | "colors = [\"#348ABD\", \"#A60628\"] if center_trace[-1, 0] > center_trace[-1, 1] \\\n", |
525 | | - " else [\"#A60628\", \"#348ABD\"]\n", |
| 527 | + " else [\"#A60628\", \"#348ABD\"]\n", |
526 | 528 | "\n", |
527 | 529 | "plt.plot(center_trace[:, 0], label=\"trace of center 0\", c=colors[0], lw=lw)\n", |
528 | 530 | "plt.plot(center_trace[:, 1], label=\"trace of center 1\", c=colors[1], lw=lw)\n", |
|
533 | 535 | "plt.subplot(312)\n", |
534 | 536 | "std_trace = mcmc.trace(\"stds\")[:]\n", |
535 | 537 | "plt.plot(std_trace[:, 0], label=\"trace of standard deviation of cluster 0\",\n", |
536 | | - " c=colors[0], lw=lw)\n", |
| 538 | + " c=colors[0], lw=lw)\n", |
537 | 539 | "plt.plot(std_trace[:, 1], label=\"trace of standard deviation of cluster 1\",\n", |
538 | | - " c=colors[1], lw=lw)\n", |
| 540 | + " c=colors[1], lw=lw)\n", |
539 | 541 | "plt.legend(loc=\"upper left\")\n", |
540 | 542 | "\n", |
541 | 543 | "plt.subplot(313)\n", |
542 | 544 | "p_trace = mcmc.trace(\"p\")[:]\n", |
543 | 545 | "plt.plot(p_trace, label=\"$p$: frequency of assignment to cluster 0\",\n", |
544 | | - " color=\"#467821\", lw=lw)\n", |
| 546 | + " color=\"#467821\", lw=lw)\n", |
545 | 547 | "plt.xlabel(\"Steps\")\n", |
546 | 548 | "plt.ylim(0, 1)\n", |
547 | 549 | "plt.legend();" |
|
580 | 582 | "cell_type": "code", |
581 | 583 | "collapsed": false, |
582 | 584 | "input": [ |
583 | | - "mcmc.sample(100000)" |
| 585 | + "mcmc.sample(100000);" |
584 | 586 | ], |
585 | 587 | "language": "python", |
586 | 588 | "metadata": {}, |
|
613 | 615 | "\n", |
614 | 616 | "x = np.arange(50000)\n", |
615 | 617 | "plt.plot(x, prev_center_trace[:, 0], label=\"previous trace of center 0\",\n", |
616 | | - " lw=lw, alpha=0.4, c=colors[1])\n", |
| 618 | + " lw=lw, alpha=0.4, c=colors[1])\n", |
617 | 619 | "plt.plot(x, prev_center_trace[:, 1], label=\"previous trace of center 1\",\n", |
618 | | - " lw=lw, alpha=0.4, c=colors[0])\n", |
| 620 | + " lw=lw, alpha=0.4, c=colors[0])\n", |
619 | 621 | "\n", |
620 | 622 | "x = np.arange(50000, 150000)\n", |
621 | | - "plt.plot(x, center_trace[:, 0], label=\"new trace of center 0\", lw=lw, c=\"#348ABD\")\n", |
622 | | - "plt.plot(x, center_trace[:, 1], label=\"new trace of center 1\", lw=lw, c=\"#A60628\")\n", |
| 623 | + "plt.plot(x, center_trace[:, 0], label=\"new trace of center 0\", lw=lw, c=\"#348ABD\")\n", |
| 624 | + "plt.plot(x, center_trace[:, 1], label=\"new trace of center 1\", lw=lw, c=\"#A60628\")\n", |
623 | 625 | "\n", |
624 | 626 | "plt.title(\"Traces of unknown center parameters\")\n", |
625 | 627 | "leg = plt.legend(loc=\"upper right\")\n", |
|
669 | 671 | " plt.title(\"Posterior of standard deviation of cluster %d\" % i)\n", |
670 | 672 | " plt.hist(std_trace[:, i], color=colors[i], bins=30,\n", |
671 | 673 | " histtype=\"stepfilled\")\n", |
672 | | - " #plt.autoscale(tight=True)\n", |
| 674 | + " # plt.autoscale(tight=True)\n", |
673 | 675 | "\n", |
674 | | - "plt.tight_layout()" |
| 676 | + "plt.tight_layout();" |
675 | 677 | ], |
676 | 678 | "language": "python", |
677 | 679 | "metadata": {}, |
|
704 | 706 | "figsize(12.5, 4.5)\n", |
705 | 707 | "plt.cmap = mpl.colors.ListedColormap(colors)\n", |
706 | 708 | "plt.imshow(mcmc.trace(\"assignment\")[::400, np.argsort(data)],\n", |
707 | | - " cmap=plt.cmap, aspect=.4, alpha=.9)\n", |
| 709 | + " cmap=plt.cmap, aspect=.4, alpha=.9)\n", |
708 | 710 | "plt.xticks(np.arange(0, data.shape[0], 40),\n", |
709 | | - " [\"%.2f\" % s for s in np.sort(data)[::40]])\n", |
| 711 | + " [\"%.2f\" % s for s in np.sort(data)[::40]])\n", |
710 | 712 | "plt.ylabel(\"posterior sample\")\n", |
711 | 713 | "plt.xlabel(\"value of $i$th data point\")\n", |
712 | 714 | "plt.title(\"Posterior labels of data points\");" |
|
738 | 740 | "input": [ |
739 | 741 | "cmap = mpl.colors.LinearSegmentedColormap.from_list(\"BMH\", colors)\n", |
740 | 742 | "assign_trace = mcmc.trace(\"assignment\")[:]\n", |
741 | | - "plt.scatter(data, 1-assign_trace.mean(axis=0), cmap=cmap,\n", |
742 | | - " c=assign_trace.mean(axis=0), s=50)\n", |
| 743 | + "plt.scatter(data, 1 - assign_trace.mean(axis=0), cmap=cmap,\n", |
| 744 | + " c=assign_trace.mean(axis=0), s=50)\n", |
743 | 745 | "plt.ylim(-0.05, 1.05)\n", |
744 | 746 | "plt.xlim(35, 300)\n", |
745 | 747 | "plt.title(\"Probability of data point belonging to cluster 0\")\n", |
746 | 748 | "plt.ylabel(\"probability\")\n", |
747 | | - "plt.xlabel(\"value of data point\")" |
| 749 | + "plt.xlabel(\"value of data point\");" |
748 | 750 | ], |
749 | 751 | "language": "python", |
750 | 752 | "metadata": {}, |
|
788 | 790 | "posterior_p_mean = mcmc.trace(\"p\")[:].mean()\n", |
789 | 791 | "\n", |
790 | 792 | "plt.hist(data, bins=20, histtype=\"step\", normed=True, color=\"k\",\n", |
791 | | - " lw=2, label=\"histogram of data\")\n", |
| 793 | + " lw=2, label=\"histogram of data\")\n", |
792 | 794 | "y = posterior_p_mean * norm.pdf(x, loc=posterior_center_means[0],\n", |
793 | 795 | " scale=posterior_std_means[0])\n", |
794 | 796 | "plt.plot(x, y, label=\"Cluster 0 (using posterior-mean parameters)\", lw=3)\n", |
|
800 | 802 | "plt.fill_between(x, y, color=colors[0], alpha=0.3)\n", |
801 | 803 | "\n", |
802 | 804 | "plt.legend(loc=\"upper left\")\n", |
803 | | - "plt.title(\"Visualizing Clusters using posterior-mean parameters\")" |
| 805 | + "plt.title(\"Visualizing Clusters using posterior-mean parameters\");" |
804 | 806 | ], |
805 | 807 | "language": "python", |
806 | 808 | "metadata": {}, |
|
851 | 853 | "\n", |
852 | 854 | "plt.plot(ex_mcmc.trace(\"x\")[:])\n", |
853 | 855 | "plt.plot(ex_mcmc.trace(\"y\")[:])\n", |
854 | | - "plt.title(\"Displaying (extreme) case of dependence between unknowns\")" |
| 856 | + "plt.title(\"Displaying (extreme) case of dependence between unknowns\");" |
855 | 857 | ], |
856 | 858 | "language": "python", |
857 | 859 | "metadata": {}, |
|
937 | 939 | "p_trace = mcmc.trace(\"p\")[:]\n", |
938 | 940 | "x = 175\n", |
939 | 941 | "\n", |
940 | | - "v = p_trace*norm_pdf(x, loc=center_trace[:, 0], scale=std_trace[:, 0]) > \\\n", |
941 | | - " (1-p_trace)*norm_pdf(x, loc=center_trace[:, 1], scale=std_trace[:, 1])\n", |
| 942 | + "v = p_trace * norm_pdf(x, loc=center_trace[:, 0], scale=std_trace[:, 0]) > \\\n", |
| 943 | + " (1 - p_trace) * norm_pdf(x, loc=center_trace[:, 1], scale=std_trace[:, 1])\n", |
942 | 944 | "\n", |
943 | | - "print \"Probability of belonging to cluster 1:\", v.mean()" |
| 945 | + "print \"Probability of belonging to cluster 1:\", v.mean();" |
944 | 946 | ], |
945 | 947 | "language": "python", |
946 | 948 | "metadata": {}, |
|
1064 | 1066 | "collapsed": false, |
1065 | 1067 | "input": [ |
1066 | 1068 | "def autocorr(x):\n", |
1067 | | - " #from http://tinyurl.com/afz57c4\n", |
| 1069 | + " # from http://tinyurl.com/afz57c4\n", |
1068 | 1070 | " result = np.correlate(x, x, mode='full')\n", |
1069 | 1071 | " result = result / np.max(result)\n", |
1070 | 1072 | " return result[result.size / 2:]\n", |
|
1187 | 1189 | "from pymc.Matplot import plot as mcplot\n", |
1188 | 1190 | "\n", |
1189 | 1191 | "mcmc.sample(25000, 0, 10)\n", |
1190 | | - "mcplot(mcmc.trace(\"centers\", 2), common_scale=False)" |
| 1192 | + "mcplot(mcmc.trace(\"centers\", 2), common_scale=False);" |
1191 | 1193 | ], |
1192 | 1194 | "language": "python", |
1193 | 1195 | "metadata": {}, |
|
1310 | 1312 | "def css_styling():\n", |
1311 | 1313 | " styles = open(\"../styles/custom.css\", \"r\").read()\n", |
1312 | 1314 | " return HTML(styles)\n", |
1313 | | - "css_styling()" |
| 1315 | + "css_styling();" |
1314 | 1316 | ], |
1315 | 1317 | "language": "python", |
1316 | 1318 | "metadata": {}, |
|
0 commit comments