|
300 | 300 | "import numpy as np\n",
|
301 | 301 | "n_data_points = 5 # in CH1 we had ~70 data points\n",
|
302 | 302 | "\n",
|
| 303 | + "\n", |
303 | 304 | "@pm.deterministic\n",
|
304 | 305 | "def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):\n",
|
305 | 306 | " out = np.zeros(n_data_points)\n",
|
|
532 | 533 | "cell_type": "code",
|
533 | 534 | "collapsed": false,
|
534 | 535 | "input": [
|
535 |
| - "alpha = 1./20.\n", |
| 536 | + "alpha = 1. / 20.\n", |
536 | 537 | "lambda_1, lambda_2 = pm.rexponential(alpha, 2)\n",
|
537 | 538 | "print lambda_1, lambda_2"
|
538 | 539 | ],
|
|
579 | 580 | "collapsed": false,
|
580 | 581 | "input": [
|
581 | 582 | "plt.bar(np.arange(80), data, color=\"#348ABD\")\n",
|
582 |
| - "plt.bar(tau-1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n", |
| 583 | + "plt.bar(tau - 1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n", |
583 | 584 | "plt.xlabel(\"Time (days)\")\n",
|
584 | 585 | "plt.ylabel(\"count of text-msgs received\")\n",
|
585 | 586 | "plt.title(\"Artificial dataset\")\n",
|
|
616 | 617 | "input": [
|
617 | 618 | "def plot_artificial_sms_dataset():\n",
|
618 | 619 | " tau = pm.rdiscrete_uniform(0, 80)\n",
|
619 |
| - " alpha = 1./20.\n", |
| 620 | + " alpha = 1. / 20.\n", |
620 | 621 | " lambda_1, lambda_2 = pm.rexponential(alpha, 2)\n",
|
621 | 622 | " data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]\n",
|
622 | 623 | " plt.bar(np.arange(80), data, color=\"#348ABD\")\n",
|
623 |
| - " plt.bar(tau - 1, data[tau-1], color=\"r\", label=\"user behaviour changed\")\n", |
| 624 | + " plt.bar(tau - 1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n", |
624 | 625 | " plt.xlim(0, 80)\n",
|
625 | 626 | "\n",
|
626 | 627 | "figsize(12.5, 5)\n",
|
627 | 628 | "plt.title(\"More example of artificial datasets\")\n",
|
628 | 629 | "for i in range(4):\n",
|
629 | 630 | " plt.subplot(4, 1, i)\n",
|
630 |
| - " plot_artificial_sms_dataset()\n" |
| 631 | + " plot_artificial_sms_dataset()" |
631 | 632 | ],
|
632 | 633 | "language": "python",
|
633 | 634 | "metadata": {},
|
|
709 | 710 | "cell_type": "code",
|
710 | 711 | "collapsed": false,
|
711 | 712 | "input": [
|
712 |
| - "#set constants\n", |
| 713 | + "# set constants\n", |
713 | 714 | "p_true = 0.05 # remember, this is unknown.\n",
|
714 | 715 | "N = 1500\n",
|
715 | 716 | "\n",
|
|
775 | 776 | "cell_type": "code",
|
776 | 777 | "collapsed": false,
|
777 | 778 | "input": [
|
778 |
| - "#include the observations, which are Bernoulli\n", |
| 779 | + "# include the observations, which are Bernoulli\n", |
779 | 780 | "obs = pm.Bernoulli(\"obs\", p, value=occurrences, observed=True)\n",
|
780 | 781 | "\n",
|
781 |
| - "#To be explained in chapter 3\n", |
| 782 | + "# To be explained in chapter 3\n", |
782 | 783 | "mcmc = pm.MCMC([p, obs])\n",
|
783 | 784 | "mcmc.sample(18000, 1000)"
|
784 | 785 | ],
|
|
860 | 861 | "import pymc as pm\n",
|
861 | 862 | "figsize(12, 4)\n",
|
862 | 863 | "\n",
|
863 |
| - "#these two quantities are unknown to us.\n", |
| 864 | + "# these two quantities are unknown to us.\n", |
864 | 865 | "true_p_A = 0.05\n",
|
865 | 866 | "true_p_B = 0.04\n",
|
866 | 867 | "\n",
|
867 |
| - "#notice the unequal sample sizes -- no problem in Bayesian analysis.\n", |
| 868 | + "# notice the unequal sample sizes -- no problem in Bayesian analysis.\n", |
868 | 869 | "N_A = 1500\n",
|
869 | 870 | "N_B = 750\n",
|
870 | 871 | "\n",
|
871 |
| - "#generate some observations\n", |
| 872 | + "# generate some observations\n", |
872 | 873 | "observations_A = pm.rbernoulli(true_p_A, N_A)\n",
|
873 | 874 | "observations_B = pm.rbernoulli(true_p_B, N_B)\n",
|
874 | 875 | "print \"Obs from Site A: \", observations_A[:30].astype(int), \"...\"\n",
|
|
978 | 979 | "input": [
|
979 | 980 | "figsize(12.5, 10)\n",
|
980 | 981 | "\n",
|
981 |
| - "#histogram of posteriors\n", |
| 982 | + "# histogram of posteriors\n", |
982 | 983 | "\n",
|
983 | 984 | "ax = plt.subplot(311)\n",
|
984 | 985 | "\n",
|
|
1257 | 1258 | " fc=first_coin_flips,\n",
|
1258 | 1259 | " sc=second_coin_flips):\n",
|
1259 | 1260 | "\n",
|
1260 |
| - " observed = fc*t_a + (1-fc)*sc\n", |
| 1261 | + " observed = fc * t_a + (1 - fc) * sc\n", |
1261 | 1262 | " return observed.sum() / float(N)"
|
1262 | 1263 | ],
|
1263 | 1264 | "language": "python",
|
|
1360 | 1361 | "input": [
|
1361 | 1362 | "figsize(12.5, 3)\n",
|
1362 | 1363 | "p_trace = mcmc.trace(\"freq_cheating\")[:]\n",
|
1363 |
| - "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30, \n", |
| 1364 | + "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30,\n", |
1364 | 1365 | " label=\"posterior distribution\", color=\"#348ABD\")\n",
|
1365 | 1366 | "plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.3)\n",
|
1366 | 1367 | "plt.xlim(0, 1)\n",
|
|
1415 | 1416 | "input": [
|
1416 | 1417 | "p = pm.Uniform(\"freq_cheating\", 0, 1)\n",
|
1417 | 1418 | "\n",
|
| 1419 | + "\n", |
1418 | 1420 | "@pm.deterministic\n",
|
1419 | 1421 | "def p_skewed(p=p):\n",
|
1420 |
| - " return 0.5*p + 0.25" |
| 1422 | + " return 0.5 * p + 0.25" |
1421 | 1423 | ],
|
1422 | 1424 | "language": "python",
|
1423 | 1425 | "metadata": {},
|
|
1491 | 1493 | "input": [
|
1492 | 1494 | "figsize(12.5, 3)\n",
|
1493 | 1495 | "p_trace = mcmc.trace(\"freq_cheating\")[:]\n",
|
1494 |
| - "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30, \n", |
| 1496 | + "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30,\n", |
1495 | 1497 | " label=\"posterior distribution\", color=\"#348ABD\")\n",
|
1496 | 1498 | "plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.2)\n",
|
1497 | 1499 | "plt.xlim(0, 1)\n",
|
|
1539 | 1541 | "N = 10\n",
|
1540 | 1542 | "x = np.empty(N, dtype=object)\n",
|
1541 | 1543 | "for i in range(0, N):\n",
|
1542 |
| - " x[i] = pm.Exponential('x_%i' % i, (i+1)**2)" |
| 1544 | + " x[i] = pm.Exponential('x_%i' % i, (i + 1) ** 2)" |
1543 | 1545 | ],
|
1544 | 1546 | "language": "python",
|
1545 | 1547 | "metadata": {},
|
|
1575 | 1577 | "challenger_data = np.genfromtxt(\"data/challenger_data.csv\", skip_header=1,\n",
|
1576 | 1578 | " usecols=[1, 2], missing_values=\"NA\",\n",
|
1577 | 1579 | " delimiter=\",\")\n",
|
1578 |
| - "#drop the NA values\n", |
| 1580 | + "# drop the NA values\n", |
1579 | 1581 | "challenger_data = challenger_data[~np.isnan(challenger_data[:, 1])]\n",
|
1580 | 1582 | "\n",
|
1581 |
| - "#plot it, as a function of tempature (the first column)\n", |
| 1583 | + "# plot it, as a function of tempature (the first column)\n", |
1582 | 1584 | "print \"Temp (F), O-Ring failure?\"\n",
|
1583 | 1585 | "print challenger_data\n",
|
1584 | 1586 | "\n",
|
|
1587 | 1589 | "plt.yticks([0, 1])\n",
|
1588 | 1590 | "plt.ylabel(\"Damage Incident?\")\n",
|
1589 | 1591 | "plt.xlabel(\"Outside temperature (Fahrenheit)\")\n",
|
1590 |
| - "plt.title(\"Defects of the Space Shuttle O-Rings vs temperature\")\n" |
| 1592 | + "plt.title(\"Defects of the Space Shuttle O-Rings vs temperature\")" |
1591 | 1593 | ],
|
1592 | 1594 | "language": "python",
|
1593 | 1595 | "metadata": {},
|
|
1769 | 1771 | "parameters = zip(mu, tau, colors)\n",
|
1770 | 1772 | "\n",
|
1771 | 1773 | "for _mu, _tau, _color in parameters:\n",
|
1772 |
| - " plt.plot(x, nor.pdf(x, _mu, scale=1./_tau),\n", |
| 1774 | + " plt.plot(x, nor.pdf(x, _mu, scale=1. / _tau),\n", |
1773 | 1775 | " label=\"$\\mu = %d,\\;\\\\tau = %.1f$\" % (_mu, _tau), color=_color)\n",
|
1774 |
| - " plt.fill_between(x, nor.pdf(x, _mu, scale=1./_tau), color=_color,\n", |
| 1776 | + " plt.fill_between(x, nor.pdf(x, _mu, scale=1. / _tau), color=_color,\n", |
1775 | 1777 | " alpha=.33)\n",
|
1776 | 1778 | "\n",
|
1777 | 1779 | "plt.legend(loc=\"upper right\")\n",
|
|
1820 | 1822 | "temperature = challenger_data[:, 0]\n",
|
1821 | 1823 | "D = challenger_data[:, 1] # defect or not?\n",
|
1822 | 1824 | "\n",
|
1823 |
| - "#notice the`value` here. We explain why below.\n", |
| 1825 | + "# notice the`value` here. We explain why below.\n", |
1824 | 1826 | "beta = pm.Normal(\"beta\", 0, 0.001, value=0)\n",
|
1825 | 1827 | "alpha = pm.Normal(\"alpha\", 0, 0.001, value=0)\n",
|
1826 | 1828 | "\n",
|
1827 | 1829 | "\n",
|
1828 | 1830 | "@pm.deterministic\n",
|
1829 | 1831 | "def p(t=temperature, alpha=alpha, beta=beta):\n",
|
1830 |
| - " return 1.0 / (1. + np.exp(beta*t + alpha))\n" |
| 1832 | + " return 1.0 / (1. + np.exp(beta * t + alpha))" |
1831 | 1833 | ],
|
1832 | 1834 | "language": "python",
|
1833 | 1835 | "metadata": {},
|
|
1920 | 1922 | "\n",
|
1921 | 1923 | "figsize(12.5, 6)\n",
|
1922 | 1924 | "\n",
|
1923 |
| - "#histogram of the samples:\n", |
| 1925 | + "# histogram of the samples:\n", |
1924 | 1926 | "plt.subplot(211)\n",
|
1925 | 1927 | "plt.title(r\"Posterior distributions of the variables $\\alpha, \\beta$\")\n",
|
1926 | 1928 | "plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,\n",
|
|
1963 | 1965 | "cell_type": "code",
|
1964 | 1966 | "collapsed": false,
|
1965 | 1967 | "input": [
|
1966 |
| - "t = np.linspace(temperature.min() - 5, temperature.max()+5, 50)[:, None]\n", |
| 1968 | + "t = np.linspace(temperature.min() - 5, temperature.max() + 5, 50)[:, None]\n", |
1967 | 1969 | "p_t = logistic(t.T, beta_samples, alpha_samples)\n",
|
1968 | 1970 | "\n",
|
1969 | 1971 | "mean_prob_t = p_t.mean(axis=0)"
|
|
2164 | 2166 | "plt.title(\"Simulated dataset using posterior parameters\")\n",
|
2165 | 2167 | "figsize(12.5, 6)\n",
|
2166 | 2168 | "for i in range(4):\n",
|
2167 |
| - " ax = plt.subplot(4, 1, i+1)\n", |
2168 |
| - " plt.scatter(temperature, simulations[1000*i, :], color=\"k\",\n", |
| 2169 | + " ax = plt.subplot(4, 1, i + 1)\n", |
| 2170 | + " plt.scatter(temperature, simulations[1000 * i, :], color=\"k\",\n", |
2169 | 2171 | " s=50, alpha=0.6)"
|
2170 | 2172 | ],
|
2171 | 2173 | "language": "python",
|
|
2369 | 2371 | "plt.title(\"Random model\")\n",
|
2370 | 2372 | "\n",
|
2371 | 2373 | "# constant model\n",
|
2372 |
| - "constant_prob = 7./23*np.ones(23)\n", |
| 2374 | + "constant_prob = 7. / 23 * np.ones(23)\n", |
2373 | 2375 | "separation_plot(constant_prob, D)\n",
|
2374 | 2376 | "plt.title(\"Constant-prediction model\")"
|
2375 | 2377 | ],
|
|
2448 | 2450 | "cell_type": "code",
|
2449 | 2451 | "collapsed": false,
|
2450 | 2452 | "input": [
|
2451 |
| - "#type your code here.\n", |
| 2453 | + "# type your code here.\n", |
2452 | 2454 | "figsize(12.5, 4)\n",
|
2453 | 2455 | "\n",
|
2454 | 2456 | "plt.scatter(alpha_samples, beta_samples, alpha=0.1)\n",
|
|
0 commit comments