Skip to content

Commit a14202f

Browse files
committed
RL - added SARSA, MC & Q examples
1 parent 4583f4e commit a14202f

File tree

5 files changed

+1634
-1
lines changed

5 files changed

+1634
-1
lines changed

ReinforcementLearning/Dynamic Programming - FrozenLake.ipynb

Lines changed: 141 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@
254254
" delta = 0\n",
255255
" # Iterate though each state\n",
256256
" for state in range(environment.nS):\n",
257-
" # Initial a new value of current state\n",
257+
" # We calculate the new value for v. This is the Bellman expectation equation.\n",
258258
" v = 0\n",
259259
" # Try all possible actions which can be taken from this state\n",
260260
" for action, action_probability in enumerate(policy[state]):\n",
@@ -594,6 +594,146 @@
594594
"source": [
595595
"We observe that value iteration has a better average reward and higher number of wins when it is run for 10,000 episodes."
596596
]
597+
},
598+
{
599+
"cell_type": "markdown",
600+
"metadata": {},
601+
"source": [
602+
"# Alternate value iteration example\n",
603+
"https://github.com/OneRaynyDay/FrozenLakeMDP/blob/master/frozenlake.py"
604+
]
605+
},
606+
{
607+
"cell_type": "code",
608+
"execution_count": 60,
609+
"metadata": {},
610+
"outputs": [
611+
{
612+
"data": {
613+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAP4AAAECCAYAAADesWqHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAEwhJREFUeJzt3WtsXGedx/Hvf26ei2+xZ2xPbnaS2omd0jY0SelyCSACXSjLvgCprNSVELtZCSEViZXgXYX2zcKu0K4ALUSi20VFZatCd1lEtyrQCyktbRKSNmnSEHVzcZzWt/gSx/d59oWNGwdTj12fOR4/v49kyc+Zo5nf8cxvznPOjHXMOYeI+CUSdgARKT0VX8RDKr6Ih1R8EQ+p+CIeUvFFPFTWxTezu8zsNTM7a2ZfDTvPSjKzB8ys28xOhJ0lCGa2ycyeMrNTZnbSzO4LO9NKMbOkmb1oZsdnt+1rYWe6kZXr5/hmFgXOAPuBTuAl4LPOuVdDDbZCzOwDwFXgB865m8POs9LMLA/knXNHzawKOAL85Vp4/szMgIxz7qqZxYFDwH3OuRdCjjannPf4e4GzzrnXnXMTwI+AT4WcacU4554F+sPOERTn3GXn3NHZ34eBU8CGcFOtDDfj6uwwPvuzqvaw5Vz8DcDF68adrJEXjm/MrAXYBfw23CQrx8yiZnYM6AaedM6tqm0r5+LbAstW1buqLM7MKoEfA19yzg2FnWelOOemnXO3ARuBvWa2qg7Xyrn4ncCm68Ybga6QssgyzB7//hj4oXPuJ2HnCYJzbgB4Grgr5CjzlHPxXwJazWyLmSWAe4CfhpxJijR7Auz7wCnn3DfDzrOSzCxnZrWzv6eAjwCnw001X9kW3zk3BXwReIKZE0OPOOdOhptq5ZjZw8DzwHYz6zSzz4edaYW9F7gX+LCZHZv9+XjYoVZIHnjKzF5mZgf1pHPuZyFnmqdsP84TkeUr2z2+iCyfii/iIRVfxEMqvoiHVHwRD5V98c3sQNgZgqTtK2+rdfvKvvjAqvzDriBtX3lbldu3FoovIksUyBd4EtGUS8VrVvx+FzIxPUoimirJY/1BoSJWsseanBghnsiU7PEApisW+v+nYEyNjhBLlXb7ouOl+9JaqZ+/sdErTE6MLPoEBvIKTsVruHPzXwdx16vC6Lb6sCMEanBLPOwIgar5v8mwIwTm6G++VdR6muqLeEjFF/GQii/iIRVfxEMqvoiHVHwRD6n4Ih5S8UU8pOKLeEjFF/GQii/iIRVfxEMqvoiHVHwRD6n4Ih5S8UU8pOKLeEjFF/GQii/iIRVfxEMqvoiHVHwRDxVVfDO7y8xeM7OzZvbVoEOJSLAWLb6ZRYHvAH8OdACfNbOOoIMt18hEPwNjXQRxoZDVYOjKea5d7V6z21euxkYHuNJ3NuwYRSvmghp7gbPOudcBzOxHwKeAV4MMtlyJaJoXLj5ExKLkMlvJZbZRn24hFkmEHW1FWCTG4UPfJJmupz7XTn1DO9XrWohEomFH84pzBYYHL9Hfc4q+7lOMDF+m/da/CjtW0Yop/gbg4nXjTuCOYOIs3bkrh+kcenneMucKTBTGuTR0gktDJ4hYlLrUZnKZbeQy20jFq0NKu3RnTjzK0JUL8xdahLFrfVw6f4hL5w8RjSWpy7ZR19BOXXY78UQ6nLBr3PTUBFf6fk9f9yn6e04zOXF13u3nzj7J+bO/mBsnktXcsudvSh2zKMUc4y90Ha4/mmea2QEzO2xmhyemR995MlkiwxZ8qlavs499m5MP3h92jCWz8vozL6iYPX4nsOm68Uag68aVnHMHgYMANcmmkh2AtqzbTcu63XPjyekxnj13kEQkPbeHr083l+1Uv+3mT88bDw9e4nfPf4tUOktdww7qc5rql0o0liDbuJNs406cK3B16BJ93W9N9Vtu2k8uf0vYMYtSTPFfAlrNbAtwCbgHWLUHMxPT17
h9w2eoqWjC1sJb8w2cm2b3+75MujIXdhSvmUWoqtlEVc0mWlo/ytjoAKPXesOOVbRFi++cmzKzLwJPAFHgAefcycCTLVMmURd2hEBV124OO4IsIJmqJZmqDTtG0Yq6TLZz7ufAzwPOIp642vU6VzvPMD01jisU6Hn518RSlaxr3RV2NG/om3tScuncRrqPPcVYbxfT49foeu6/SOc2hh3LKyq+lFwknqBqY9vcuKI2R0WtzlmUkoovoahueevLn9XNO0NM4icVX0JRvfm64res2m+Ar1kqvoQiXllDKruRaEWKTFNL2HG8U9RZfZEgVLd0MD7Yg+nLRyWn4ktoqls6GB8ony+9rCUqvoQmld1IRW1D2DG8pGN8CY2ZEY1XhB3DSyq+iIdUfBEPqfgiHlLxRTyk4ot4SMUX8ZCKL+IhFV/EQyq+iIdUfBEPqfgiHlLxRTyk4ot4KJB/yy1UxBjdVh/EXa8KiScOhx0hUMPfuDPsCIHKfXftPn/mrhW1nvb4Ih5S8UU8pOKLeEjFF/GQii/iIRVfxEMqvoiHVHwRD6n4Ih5S8UU8pOKLeEjFF/GQii/iIRVfxEMqvoiHVHwRD6n4Ih5S8UU8pOKLeEjFF/GQii/iIRVfxEMqvoiHVHwRDy1afDN7wMy6zexEKQKJSPCK2eM/CNwVcA4pUo/r4rK7wKSbCDuKLJFzjnPuNAOul4IrhJpl0UtoOeeeNbOW4KPIQgpuGnfdOE0lL/AkADWunhx5suRJU4WZhRNSFuSco8D8gk8xxWGeJk6CetdEjjz1NBGzeEmzBXLtvFKZnBhhbPQKqXQ9sXgq7DiBOMZz9NO94G0D9DJAL7/nFVJUknN5NrKNtFWWOOXyTHR346amqFi/PuwogRjjGs/x+IK3TTLBG1zgDS5gGOtcjhwb2MAWIhb8qbcVK76ZHQAOAFQka1fqbt9Wf89rvPbKI3Tsupds486SPGap1dFAguS8ZW9yETc7DzCMGv6w519fNqUH6H30ESZ7emi+/2thRwlElBhNbJ63bJQRBumbG8dJUE8TOdZTT2NJSg8rWHzn3EHgIEBVzUa3yOpSpBbbMW/c6y7Ty+XZF8vMNDFuiZDSydtJWAU3s3fesmPuOTJUkyVPjjw11IdyiFbWU30fZajmA3yyZHsGWTnOOdq4dVXMyor5OO9h4Hlgu5l1mtnng4/19pxzFApT85bdOF6rUpYp+9K7qT9+rhZattaY2aooPRR3Vv+zpQiyFGbGiSM/IBKdORPaffkYb3YdZeeue0NOJsUYv3iBgad+xdTwMIXJSbp/9DCpm1qp2r077GjeKNtdR6YqT9+bJwHofeMVKqvX5pnhtahiczNj588z1duLGx/n6pHDpLZvDzuWV8q2+PW5HTeM20NKIktl0Sjp7W89fxWbNhOrqgoxkX/KtvjVtZuJxdMAVCRryFTlQ04kS5Hu6FjwdymNsi2+RaLU5Wamh3W5dn1rrcykt++AyMzLL92xNr+DsZqVbfHhrel9fYOm+eUmkkqR3LqVaG0tibxma6VW1p/jr8u2EYslqa3bGnYUWYZMx04me3o0WwtBWRc/Fk/S3PrRuY/1pLyk2zuYzPWEHcNLZV18gPWb7gg7gixTPJsltm5d2DG8VNbH+DBzkk/Kl0X1/IWh7IsvIkun4ot4SMUX8ZCKL+IhFV/EQyq+iIdUfBEPqfgiHlLxRTyk4ot4SMUX8ZCKL+IhFV/EQ4H8W+50hTG4Ze3+j/zwN+4MO0Kgkq2DYUeQgGmPL+IhFV/EQyq+iIdUfBEPqfgiHlLxRTyk4ot4SMUX8ZCKL+IhFV/EQyq+iIdUfBEPqfgiHlLxRTyk4ot4SMUX8ZCKL+IhFV/EQyq+iIdUfBEPqfgiHlLxRTyk4ot4SMUX8dCixTezTWb2lJmdMrOTZnZfKYKJSHCKuZLOFPBl59xRM6sCjpjZk865VwPOtizXui/Qd/J5qps7qNzURjReEXakFTV89AgTnZ2kO3
aS3LIFi0bDjiRFcs5xiqNUUk2WPGmrDC3LosV3zl0GLs/+Pmxmp4ANwKoo/sRQPxNXB65b4hjuPEP/6RexaIzK9TdR3dJBdXMHiap1oeVcronLl5keHZ0bR5IpBp87xOCvnyWSTJLasYN0ewfpHe1E0+kQk8qNpt00Q/TPWxYhwhmOc4bjZFwVWfLkWE8N9ZhZybIt6dp5ZtYC7AJ+G0SY5eh79QW6f/fLBW9z01MMXzzN8MXTXPr1T0g3bCb/Z5+kMr+1xCmXr+9/fsro788seFthbIyRY8cYOXYMIhHS7e3U3/0XxLPZEqdcnuEXX2N6ZJzaD90SdpRATDDGEZ75k7ePMMwIw5znDHESNLvtbKaViAV/6q3o4ptZJfBj4EvOuaEFbj8AHACIV5Zuz1rd3E4sNX/K1HP8aSZHZi78GEtVUd3cTnVLB5Uby2/qX33nnaTb2+fGbmqK/v99HAoFAOLZLOmOnaQ7Oki2lNfUv/ex3zDR1b9mix8nQRu3zls2QB/ddAJgGOvIkSVf8ql/UcU3szgzpf+hc+4nC63jnDsIHARIN2xyK5ZwEZn8FjL5LXPjaz2d9J95iXU79lDTvJNUw0asBO+gQcm8a34phl96kWTLFtIdHaTbO0g0NISUTBYTszibaZ0bO+fo403yNJMlTz2NxCycq0ovWnybOfD4PnDKOffN4CO9M6m6PNs/8+WwYwSm8t23U7Vnb9gxZJlu470lPZb/U4rZFb4XuBf4sJkdm/35eMC5lq2cprrLsRa2b7J/GDc5PTd2Bcdkz2CIiUrDzFZF6aG4s/qHgNWRVtaE6aFrnPvKAxQmpyiMjvP6l75H7p59xHM1YUfzxpLO6oushIrmmfMS0wMjAEx09ZHZVT6ftKwF5XvWS8qWmVG5p21unL65mWiqvD5tKXcqvoSianfrdb+3vc2aEgQVX0KRvrkFS858lHX93l9KQ8f4EopIIkblbduY6Oon0VgbdhzvqPgSmqo9bYx39YUdw0sqvoSm8vZWEpvK4/8K1hoVX0ITq80Qq82EHcNLOrkn4iEVX8RDKr6Ih1R8EQ+p+CIeUvFFPKTii3hIxRfxkIov4iEVX8RDKr6Ih1R8EQ+p+CIeUvFFPBTIv+XGekbIfff5IO56VZh8rCPsCIH69NZjYUcI1P1dq+J6r4HY+7FrRa2nPb6Ih1R8EQ+p+CIeUvFFPKTii3hIxRfxkIov4iEVX8RDKr6Ih1R8EQ+p+CIeUvFFPKTii3hIxRfxkIov4iEVX8RDKr6Ih1R8EQ+p+CIeUvFFPKTii3hIxRfxkIov4iEVX8RDixbfzJJm9qKZHTezk2b2tVIEW66CK4QdIVBuajrsCIGanly7z1+h4JiedmHHAIq7ks448GHn3FUziwOHzOxx59wLAWdblqsM8qo7TJY8OfJUU4eZhR1rxQw+8woDvzpG5Z42qna3kdhQv6a275f/+ir950do29dE675GqnLJsCOtGDP45L2XydZF+cT+NB/7YJqa6mgoWRYtvnPOAVdnh/HZn9XxtgUMuF6GuDJv2SQTnOM05zhNggqyLk+WPHU0ErNArhoWmKHnTzHZMzg3dlPTXDvdybWTF+h+8Bck8nVU7mmlak8b6fbNWCycF9JyHX7kHFPj82cxZ555kzPPvAlAvqOGtn1NtO1rpKm9pqze5IaGC/z7w0PzllVmIjz06DAPPTpMLAb77kxx9/4Mn9ifYVtLvGTZbKbXi6xkFgWOADcB33HOfWWBdQ4ABwCSpG9/n318haMu7Kx7hXO8VtS61dTRxi3UWvYdPWZXCa+dd/7+hxg5/vriK0aMqt1tNH5uP4l83Tt6zFJeO++f3v84o4OTi64XS0a57VOb+NAXd5CqSbyjx7w/V5pr5527OMm2veeLWjdbF+Hvv7CO+/62lkRi+W9uez92kcPHxxa9g6J2f865aeA2M6sFHjOzm51zJ25Y5yBwEKDa6ko2I9jAVuppmrfsBC8yzigRIqyjgR
wze/ykpUsVa8U0fm4/0yNjc+PCyBgX//ERKDgimSSV776Jqt2tVL77JqJVqRCTLs89376DwtRbL5c3Tg/yxNdnXlpVDUna9jXS9sEmWvZkiSfLazbTlIvy9GMb5i37z/++yr89ODOD62hLcPdH09y9P8N7bk8SjZZuNrOkea9zbsDMngbuAk4ssnpJpCxDiszceND1U08jWfLU00i0zKb2N0q2NM4bDz7zCnV33zE7td9UdlP7G226df7s5PzhXvZ9YTtt+5po2lFdVlP7GyWTEd7/nrfejAsFx0OPDvMv/5DlE/szbG0u3dT+Rou2wsxywORs6VPAR4CvB55smWqsjhre2VR3NavZ9y5q9r0r7BiB+cDfbQ87QmAiEeN7/9wQdgyguD1+HviP2eP8CPCIc+5nwcYSkSAVc1b/ZWBXCbKISInom3siHlLxRTyk4ot4SMUX8ZCKL+IhFV/EQyq+iIdUfBEPqfgiHlLxRTyk4ot4SMUX8ZCKL+IhFV/EQyq+iIdUfBEPqfgiHlLxRTyk4ot4SMUX8ZCKL+IhFV/EQ0VdO2/Jd2rWAxR30bB3Lgv0luixwqDtK2+l3r5m51xusZUCKX4pmdlh59zusHMERdtX3lbr9mmqL+IhFV/EQ2uh+AfDDhAwbV95W5XbV/bH+CKydGthjy8iS6Tii3hIxRfxkIov4iEVX8RD/w+fTnux4S6a9QAAAABJRU5ErkJggg==\n",
614+
"text/plain": [
615+
"<Figure size 288x288 with 1 Axes>"
616+
]
617+
},
618+
"metadata": {
619+
"needs_background": "light"
620+
},
621+
"output_type": "display_data"
622+
}
623+
],
624+
"source": [
625+
"\"\"\"\n",
626+
"Let's use Value Iteration to solve FrozenLake!\n",
627+
"\n",
628+
"Setup\n",
629+
"-----\n",
630+
"We start off by defining our actions:\n",
631+
"A = {move left, move right...} = {(0,1),(0,-1),...}\n",
632+
"S = {(i,j) for 0 <= i,j < 4}\n",
633+
"Reward for (3,3) = 1, and otherwise 0.\n",
634+
"Probability distribution is a 4x(4x4) matrix of exactly the policy.\n",
635+
"We have pi(a|s), where a in A, and s in S.\n",
636+
"\n",
637+
"Problem formulation : https://gym.openai.com/envs/FrozenLake-v0/\n",
638+
"\n",
639+
"Algorithm\n",
640+
"---------\n",
641+
"Because our situation is deterministic for now, we have the value iteration eq:\n",
642+
"\n",
643+
"v <- 0 for all states.\n",
644+
"v_{k+1}(s) = max_a (\\sum_{s',r} p(s',r|s,a) (r + \\gamma * v_k(s')))\n",
645+
"\n",
646+
"... which decays to:\n",
647+
"\n",
648+
"v_{k+1}(s) = max_a (\\sum_{s'} (1_{end(s')} + \\gamma * v_k(s')))\n",
649+
"\n",
650+
"Because of our deterministic state and the deterministic reward.\n",
651+
"\"\"\"\n",
652+
"import numpy as np\n",
653+
"import random\n",
654+
"from numpy.linalg import norm\n",
655+
"import matplotlib.pyplot as plt\n",
656+
"%matplotlib inline \n",
657+
"\n",
658+
"N = 4\n",
659+
"v = np.zeros((N, N), dtype=np.float32) # Is our value vector.\n",
660+
"ITER = 1000\n",
661+
"A = [(0,1),(0,-1),(1,0),(-1,0)]\n",
662+
"\n",
663+
"# If you're feeling adventurous, make your own MAP\n",
664+
"MAP = [\n",
665+
" \"SFFF\",\n",
666+
" \"FHFH\",\n",
667+
" \"FFFH\",\n",
668+
" \"HFFG\"\n",
669+
"]\n",
670+
"\n",
671+
"def proj(n, minn, maxn):\n",
672+
" \"\"\"\n",
673+
" projects n into the range [minn, maxn). \n",
674+
" \"\"\"\n",
675+
" return max(min(maxn-1, n), minn)\n",
676+
"\n",
677+
"def move(s, tpl, stochasticity=0):\n",
678+
" \"\"\"\n",
679+
" Set stochasticity to any number in [0,1].\n",
680+
" This is equivalent to \"slipping on the ground\"\n",
681+
" in FrozenLake.\n",
682+
"\t\"\"\"\n",
683+
" if MAP[s[0]][s[1]] == 'H': # Go back to the start\n",
684+
" return (0,0)\n",
685+
" if np.random.random() < stochasticity:\n",
686+
" return random.choice(A)\n",
687+
" return (proj(s[0] + tpl[0], 0, N), proj(s[1] + tpl[1], 0, N))\n",
688+
"\n",
689+
"def reward(s):\n",
690+
" return MAP[s[0]][s[1]] == 'G'\n",
691+
" \n",
692+
"def run_with_value(v, gamma=0.9):\n",
693+
" old_v = v.copy()\n",
694+
" for i in range(N):\n",
695+
" for j in range(N):\n",
696+
" best_val = 0\n",
697+
" for a in A:\n",
698+
" new_s = move((i,j), a)\n",
699+
" best_val = max(best_val, gamma * old_v[new_s])\n",
700+
" v[i,j] = best_val + reward((i,j))\n",
701+
" return old_v\n",
702+
"\n",
703+
"# Extracting policy from v:\n",
704+
"def pi(s, v):\n",
705+
" cur_best = float(\"-inf\")\n",
706+
" cur_a = None\n",
707+
" for a in A:\n",
708+
" val = v[move(s, a, stochasticity=0)]\n",
709+
" if val > cur_best:\n",
710+
" cur_a = a\n",
711+
" cur_best = val\n",
712+
" return cur_a\n",
713+
"\n",
714+
"np.random.seed(0)\n",
715+
"random.seed(0)\n",
716+
"# Performing Value Iteration\n",
717+
"old_v = run_with_value(v)\n",
718+
"for i in range(ITER):\n",
719+
" old_v = run_with_value(v)\n",
720+
"\n",
721+
"# Plotting a nice arrow map.\n",
722+
"action_map = np.array([\n",
723+
" [pi((i,j), v) for j in range(N)] for i in range(N)])\n",
724+
"Fx = np.array([ [col[1] for col in row] for row in action_map ])\n",
725+
"Fy = np.array([ [-col[0] for col in row] for row in action_map ])\n",
726+
"plt.matshow(v) \n",
727+
"plt.quiver(Fx,Fy)\n",
728+
"plt.show()"
729+
]
730+
},
731+
{
732+
"cell_type": "code",
733+
"execution_count": null,
734+
"metadata": {},
735+
"outputs": [],
736+
"source": []
597737
}
598738
],
599739
"metadata": {

ReinforcementLearning/Monte Carlo - Blackjack + Cliff Walk.ipynb

Lines changed: 323 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)