Skip to content

Commit 79cadc0

Browse files
author
Alex
committed
Lambda to Gamma. Updated Readme.
1 parent 7a31a2b commit 79cadc0

File tree

6 files changed

+37
-81
lines changed

6 files changed

+37
-81
lines changed

MC/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737

3838
### Exercises
3939

40-
- [Get familiar with the Blackjack environment (Blackjack-v0)](Blackjack%20Playground.ipynb)
40+
- Get familiar with the [Blackjack environment (Blackjack-v0)](Blackjack%20Playground.ipynb)
4141
- Implement the Monte Carlo Prediction to estimate state-action values
4242
- [Exercise](MC%20Prediction.ipynb)
4343
- [Solution](MC%20Prediction%20Solution.ipynb)

TD/Q-Learning Solution.ipynb

+8-18
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": null,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline\n",
@@ -31,9 +29,7 @@
3129
{
3230
"cell_type": "code",
3331
"execution_count": 15,
34-
"metadata": {
35-
"collapsed": false
36-
},
32+
"metadata": {},
3733
"outputs": [],
3834
"source": [
3935
"env = CliffWalkingEnv()"
@@ -73,9 +69,7 @@
7369
{
7470
"cell_type": "code",
7571
"execution_count": 17,
76-
"metadata": {
77-
"collapsed": false
78-
},
72+
"metadata": {},
7973
"outputs": [],
8074
"source": [
8175
"def q_learning(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -86,7 +80,7 @@
8680
" Args:\n",
8781
" env: OpenAI environment.\n",
8882
" num_episodes: Number of episodes to run for.\n",
89-
" discount_factor: Lambda time discount factor.\n",
83+
" discount_factor: Gamma discount factor.\n",
9084
" alpha: TD learning rate.\n",
9185
 " epsilon: Chance to sample a random action. Float between 0 and 1.\n",
9286
" \n",
@@ -147,9 +141,7 @@
147141
{
148142
"cell_type": "code",
149143
"execution_count": 18,
150-
"metadata": {
151-
"collapsed": false
152-
},
144+
"metadata": {},
153145
"outputs": [
154146
{
155147
"name": "stdout",
@@ -166,9 +158,7 @@
166158
{
167159
"cell_type": "code",
168160
"execution_count": 19,
169-
"metadata": {
170-
"collapsed": false
171-
},
161+
"metadata": {},
172162
"outputs": [
173163
{
174164
"data": {
@@ -231,9 +221,9 @@
231221
"name": "python",
232222
"nbconvert_exporter": "python",
233223
"pygments_lexer": "ipython3",
234-
"version": "3.5.1"
224+
"version": "3.5.2"
235225
}
236226
},
237227
"nbformat": 4,
238-
"nbformat_minor": 0
228+
"nbformat_minor": 1
239229
}

TD/Q-Learning.ipynb

+8-18
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 3,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline\n",
@@ -30,9 +28,7 @@
3028
{
3129
"cell_type": "code",
3230
"execution_count": 4,
33-
"metadata": {
34-
"collapsed": false
35-
},
31+
"metadata": {},
3632
"outputs": [],
3733
"source": [
3834
"env = CliffWalkingEnv()"
@@ -72,9 +68,7 @@
7268
{
7369
"cell_type": "code",
7470
"execution_count": 6,
75-
"metadata": {
76-
"collapsed": false
77-
},
71+
"metadata": {},
7872
"outputs": [],
7973
"source": [
8074
"def q_learning(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -85,7 +79,7 @@
8579
" Args:\n",
8680
" env: OpenAI environment.\n",
8781
" num_episodes: Number of episodes to run for.\n",
88-
" discount_factor: Lambda time discount factor.\n",
82+
" discount_factor: Gamma discount factor.\n",
8983
" alpha: TD learning rate.\n",
9084
 " epsilon: Chance to sample a random action. Float between 0 and 1.\n",
9185
" \n",
@@ -121,9 +115,7 @@
121115
{
122116
"cell_type": "code",
123117
"execution_count": 7,
124-
"metadata": {
125-
"collapsed": false
126-
},
118+
"metadata": {},
127119
"outputs": [
128120
{
129121
"name": "stdout",
@@ -140,9 +132,7 @@
140132
{
141133
"cell_type": "code",
142134
"execution_count": 8,
143-
"metadata": {
144-
"collapsed": false
145-
},
135+
"metadata": {},
146136
"outputs": [
147137
{
148138
"data": {
@@ -205,9 +195,9 @@
205195
"name": "python",
206196
"nbconvert_exporter": "python",
207197
"pygments_lexer": "ipython3",
208-
"version": "3.5.1"
198+
"version": "3.5.2"
209199
}
210200
},
211201
"nbformat": 4,
212-
"nbformat_minor": 0
202+
"nbformat_minor": 1
213203
}

TD/README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@
4040

4141
### Exercises
4242

43-
- [Windy Gridworld Playground](Windy%20Gridworld%20Playground.ipynb)
43+
- Get familiar with the [Windy Gridworld Playground](Windy%20Gridworld%20Playground.ipynb)
4444
- Implement SARSA
4545
- [Exercise](SARSA.ipynb)
4646
- [Solution](SARSA%20Solution.ipynb)
47-
- [Cliff Environment Playground](Cliff%20Environment%20Playground.ipynb)
47+
- Get familiar with the [Cliff Environment Playground](Cliff%20Environment%20Playground.ipynb)
4848
- Implement Q-Learning in Python
4949
- [Exercise](Q-Learning.ipynb)
5050
- [Solution](Q-Learning%20Solution.ipynb)

TD/SARSA Solution.ipynb

+9-21
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 19,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline\n",
@@ -39,9 +37,7 @@
3937
{
4038
"cell_type": "code",
4139
"execution_count": 20,
42-
"metadata": {
43-
"collapsed": false
44-
},
40+
"metadata": {},
4541
"outputs": [],
4642
"source": [
4743
"env = WindyGridworldEnv()"
@@ -81,9 +77,7 @@
8177
{
8278
"cell_type": "code",
8379
"execution_count": 22,
84-
"metadata": {
85-
"collapsed": false
86-
},
80+
"metadata": {},
8781
"outputs": [],
8882
"source": [
8983
"def sarsa(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -93,7 +87,7 @@
9387
" Args:\n",
9488
" env: OpenAI environment.\n",
9589
" num_episodes: Number of episodes to run for.\n",
96-
" discount_factor: Lambda time discount factor.\n",
90+
" discount_factor: Gamma discount factor.\n",
9791
" alpha: TD learning rate.\n",
9892
 " epsilon: Chance to sample a random action. Float between 0 and 1.\n",
9993
" \n",
@@ -156,9 +150,7 @@
156150
{
157151
"cell_type": "code",
158152
"execution_count": 23,
159-
"metadata": {
160-
"collapsed": false
161-
},
153+
"metadata": {},
162154
"outputs": [
163155
{
164156
"name": "stdout",
@@ -175,9 +167,7 @@
175167
{
176168
"cell_type": "code",
177169
"execution_count": 24,
178-
"metadata": {
179-
"collapsed": false
180-
},
170+
"metadata": {},
181171
"outputs": [
182172
{
183173
"data": {
@@ -217,9 +207,7 @@
217207
{
218208
"cell_type": "code",
219209
"execution_count": null,
220-
"metadata": {
221-
"collapsed": false
222-
},
210+
"metadata": {},
223211
"outputs": [],
224212
"source": []
225213
}
@@ -240,9 +228,9 @@
240228
"name": "python",
241229
"nbconvert_exporter": "python",
242230
"pygments_lexer": "ipython3",
243-
"version": "3.5.1"
231+
"version": "3.5.2"
244232
}
245233
},
246234
"nbformat": 4,
247-
"nbformat_minor": 0
235+
"nbformat_minor": 1
248236
}

TD/SARSA.ipynb

+9-21
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 11,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline\n",
@@ -30,9 +28,7 @@
3028
{
3129
"cell_type": "code",
3230
"execution_count": 12,
33-
"metadata": {
34-
"collapsed": false
35-
},
31+
"metadata": {},
3632
"outputs": [],
3733
"source": [
3834
"env = WindyGridworldEnv()"
@@ -72,9 +68,7 @@
7268
{
7369
"cell_type": "code",
7470
"execution_count": 14,
75-
"metadata": {
76-
"collapsed": false
77-
},
71+
"metadata": {},
7872
"outputs": [],
7973
"source": [
8074
"def sarsa(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -84,7 +78,7 @@
8478
" Args:\n",
8579
" env: OpenAI environment.\n",
8680
" num_episodes: Number of episodes to run for.\n",
87-
" discount_factor: Lambda time discount factor.\n",
81+
" discount_factor: Gamma discount factor.\n",
8882
" alpha: TD learning rate.\n",
8983
" epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
9084
" \n",
@@ -121,9 +115,7 @@
121115
{
122116
"cell_type": "code",
123117
"execution_count": 16,
124-
"metadata": {
125-
"collapsed": false
126-
},
118+
"metadata": {},
127119
"outputs": [
128120
{
129121
"name": "stdout",
@@ -140,9 +132,7 @@
140132
{
141133
"cell_type": "code",
142134
"execution_count": 17,
143-
"metadata": {
144-
"collapsed": false
145-
},
135+
"metadata": {},
146136
"outputs": [
147137
{
148138
"data": {
@@ -182,9 +172,7 @@
182172
{
183173
"cell_type": "code",
184174
"execution_count": null,
185-
"metadata": {
186-
"collapsed": false
187-
},
175+
"metadata": {},
188176
"outputs": [],
189177
"source": []
190178
}
@@ -205,9 +193,9 @@
205193
"name": "python",
206194
"nbconvert_exporter": "python",
207195
"pygments_lexer": "ipython3",
208-
"version": "3.5.1"
196+
"version": "3.5.2"
209197
}
210198
},
211199
"nbformat": 4,
212-
"nbformat_minor": 0
200+
"nbformat_minor": 1
213201
}

0 commit comments

Comments
 (0)