We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ca1bd91 · commit 74a70e1 · Copy full SHA for 74a70e1
reinforcement_learning/reinforce.py
@@ -2,7 +2,7 @@
2
import gym
3
import numpy as np
4
from itertools import count
5
-
+from collections import deque
6
import torch
7
import torch.nn as nn
8
import torch.nn.functional as F
@@ -62,10 +62,10 @@ def select_action(state):
62
def finish_episode():
63
R = 0
64
policy_loss = []
65
- returns = []
+ returns = deque()
66
for r in policy.rewards[::-1]:
67
R = r + args.gamma * R
68
- returns.insert(0, R)
+ returns.appendleft(R)
69
returns = torch.tensor(returns)
70
returns = (returns - returns.mean()) / (returns.std() + eps)
71
for log_prob, R in zip(policy.saved_log_probs, returns):
0 commit comments