Speed up the computation of each return in REINFORCE from O(N) to O(1) (#1083)

Chris1nexus · web-flow · commit 74a70e107dbf · 2022-10-17T14:34:08.000-07:00
Replace the list of returns with a deque so that each insertion at the front takes O(1) time instead of O(N).
diff --git a/reinforcement_learning/reinforce.py b/reinforcement_learning/reinforce.py
@@ -2,7 +2,7 @@
 import gym
 import numpy as np
 from itertools import count
-
+from collections import deque
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -62,10 +62,10 @@ def select_action(state):
 def finish_episode():
     R = 0
     policy_loss = []
-    returns = []
+    returns = deque()
     for r in policy.rewards[::-1]:
         R = r + args.gamma * R
-        returns.insert(0, R)
+        returns.appendleft(R)
     returns = torch.tensor(returns)
     returns = (returns - returns.mean()) / (returns.std() + eps)
     for log_prob, R in zip(policy.saved_log_probs, returns):