Skip to content

Commit 654e9ad

Browse files
committed
Initial commit
0 parents  commit 654e9ad

File tree

42 files changed

+522
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+522
-0
lines changed

Protokoll.txt

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Gruppe:
2+
3+
Bearbeitet von:
4+
Aufgabe 1 (Markov Decision Process) ->
5+
Aufgabe 2 (Basic Agent) ->
6+
Aufgabe 3 (Q-Learning) ->

agents/AbstractAgent.py

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
from abc import abstractmethod
2+
3+
import numpy as np
4+
from pysc2.lib import actions, features
5+
6+
7+
class AbstractAgent:
    """Base class for all MoveToBeacon agents.

    Bundles the direction lookup tables, shortcuts for the PySC2 actions
    every agent needs, and helpers for locating the marine/beacon units
    in an observation.
    """

    # Screen-coordinate unit offsets for the eight compass directions
    # (x grows to the right, y grows downwards on the feature screen).
    _DIRECTIONS = {'N': [0, -1],
                   'NE': [1, -1],
                   'E': [1, 0],
                   'SE': [1, 1],
                   'S': [0, 1],
                   'SW': [-1, 1],
                   'W': [-1, 0],
                   'NW': [-1, -1]}

    # Reverse lookup: stringified offset vector -> direction name.
    _DIRECTIONS_REVERSE = {"[0, -1]": 'N',
                           "[1, -1]": 'NE',
                           "[1, 0]": 'E',
                           "[1, 1]": 'SE',
                           "[0, 1]": 'S',
                           "[-1, 1]": 'SW',
                           "[-1, 0]": 'W',
                           "[-1, -1]": 'NW'}

    # Sc2 actions shared by all agents.
    _MOVE_SCREEN = actions.FUNCTIONS.Move_screen
    _NO_OP = actions.FUNCTIONS.no_op()
    _SELECT_ARMY = actions.FUNCTIONS.select_army("select")

    def __init__(self, screen_size):
        # Width/height (in pixels) of the quadratic feature screen.
        self.screen_size = screen_size

    @abstractmethod
    def step(self, obs):
        """Choose the next PySC2 action for the given observation."""
        ...

    @abstractmethod
    def save_model(self, path):
        """Persist the agent's model (if any) to *path*."""
        ...

    @abstractmethod
    def load_model(self, path):
        """Restore the agent's model (if any) from *path*."""
        ...

    def _get_beacon(self, obs):
        """Return the unit obj representation of the beacon, or None.

        The original used a bare ``next(...)``, which raised StopIteration
        when no neutral unit was visible even though every caller checks
        for None; supply a default so those checks actually work.
        """
        return next((unit for unit in obs.observation.feature_units
                     if unit.alliance == features.PlayerRelative.NEUTRAL),
                    None)

    def _get_marine(self, obs):
        """Return the unit obj representation of the marine, or None."""
        return next((unit for unit in obs.observation.feature_units
                     if unit.alliance == features.PlayerRelative.SELF),
                    None)

    def _get_unit_pos(self, unit):
        """Return the (x, y) position of a unit obj as a numpy array."""
        return np.array([unit.x, unit.y])

    def _dir_to_sc2_action(self, d, marine_center):
        """Takes the direction the marine should walk and outputs an action for PySC2.

        Unknown direction names yield a no-op.
        """

        def _xy_offset(start, offset_x, offset_y):
            """Return point (x', y') offset from start.
            Pays attention to not set the point off beyond the screen border"""
            dest = start + np.array([offset_x, offset_y])
            if dest[0] < 0:
                dest[0] = 0
            elif dest[0] >= self.screen_size:
                dest[0] = self.screen_size - 1
            if dest[1] < 0:
                dest[1] = 0
            elif dest[1] >= self.screen_size:
                dest[1] = self.screen_size - 1
            return dest

        if d in self._DIRECTIONS:
            # Aim one full screen-length beyond the marine in the desired
            # direction; the clamping above turns this into "walk towards
            # the border in direction d".
            next_pos = _xy_offset(marine_center,
                                  self.screen_size * self._DIRECTIONS[d][0],
                                  self.screen_size * self._DIRECTIONS[d][1])
            return self._MOVE_SCREEN("now", next_pos)
        return self._NO_OP

agents/BasicAgent.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from agents.AbstractAgent import AbstractAgent
2+
3+
4+
class BasicAgent(AbstractAgent):
    """Deterministic agent that walks straight towards the beacon."""

    def __init__(self, train, screen_size):
        # ``train`` is accepted for interface parity with learning agents;
        # this agent has nothing to train.
        super(BasicAgent, self).__init__(screen_size)

    def step(self, obs):
        """Return the PySC2 action that moves the marine towards the beacon."""
        if self._MOVE_SCREEN.id not in obs.observation.available_actions:
            # Marine not selected yet: select the army first.
            return self._SELECT_ARMY

        marine = self._get_marine(obs)
        if marine is None:
            return self._NO_OP
        marine_coordinates = self._get_unit_pos(marine)
        marine_x, marine_y = marine_coordinates[0], marine_coordinates[1]

        beacon = self._get_beacon(obs)
        if beacon is None:
            return self._NO_OP
        beacon_coordinates = self._get_unit_pos(beacon)
        beacon_x, beacon_y = beacon_coordinates[0], beacon_coordinates[1]

        # Build a compass direction out of the vertical and horizontal
        # components of the marine->beacon vector (y grows downwards).
        move = ""
        if marine_y < beacon_y:
            move += "S"
        elif marine_y > beacon_y:
            move += "N"

        if marine_x < beacon_x:
            move += "E"
        elif marine_x > beacon_x:
            move += "W"

        # Marine already stands on the beacon: nothing to do.  The original
        # asserted here, which crashes in this case (and the assert vanishes
        # entirely under ``python -O``).
        if not move:
            return self._NO_OP

        return self._dir_to_sc2_action(move, marine_coordinates)

    def save_model(self, filename):
        # Nothing to persist for a rule-based agent.
        pass

    def load_model(self, filename):
        # Nothing to load for a rule-based agent.
        pass

agents/QLearningAgent.py

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
from agents.AbstractAgent import AbstractAgent
2+
import pandas as pd
3+
import numpy as np
4+
5+
def get_row_index_in_string_format(state):
    """
    Returns a state (row index) as q-table row index string.

    Parameters:
        state ([int, int]): The distance between agent (marine) and beacon.

    Returns:
        state (str): Transformed state, so it can be used as index in the q-table.
    """
    dx, dy = state[0], state[1]
    return f"({dx},{dy})"
16+
17+
18+
class QLearningAgent(AbstractAgent):
    """Tabular Q-learning agent for MoveToBeacon (skeleton with TODOs)."""

    def __init__(self, train, screen_size, explore=1):
        """
        Parameters:
            train (bool): Whether the agent should learn from transitions.
            screen_size (int): Pixel size of the quadratic feature screen.
            explore (float): Initial exploration rate.
        """
        super(QLearningAgent, self).__init__(screen_size)
        # TODO Initialize all hyperparameter and the q-table (with the helper function below)
        self.train = train
        self.explore = explore
        # Materialise the action set as a list (the original kept a live
        # dict_keys view of _DIRECTIONS).
        self.actions = list(self._DIRECTIONS.keys())
        # All possible (marine - beacon) pixel distance vectors on a
        # 64-pixel screen: (-64..64) in both axes.
        self.states = [(x, y) for x in range(-64, 65) for y in range(-64, 65)]
        self.q_table = self.init_q_table()

    def step(self, obs):
        # TODO step method
        if self._MOVE_SCREEN.id in obs.observation.available_actions:
            pass
        else:
            return self._SELECT_ARMY  # initialize army in first step

    def save_model(self, path):
        """Serialize the q-table to *path* (pickle format)."""
        self.q_table.to_pickle(path)

    def load_model(self, path):
        """Restore the q-table from a pickle file at *path*."""
        self.q_table = pd.read_pickle(path)

    def get_new_action(self, state):
        """
        Returns the action to execute.

        Parameters:
            state ([int, int]): A row index (a state) of the q-table.

        Returns:
            action (str): e.g. 'N', 'NE', 'NW', ...
        """
        # TODO get_new_action method
        ...

    def get_q_value(self, q_table_column_index, q_table_row_index):
        """
        Returns a q-value.

        Parameters:
            q_table_column_index (str): The column index of the searched value (the action).
            q_table_row_index (str): The row index of the searched value (the state).

        Returns:
            action (float): The value for the given indices.
        """
        # TODO get_q_value method
        ...

    def update_q_value(self, old_state, old_action, new_state, reward, terminal):
        """Apply the Q-learning update for one observed transition."""
        # TODO update_q_value method
        ...

    def get_q_state_from_position(self, marine_position, beacon_position):
        """
        Transforms the position of agent (marine) and beacon into a q-table row index.

        Parameters:
            marine_position ([int, int]): The position of the agent (marine).
            beacon_position ([int, int]): The position of the beacon.

        Returns:
            state ([int, int]): A row index (a state) of the q-table.
        """
        # TODO get_q_state_from_position method
        ...

    def init_q_table(self):
        """
        Initializes the q-table with random values.

        Returns:
            q_table (pandas.DataFrame): The q-table.
                The row indices are in the format '(x,y)'
                The column indices are in the format 'action' (e.g. 'W')
        """
        # The contract above requires string row indices, and lookups built
        # via get_row_index_in_string_format produce strings; the original
        # indexed rows with raw (x, y) tuples, which would never match.
        row_indices = [get_row_index_in_string_format(state) for state in self.states]
        return pd.DataFrame(np.random.rand(len(self.states), len(self.actions)),
                            index=row_indices, columns=self.actions)

agents/RandomAgent.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from agents.AbstractAgent import AbstractAgent
2+
import numpy as np
3+
4+
5+
class RandomAgent(AbstractAgent):
    """Agent that walks in a uniformly random compass direction each step."""

    def __init__(self, train, screen_size):
        super(RandomAgent, self).__init__(screen_size)

    def step(self, obs):
        """Select the army first, then wander in a random direction."""
        if self._MOVE_SCREEN.id not in obs.observation.available_actions:
            return self._SELECT_ARMY

        marine = self._get_marine(obs)
        if marine is None:
            return self._NO_OP

        direction = np.random.choice(list(self._DIRECTIONS.keys()))
        return self._dir_to_sc2_action(direction, self._get_unit_pos(marine))

    def save_model(self, filename):
        # Nothing to persist for a random agent.
        pass

    def load_model(self, filename):
        # Nothing to load for a random agent.
        pass

env.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from pysc2.env import sc2_env
2+
from pysc2.lib import features
3+
4+
5+
class Env:
    """Thin wrapper around the PySC2 MoveToBeacon environment."""

    def __init__(self, screen_size=32, minimap_size=32, visualize=False):
        """
        Parameters:
            screen_size (int): Pixel size of the quadratic feature screen.
            minimap_size (int): Pixel size of the quadratic minimap.
            visualize (bool): Whether to open the PySC2 render window.
        """
        self.sc2_env = sc2_env.SC2Env(
            map_name="MoveToBeacon",
            players=[sc2_env.Agent(sc2_env.Race.terran)],
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=screen_size, minimap=minimap_size),
                use_feature_units=True
            ),
            step_mul=8,  # game steps executed per agent step
            visualize=visualize
        )

    def reset(self):
        """Start a new episode and return the first (preprocessed) observation."""
        return self.preprocess_obs(self.sc2_env.reset())

    def step(self, action):
        """Execute *action* and return the next (preprocessed) observation."""
        return self.preprocess_obs(self.sc2_env.step([action]))

    def preprocess_obs(self, timesteps):
        """Reduce the per-agent timestep list to the single agent's timestep.

        Any kind of preprocessing can take place here.
        (Parameter renamed from the misspelled ``timsteps``; all in-file
        callers pass it positionally.)
        """
        return timesteps[0]

runScripts/runBasicAgent.py

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from absl import app
2+
3+
from env import Env
4+
from runners.basic_runner import Runner
5+
from agents.BasicAgent import BasicAgent
6+
7+
# Run configuration for the rule-based BasicAgent.
_CONFIG = dict(
    episodes=100,
    screen_size=64,
    minimap_size=64,
    visualize=True,
    train=False,
    agent=BasicAgent
)


def main(unused_argv):
    """Wire up agent, environment and runner, then run all episodes."""
    cfg = _CONFIG

    agent = cfg['agent'](train=cfg['train'], screen_size=cfg['screen_size'])

    env = Env(screen_size=cfg['screen_size'],
              minimap_size=cfg['minimap_size'],
              visualize=cfg['visualize'])

    runner = Runner(agent=agent, env=env, train=cfg['train'])
    runner.run(episodes=cfg['episodes'])


if __name__ == "__main__":
    app.run(main)

runScripts/runQLAgent.py

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# TODO Write the runQLAgent script
2+
from absl import app
3+
4+
from env import Env
5+
from runners.basic_runner import Runner
6+
from agents.QLearningAgent import QLearningAgent
7+
8+
# Run configuration for the tabular Q-learning agent.
_CONFIG = dict(
    episodes=100,
    screen_size=64,
    minimap_size=64,
    visualize=True,
    train=False,
    agent=QLearningAgent
)


def main(unused_argv):
    """Wire up agent, environment and runner, then run all episodes."""
    cfg = _CONFIG

    agent = cfg['agent'](train=cfg['train'], screen_size=cfg['screen_size'])

    env = Env(screen_size=cfg['screen_size'],
              minimap_size=cfg['minimap_size'],
              visualize=cfg['visualize'])

    runner = Runner(agent=agent, env=env, train=cfg['train'])
    runner.run(episodes=cfg['episodes'])


if __name__ == "__main__":
    app.run(main)

0 commit comments

Comments
 (0)