|
| 1 | +import gym |
| 2 | +from gym import spaces |
| 3 | +import numpy as np |
| 4 | +# from os import path |
| 5 | +import snakeoil3_gym as snakeoil3 |
| 6 | +import numpy as np |
| 7 | +import copy |
| 8 | +import collections as col |
| 9 | +import os |
| 10 | +import time |
| 11 | + |
| 12 | + |
class TorcsEnv:
    """Gym-style environment that drives a TORCS car through a snakeoil3 client.

    The environment launches TORCS as an external process, connects to it
    with ``snakeoil3.Client`` on port 3101 and exposes ``reset``/``step``.

    Typical usage::

        env = TorcsEnv(vision=False, throttle=True)
        ob = env.reset()
        ob, reward, done, info = env.step(action)
        env.end()

    Action layout (see ``agent_to_torcs``): ``u[0]`` steer, ``u[1]`` accel,
    ``u[2]`` brake (optional), ``u[3]`` gear.
    """

    # Thresholds for the "no progress" termination rule. That rule is
    # currently disabled in step(); the attributes are kept for callers.
    terminal_judge_start = 100  # time steps before the rule would apply
    termination_limit_progress = 5  # [km/h] minimum progress
    default_speed = 50  # cruise target used when the agent has no throttle

    # True until the first reset() has completed on this instance.
    initial_reset = True

    # Class-level defaults so step() can detect that reset() was never called.
    client = None
    observation = None
    time_step = 0

    # (speed threshold, gear) pairs for the automatic gearbox; a later pair
    # overrides an earlier one when its threshold is also exceeded.
    _GEAR_UP_SPEEDS = ((50, 2), (80, 3), (110, 4), (140, 5), (170, 6))

    # Observation record types, built once instead of on every step. The
    # runtime type name 'Observaion' is kept as it was.
    _SensorObservation = col.namedtuple(
        'Observaion',
        ['focus',
         'speedX', 'speedY', 'speedZ', 'angle', 'damage',
         'opponents',
         'rpm',
         'track',
         'trackPos',
         'wheelSpinVel'])
    _VisionObservation = col.namedtuple(
        'Observaion',
        ['focus',
         'speedX', 'speedY', 'speedZ', 'angle',
         'opponents',
         'rpm',
         'track',
         'trackPos',
         'wheelSpinVel',
         'img'])

    def __init__(self, vision=False, throttle=False, gear_change=False):
        """Launch TORCS and declare the action/observation spaces.

        Args:
            vision: when True, TORCS is started with ``-vision`` and
                observations carry an ``img`` field.
            throttle: when True, the agent supplies accel/brake itself;
                otherwise step() runs a simple cruise controller.
            gear_change: when True, the agent supplies the gear.

        The UDP client is not created here; it is created by reset().
        """
        self.vision = vision
        self.throttle = throttle
        self.gear_change = gear_change

        self.initial_run = True

        # Same kill / launch / autostart sequence as a relaunch.
        self.reset_torcs()

        if throttle is False:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,))
        else:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,))

        high = [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf]
        low = [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf]
        if vision is not False:
            # One extra bound for the image channel values.
            high.append(255)
            low.append(0)
        self.observation_space = spaces.Box(low=np.array(low),
                                            high=np.array(high))

    def step(self, u):
        """Apply action ``u`` for one simulation step.

        Args:
            u: indexable action vector, see ``agent_to_torcs``.

        Returns:
            ``(observation, reward, done, info)`` where ``done`` is the
            client's ``meta`` flag and ``info`` is an empty dict.

        Raises:
            RuntimeError: if reset() has not been called yet.
        """
        client = self.client
        if client is None:
            raise RuntimeError("reset() must be called before step()")

        this_action = self.agent_to_torcs(u)

        action_torcs = client.R.d
        sensors = client.S.d

        # Steering, in [-1, 1]
        action_torcs['steer'] = this_action['steer']

        if self.throttle is False:
            # Simple automatic throttle control taken from snakeoil.
            self._apply_cruise_control(sensors, action_torcs)
        else:
            action_torcs['accel'] = this_action['accel']
            action_torcs['brake'] = this_action['brake']

        if self.gear_change is True:
            action_torcs['gear'] = this_action['gear']
        else:
            # Automatic gearbox: only shifts up when the agent controls the
            # throttle; otherwise the car stays in first gear.
            gear = 1
            if self.throttle:
                speed = sensors['speedX']
                for threshold, candidate in self._GEAR_UP_SPEEDS:
                    if speed > threshold:
                        gear = candidate
            action_torcs['gear'] = gear

        # Keep the previous full observation for the collision check.
        obs_pre = copy.deepcopy(client.S.d)

        # One-step dynamics update: send the action, then read the reply.
        client.respond_to_server()
        client.get_servers_input()

        obs = client.S.d
        self.observation = self.make_observaton(obs)

        # Reward: speed along the track axis, penalised by lateral speed
        # and by the distance from the track centre.
        sp = np.array(obs['speedX'])
        progress = sp * np.cos(obs['angle']) - np.abs(
            sp * np.sin(obs['angle'])) - sp * np.abs(obs['trackPos'])
        reward = progress

        # Collision detection: damage increased during this step.
        if obs['damage'] - obs_pre['damage'] > 0:
            reward = -1

        # Termination. The out-of-track and no-progress rules are disabled;
        # only driving backwards ends the episode.
        if np.cos(obs['angle']) < 0:
            client.R.d['meta'] = True

        if client.R.d['meta'] is True:  # send the reset signal
            self.initial_run = False
            client.respond_to_server()

        self.time_step += 1

        return self.get_obs(), reward, client.R.d['meta'], {}

    def _apply_cruise_control(self, sensors, controls):
        """Adjust ``controls['accel']`` in place towards ``default_speed``."""
        target_speed = self.default_speed
        if sensors['speedX'] < target_speed - (controls['steer'] * 50):
            controls['accel'] += .01
        else:
            controls['accel'] -= .01

        if controls['accel'] > 0.2:
            controls['accel'] = 0.2

        # Extra push at low speed.
        if sensors['speedX'] < 10:
            controls['accel'] += 1 / (sensors['speedX'] + .1)

        # Traction control: back off when wheels 2 and 3 spin much faster
        # than wheels 0 and 1.
        spin = sensors['wheelSpinVel']
        if (spin[2] + spin[3]) - (spin[0] + spin[1]) > 5:
            controls['accel'] -= .2

    def reset(self, relaunch=False):
        """Start a new episode and return the first observation.

        Args:
            relaunch: when True, TORCS itself is killed and restarted
                before reconnecting.
        """
        self.time_step = 0

        if self.initial_reset is not True:
            # Ask the running race to restart through the existing client.
            self.client.R.d['meta'] = True
            self.client.respond_to_server()

            # TENTATIVE: restarting TORCS every episode suffers from the
            # memory leak bug, hence the opt-in relaunch.
            if relaunch is True:
                self.reset_torcs()
                print("### TORCS is RELAUNCHED ###")

        # Modify here if you use multiple tracks in the environment.
        self.client = snakeoil3.Client(p=3101, vision=self.vision)
        self.client.MAX_STEPS = np.inf

        client = self.client
        client.get_servers_input()  # initial input from TORCS

        obs = client.S.d  # current full observation from TORCS
        self.observation = self.make_observaton(obs)

        self.last_u = None

        self.initial_reset = False
        return self.get_obs()

    def end(self):
        """Kill the TORCS process."""
        os.system('pkill torcs')

    def get_obs(self):
        """Return the most recently built observation."""
        return self.observation

    def reset_torcs(self):
        """Kill any running TORCS, start a new one and run autostart.sh."""
        if self.vision is True:
            launch_cmd = 'torcs -nofuel -nodamage -nolaptime -vision &'
        else:
            launch_cmd = 'torcs -nofuel -nolaptime &'

        # Each command is followed by a short pause before the next one.
        for command in ('pkill torcs', launch_cmd, 'sh autostart.sh'):
            os.system(command)
            time.sleep(0.5)

    def agent_to_torcs(self, u):
        """Translate the agent's action vector into a TORCS action dict.

        Args:
            u: indexable action; ``u[0]`` steer, ``u[1]`` accel and
                ``u[2]`` brake when throttle is enabled, ``u[3]`` gear when
                gear change is enabled.

        Returns:
            dict with ``steer`` and, depending on the configuration,
            ``accel``, ``brake`` and ``gear``.
        """
        torcs_action = {'steer': u[0]}

        if self.throttle is True:  # throttle action is enabled
            torcs_action['accel'] = u[1]
            # The declared action space has only two entries, so a
            # conforming action carries no brake value: default to none.
            torcs_action['brake'] = u[2] if len(u) > 2 else 0.0

        if self.gear_change is True:  # gear change action is enabled
            torcs_action['gear'] = int(u[3])

        return torcs_action

    def obs_vision_to_image_rgb(self, obs_image_vec):
        """Split an interleaved R,G,B vector into a (3, 64, 64) uint8 array."""
        sz = (64, 64)
        channels = [np.array(obs_image_vec[offset:len(obs_image_vec):3])
                    .reshape(sz)
                    for offset in range(3)]
        return np.array(channels, dtype=np.uint8)

    def make_observaton(self, raw_obs):
        """Build the observation namedtuple from a raw TORCS sensor dict.

        Args:
            raw_obs: mapping of sensor name to value(s) as provided by the
                client (``client.S.d``).

        Returns:
            A namedtuple of float32 arrays; with vision enabled it has no
            ``damage`` field and carries the RGB image in ``img``.
        """
        def as_f32(key):
            return np.array(raw_obs[key], dtype=np.float32)

        if self.vision is False:
            return self._SensorObservation(
                focus=as_f32('focus') / 200.,
                speedX=as_f32('speedX') / 300.0,
                speedY=as_f32('speedY') / 300.0,
                speedZ=as_f32('speedZ') / 300.0,
                angle=as_f32('angle') / 3.1416,
                damage=as_f32('damage'),
                opponents=as_f32('opponents') / 200.,
                rpm=as_f32('rpm') / 10000,
                track=as_f32('track') / 200.,
                trackPos=as_f32('trackPos') / 1.,
                wheelSpinVel=as_f32('wheelSpinVel'))

        # Read the image by key: a positional lookup in the field list
        # silently breaks whenever the list changes.
        image_rgb = self.obs_vision_to_image_rgb(raw_obs['img'])

        return self._VisionObservation(
            focus=as_f32('focus') / 200.,
            speedX=as_f32('speedX') / self.default_speed,
            speedY=as_f32('speedY') / self.default_speed,
            speedZ=as_f32('speedZ') / self.default_speed,
            # 'angle' is a declared field and must be supplied; scaled the
            # same way as in the non-vision observation.
            angle=as_f32('angle') / 3.1416,
            opponents=as_f32('opponents') / 200.,
            rpm=as_f32('rpm'),
            track=as_f32('track') / 200.,
            trackPos=as_f32('trackPos') / 1.,
            wheelSpinVel=as_f32('wheelSpinVel'),
            img=image_rgb)
0 commit comments