
Commit bd53a22

committed Oct 2, 2017
ddpg on torcs
1 parent d252ede commit bd53a22

11 files changed, +1237 −0 lines
 

‎__init__.py

Whitespace-only changes.

‎autostart.sh

+12
@@ -0,0 +1,12 @@
#!/bin/bash
# Simulate the key presses needed to start a race from the TORCS menu
# (requires xte from the xautomation package).
xte 'key Return'
xte 'usleep 100000'
xte 'key Return'
xte 'usleep 100000'
xte 'key Up'
xte 'usleep 100000'
xte 'key Up'
xte 'usleep 100000'
xte 'key Return'
xte 'usleep 100000'
xte 'key Return'

‎ddpg_actor.h5

769 KB
Binary file not shown.

‎ddpg_critic.h5

2.13 MB
Binary file not shown.

‎example_1.py

+29
@@ -0,0 +1,29 @@
from gym_torcs import TorcsEnv

env = TorcsEnv(vision=False, throttle=True, gear_change=False)

print('testing environment')

for i in range(10):
    print("episode: ", i)

    if i % 3 == 0:
        env.reset(relaunch=True)
    else:
        env.reset()

    for step in range(100):
        # action = np.random.random(3)
        # print(action)
        action = [0.2, 1, 0]
        observe, reward, done, info = env.step(action)

        # print(observe.rpm)
        # print(observe.wheelSpinVel)
        # print(observe.track, done)
        # trackPos is normalized to roughly -1 ~ 1
        # print('the distance between car and track: ', observe.trackPos)
        # angle is normalized: -1 is -180 degrees, +1 is +180 degrees
        # print('the angle between car and track: ', observe.angle)
        # print("x speed of car: ", observe.speedX, " y speed of car: ",
        #       observe.speedY, " z speed of car: ", observe.speedZ)
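With throttle=True and gear_change=False, env.step() expects a three-element action. A quick sketch of the layout, with illustrative values, mirroring agent_to_torcs() in gym_torcs.py below:

# Sketch only: action layout for TorcsEnv(throttle=True, gear_change=False).
steer = 0.2   # in [-1, 1], matching the actor's tanh output
accel = 1.0   # in [0, 1], matching the actor's sigmoid output
brake = 0.0   # in [0, 1]
action = [steer, accel, brake]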

‎example_2.py

+59
@@ -0,0 +1,59 @@
from gym_torcs import TorcsEnv
from collections import deque
import numpy as np
from keras.layers import Dense, Input, Concatenate
from keras.initializers import RandomNormal
from keras.models import Model


class DDPGAgent:
    def __init__(self):
        self.actor = self.build_actor()
        self.memory = deque(maxlen=100000)

    def build_actor(self):
        print("build actor network")
        state_input = Input(shape=[29])
        h1 = Dense(300, activation='relu')(state_input)
        h2 = Dense(600, activation='relu')(h1)
        # near-zero initial weights for the output layers
        init = RandomNormal(mean=0.0, stddev=1e-4)
        steer = Dense(1, activation='tanh', kernel_initializer=init)(h2)
        accel = Dense(1, activation='sigmoid', kernel_initializer=init)(h2)
        brake = Dense(1, activation='sigmoid', kernel_initializer=init)(h2)
        action = Concatenate()([steer, accel, brake])
        actor = Model(inputs=state_input, outputs=action)
        return actor

    def get_action(self, state):
        action = self.actor.predict(state)[0]
        return action


agent = DDPGAgent()
env = TorcsEnv(vision=False, throttle=True, gear_change=False)

print('testing sample agent on torcs')

for i in range(10):
    if i % 3 == 0:
        observe = env.reset(relaunch=True)
    else:
        observe = env.reset()

    state = np.hstack((observe.angle, observe.track, observe.trackPos,
                       observe.speedX, observe.speedY, observe.speedZ,
                       observe.wheelSpinVel / 100.0, observe.rpm))
    state = np.reshape(state, [1, np.shape(state)[0]])
    done = False

    while not done:
        action = agent.get_action(state)
        observe, reward, done, info = env.step(action)
        next_state = np.hstack((observe.angle, observe.track, observe.trackPos,
                                observe.speedX, observe.speedY, observe.speedZ,
                                observe.wheelSpinVel / 100.0, observe.rpm))
        next_state = np.reshape(next_state, [1, np.shape(next_state)[0]])

        state = next_state
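For reference, the 29-dimensional state assembled above breaks down as follows; the counts assume the standard TORCS sensors (19 track range finders, 4 wheel-spin values):

# Sketch only: where the 29 state dimensions come from.
state_layout = {
    'angle': 1,          # heading relative to the track axis
    'track': 19,         # range-finder distances to the track edge
    'trackPos': 1,       # lateral offset from the track centre
    'speedX/Y/Z': 3,     # longitudinal, lateral and vertical speed
    'wheelSpinVel': 4,   # wheel rotation speeds (scaled by 1/100 above)
    'rpm': 1,            # engine revolutions
}
assert sum(state_layout.values()) == 29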

‎example_ddpg.py

+197
@@ -0,0 +1,197 @@
from gym_torcs import TorcsEnv
from collections import deque
import numpy as np
from keras.layers import Dense, Input, Add, Concatenate
from keras.optimizers import Adam
from keras.models import Model
from keras import backend as K
import tensorflow as tf
import random


def ou_noise(x, mu, theta, sigma):
    # Ornstein-Uhlenbeck process: drift toward mu plus Gaussian diffusion
    return theta * (mu - x) + sigma * np.random.randn(1)


def normal(shape, scale=0.05, name=None):
    return K.variable(np.random.normal(loc=0.0, scale=scale, size=shape),
                      name=name)


class DDPGAgent:
    def __init__(self):
        self.action_size = 3
        self.state_size = 29

        self.actor, self.actor_weight = self.build_actor()
        self.actor_target, self.actor_target_weight = self.build_actor()
        self.critic, self.critic_state, self.critic_action = self.build_critic()
        self.critic_target, _, _ = self.build_critic()

        # actor optimizer: ascend the critic's action gradient
        self.action_grads = K.placeholder(shape=[None, self.action_size])
        params_grad = tf.gradients(self.actor.output, self.actor_weight,
                                   -self.action_grads)
        grads = zip(params_grad, self.actor_weight)
        self.optimize = tf.train.AdamOptimizer(0.0001).apply_gradients(grads)

        self.memory = deque(maxlen=100000)
        self.batch_size = 32
        self.discount_factor = 0.99
        self.tau = 0.001
        self.epsilon = 1
        self.epsilon_decay = 1 / 100000

        self.sess = tf.Session()
        K.set_session(self.sess)
        self.sess.run(tf.global_variables_initializer())

    def build_actor(self):
        print("building actor network")
        state_input = Input(shape=[self.state_size])
        h1 = Dense(300, activation='relu')(state_input)
        h2 = Dense(600, activation='relu')(h1)
        steer = Dense(1, activation='tanh')(h2)
        accel = Dense(1, activation='sigmoid')(h2)
        brake = Dense(1, activation='sigmoid')(h2)
        action = Concatenate()([steer, accel, brake])
        actor = Model(inputs=state_input, outputs=action)
        return actor, actor.trainable_weights

    '''
    def actor_optimizer(self):
        self.action_grads = K.placeholder(shape=[None, self.action_size])
        # loss = -self.actor.output * action_grads
        params_grad = tf.gradients(self.actor.output, self.actor_weight,
                                   -self.action_grads)
        grads = zip(params_grad, self.actor_weight)
        self.optimize = tf.train.AdamOptimizer(0.0001).apply_gradients(grads)
        # optimizer = Adam(lr=0.0001)
        # updates = optimizer.get_updates(self.actor_weight, [], loss)
        # train = K.function([self.actor.input, action_grads], [], updates=updates)
        # return train
    '''

    def update_actor(self, states, gradient):
        self.sess.run(self.optimize, feed_dict={
            self.actor.input: states,
            self.action_grads: gradient
        })

    def build_critic(self):
        print("building critic network")
        state = Input(shape=[29])
        action = Input(shape=[3], name='action_input')
        w1 = Dense(300, activation='relu')(state)
        h1 = Dense(600, activation='linear')(w1)
        a1 = Dense(600, activation='linear')(action)
        h2 = Add()([h1, a1])
        h3 = Dense(600, activation='relu')(h2)
        V = Dense(1, activation='linear')(h3)
        model = Model(inputs=[state, action], outputs=V)
        model.compile(loss='mse', optimizer=Adam(lr=0.001))
        # model.summary()
        return model, state, action

    def get_action(self, state):
        self.epsilon -= self.epsilon_decay
        noise = np.zeros([self.action_size])
        action = self.actor.predict(state)[0]
        # exploration noise, annealed by epsilon: steer / accel / brake
        noise[0] = max(self.epsilon, 0) * ou_noise(action[0], 0.0, 0.60, 0.30)
        noise[1] = max(self.epsilon, 0) * ou_noise(action[1], 0.5, 1.00, 0.10)
        noise[2] = max(self.epsilon, 0) * ou_noise(action[2], -0.1, 1.00, 0.05)
        real = action + noise
        return real

    def save_sample(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_model(self):
        mini_batch = random.sample(self.memory, self.batch_size)

        states = np.asarray([e[0] for e in mini_batch])
        actions = np.asarray([e[1] for e in mini_batch])
        rewards = np.asarray([e[2] for e in mini_batch])
        next_states = np.asarray([e[3] for e in mini_batch])
        dones = np.asarray([e[4] for e in mini_batch])

        # bootstrapped critic targets from the target networks
        target_q_values = self.critic_target.predict(
            [next_states, self.actor_target.predict(next_states)])

        targets = np.zeros([self.batch_size, 1])
        for i in range(self.batch_size):
            if dones[i]:
                targets[i] = rewards[i]
            else:
                targets[i] = rewards[i] + self.discount_factor * target_q_values[i]

        loss = 0
        loss += self.critic.train_on_batch([states, actions], targets)

        # actor update: feed the critic's dQ/da back through the actor
        a_for_grad = self.actor.predict(states)
        action_grads = tf.gradients(self.critic.output, self.critic_action)
        grads = self.sess.run(action_grads, feed_dict={
            self.critic_state: states, self.critic_action: a_for_grad})[0]
        self.update_actor(states, grads)

        # soft update of the target networks
        actor_weights = self.actor.get_weights()
        actor_target_weights = self.actor_target.get_weights()
        for i in range(len(actor_weights)):
            actor_target_weights[i] = self.tau * actor_weights[i] + \
                (1 - self.tau) * actor_target_weights[i]
        self.actor_target.set_weights(actor_target_weights)

        critic_weights = self.critic.get_weights()
        critic_target_weights = self.critic_target.get_weights()
        for i in range(len(critic_weights)):
            critic_target_weights[i] = self.tau * critic_weights[i] + \
                (1 - self.tau) * critic_target_weights[i]
        self.critic_target.set_weights(critic_target_weights)


agent = DDPGAgent()
env = TorcsEnv(vision=False, throttle=True, gear_change=False)

print('testing sample agent on torcs')
global_step = 0

for e in range(2000):
    step = 0
    score = 0
    if e % 10 == 0:
        observe = env.reset(relaunch=True)
        print("saving model weights")
        agent.actor.save_weights("ddpg_actor.h5", overwrite=True)
        agent.critic.save_weights("ddpg_critic.h5", overwrite=True)
    else:
        observe = env.reset()

    state = np.hstack((observe.angle, observe.track, observe.trackPos,
                       observe.speedX, observe.speedY, observe.speedZ,
                       observe.wheelSpinVel / 100.0, observe.rpm))
    done = False

    while not done:
        step += 1
        global_step += 1
        action = agent.get_action(state.reshape(1, state.shape[0]))
        observe, reward, done, info = env.step(action)
        score += reward
        next_state = np.hstack((observe.angle, observe.track, observe.trackPos,
                                observe.speedX, observe.speedY, observe.speedZ,
                                observe.wheelSpinVel / 100.0, observe.rpm))

        agent.save_sample(state, action, reward, next_state, done)

        if global_step > 1000:
            agent.train_model()

        # print(' step: ', step, ' action: ', action, ' reward: ', reward)
        state = next_state

        if done:
            print('episode: ', e, ' score: ', score, ' step: ', global_step,
                  ' epsilon: ', agent.epsilon)
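For reference, train_model() above implements the standard DDPG updates (Lillicrap et al.), with \gamma = discount_factor = 0.99 and \tau = tau = 0.001:

y_i = r_i + \gamma\, Q'\!\big(s_{i+1}, \mu'(s_{i+1})\big) \quad (y_i = r_i \text{ at terminal transitions})

L_{\text{critic}} = \tfrac{1}{N} \textstyle\sum_i \big(y_i - Q(s_i, a_i)\big)^2

\nabla_{\theta^\mu} J \approx \tfrac{1}{N} \textstyle\sum_i \nabla_a Q(s, a)\big|_{s=s_i,\,a=\mu(s_i)}\, \nabla_{\theta^\mu}\mu(s)\big|_{s_i}

\theta^{\text{target}} \leftarrow \tau\,\theta + (1 - \tau)\,\theta^{\text{target}} \quad \text{(soft update for both actor and critic)}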

‎gym_torcs.py

+302
@@ -0,0 +1,302 @@
import gym
from gym import spaces
import numpy as np
# from os import path
import snakeoil3_gym as snakeoil3
import copy
import collections as col
import os
import time


class TorcsEnv:
    terminal_judge_start = 100  # episode may terminate if there is still no progress after 100 timesteps
    termination_limit_progress = 5  # [km/h], episode terminates if the car runs slower than this limit
    default_speed = 50

    initial_reset = True

    def __init__(self, vision=False, throttle=False, gear_change=False):
        self.vision = vision
        self.throttle = throttle
        self.gear_change = gear_change

        self.initial_run = True

        # print("launch torcs")
        os.system('pkill torcs')
        time.sleep(0.5)
        if self.vision is True:
            os.system('torcs -nofuel -nodamage -nolaptime -vision &')
        else:
            os.system('torcs -nofuel -nolaptime &')
        time.sleep(0.5)
        os.system('sh autostart.sh')
        time.sleep(0.5)

        """
        # Modify here if you use multiple tracks in the environment
        self.client = snakeoil3.Client(p=3101, vision=self.vision)  # Open new UDP in vtorcs
        self.client.MAX_STEPS = np.inf

        client = self.client
        client.get_servers_input()  # Get the initial input from torcs

        obs = client.S.d  # Get the current full observation from torcs
        """
        if throttle is False:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,))
        else:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,))

        if vision is False:
            high = np.array(
                [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf])
            low = np.array(
                [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf])
            self.observation_space = spaces.Box(low=low, high=high)
        else:
            high = np.array(
                [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255])
            low = np.array(
                [0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0])
            self.observation_space = spaces.Box(low=low, high=high)

    def step(self, u):
        # print("Step")
        # convert the agent's action to the actual torcs action dict
        client = self.client

        this_action = self.agent_to_torcs(u)

        # Apply Action
        action_torcs = client.R.d

        # Steering
        action_torcs['steer'] = this_action['steer']  # in [-1, 1]

        # Simple Automatic Throttle Control by Snakeoil
        if self.throttle is False:
            target_speed = self.default_speed
            if client.S.d['speedX'] < target_speed - (client.R.d['steer'] * 50):
                client.R.d['accel'] += .01
            else:
                client.R.d['accel'] -= .01

            if client.R.d['accel'] > 0.2:
                client.R.d['accel'] = 0.2

            if client.S.d['speedX'] < 10:
                client.R.d['accel'] += 1 / (client.S.d['speedX'] + .1)

            # Traction Control System
            if ((client.S.d['wheelSpinVel'][2] + client.S.d['wheelSpinVel'][3]) -
                    (client.S.d['wheelSpinVel'][0] + client.S.d['wheelSpinVel'][1]) > 5):
                action_torcs['accel'] -= .2
        else:
            action_torcs['accel'] = this_action['accel']
            action_torcs['brake'] = this_action['brake']

        # Gear change
        if self.gear_change is True:
            action_torcs['gear'] = this_action['gear']
        else:
            # otherwise shift gears automatically based on speed
            action_torcs['gear'] = 1
            if self.throttle:
                if client.S.d['speedX'] > 50:
                    action_torcs['gear'] = 2
                if client.S.d['speedX'] > 80:
                    action_torcs['gear'] = 3
                if client.S.d['speedX'] > 110:
                    action_torcs['gear'] = 4
                if client.S.d['speedX'] > 140:
                    action_torcs['gear'] = 5
                if client.S.d['speedX'] > 170:
                    action_torcs['gear'] = 6

        # Save the previous full observation from torcs for the reward calculation
        obs_pre = copy.deepcopy(client.S.d)

        # One-Step Dynamics Update #################################
        # Apply the agent's action to torcs
        client.respond_to_server()
        # Get the response of TORCS
        client.get_servers_input()

        # Get the current full observation from torcs
        obs = client.S.d

        # Make an observation from the raw observation vector from TORCS
        self.observation = self.make_observaton(obs)

        # Reward setting here #######################################
        # direction-dependent positive reward
        track = np.array(obs['track'])
        trackPos = np.array(obs['trackPos'])
        sp = np.array(obs['speedX'])
        damage = np.array(obs['damage'])
        rpm = np.array(obs['rpm'])

        progress = sp * np.cos(obs['angle']) - np.abs(
            sp * np.sin(obs['angle'])) - sp * np.abs(obs['trackPos'])
        reward = progress

        # collision detection
        if obs['damage'] - obs_pre['damage'] > 0:
            reward = -1

        # Termination judgement #########################
        episode_terminate = False
        # if (abs(track.any()) > 1 or abs(trackPos) > 1):  # Episode is terminated if the car is out of track
        #     reward = -200
        #     episode_terminate = True
        #     client.R.d['meta'] = True

        # if self.terminal_judge_start < self.time_step:  # Episode terminates if the progress of agent is small
        #     if progress < self.termination_limit_progress:
        #         print("No progress")
        #         episode_terminate = True
        #         client.R.d['meta'] = True

        if np.cos(obs['angle']) < 0:  # Episode is terminated if the agent runs backward
            episode_terminate = True
            client.R.d['meta'] = True

        if client.R.d['meta'] is True:  # Send a reset signal
            self.initial_run = False
            client.respond_to_server()

        self.time_step += 1

        return self.get_obs(), reward, client.R.d['meta'], {}

    def reset(self, relaunch=False):
        # print("Reset")

        self.time_step = 0

        if self.initial_reset is not True:
            self.client.R.d['meta'] = True
            self.client.respond_to_server()

            ## TENTATIVE. Restarting TORCS every episode suffers from the memory leak bug!
            if relaunch is True:
                self.reset_torcs()
                print("### TORCS is RELAUNCHED ###")

        # Modify here if you use multiple tracks in the environment
        self.client = snakeoil3.Client(p=3101,
                                       vision=self.vision)  # Open new UDP in vtorcs
        self.client.MAX_STEPS = np.inf

        client = self.client
        client.get_servers_input()  # Get the initial input from torcs

        obs = client.S.d  # Get the current full observation from torcs
        self.observation = self.make_observaton(obs)

        self.last_u = None

        self.initial_reset = False
        return self.get_obs()

    def end(self):
        os.system('pkill torcs')

    def get_obs(self):
        return self.observation

    def reset_torcs(self):
        # print("relaunch torcs")
        os.system('pkill torcs')
        time.sleep(0.5)
        if self.vision is True:
            os.system('torcs -nofuel -nodamage -nolaptime -vision &')
        else:
            os.system('torcs -nofuel -nolaptime &')
        time.sleep(0.5)
        os.system('sh autostart.sh')
        time.sleep(0.5)

    def agent_to_torcs(self, u):
        torcs_action = {'steer': u[0]}

        if self.throttle is True:  # throttle action is enabled
            torcs_action.update({'accel': u[1]})
            torcs_action.update({'brake': u[2]})

        if self.gear_change is True:  # gear change action is enabled
            torcs_action.update({'gear': int(u[3])})

        return torcs_action

    def obs_vision_to_image_rgb(self, obs_image_vec):
        image_vec = obs_image_vec
        r = image_vec[0:len(image_vec):3]
        g = image_vec[1:len(image_vec):3]
        b = image_vec[2:len(image_vec):3]

        sz = (64, 64)
        r = np.array(r).reshape(sz)
        g = np.array(g).reshape(sz)
        b = np.array(b).reshape(sz)
        return np.array([r, g, b], dtype=np.uint8)

    def make_observaton(self, raw_obs):
        if self.vision is False:
            names = ['focus',
                     'speedX', 'speedY', 'speedZ', 'angle', 'damage',
                     'opponents',
                     'rpm',
                     'track',
                     'trackPos',
                     'wheelSpinVel']
            Observation = col.namedtuple('Observation', names)
            return Observation(
                focus=np.array(raw_obs['focus'], dtype=np.float32) / 200.,
                speedX=np.array(raw_obs['speedX'], dtype=np.float32) / 300.0,
                speedY=np.array(raw_obs['speedY'], dtype=np.float32) / 300.0,
                speedZ=np.array(raw_obs['speedZ'], dtype=np.float32) / 300.0,
                angle=np.array(raw_obs['angle'], dtype=np.float32) / 3.1416,
                damage=np.array(raw_obs['damage'], dtype=np.float32),
                opponents=np.array(raw_obs['opponents'],
                                   dtype=np.float32) / 200.,
                rpm=np.array(raw_obs['rpm'], dtype=np.float32) / 10000,
                track=np.array(raw_obs['track'], dtype=np.float32) / 200.,
                trackPos=np.array(raw_obs['trackPos'], dtype=np.float32) / 1.,
                wheelSpinVel=np.array(raw_obs['wheelSpinVel'],
                                      dtype=np.float32))
        else:
            names = ['focus',
                     'speedX', 'speedY', 'speedZ', 'angle',
                     'opponents',
                     'rpm',
                     'track',
                     'trackPos',
                     'wheelSpinVel',
                     'img']
            Observation = col.namedtuple('Observation', names)

            # Get the RGB image from the vision observation
            image_rgb = self.obs_vision_to_image_rgb(raw_obs['img'])

            return Observation(
                focus=np.array(raw_obs['focus'], dtype=np.float32) / 200.,
                speedX=np.array(raw_obs['speedX'],
                                dtype=np.float32) / self.default_speed,
                speedY=np.array(raw_obs['speedY'],
                                dtype=np.float32) / self.default_speed,
                speedZ=np.array(raw_obs['speedZ'],
                                dtype=np.float32) / self.default_speed,
                angle=np.array(raw_obs['angle'], dtype=np.float32) / 3.1416,
                opponents=np.array(raw_obs['opponents'],
                                   dtype=np.float32) / 200.,
                rpm=np.array(raw_obs['rpm'], dtype=np.float32),
                track=np.array(raw_obs['track'], dtype=np.float32) / 200.,
                trackPos=np.array(raw_obs['trackPos'], dtype=np.float32) / 1.,
                wheelSpinVel=np.array(raw_obs['wheelSpinVel'],
                                      dtype=np.float32),
                img=image_rgb)
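In summary, the per-step reward computed in step() above, with v_x = speedX and \theta = angle, is

r_t = v_x \cos\theta - |v_x \sin\theta| - v_x\,|\mathrm{trackPos}|,

overridden to r_t = -1 whenever damage increases; the episode ends (the meta flag is set) once \cos\theta < 0, i.e. when the car faces backward.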

‎gym_torcs.pyc

7.38 KB
Binary file not shown.

‎snakeoil3_gym.py

+638
Large diffs are not rendered by default.

‎snakeoil3_gym.pyc

17.9 KB
Binary file not shown.
