1
+ using ContinuousTimePolicyGradients
2
+ using DiffEqFlux, ComponentArrays, LinearAlgebra, JLD2, OrdinaryDiffEq
3
+ using Plots
4
+
5
+ function main (maxiters_1:: Int , maxiters_2:: Int , Δt_save:: Float32 ; p_NN_0 = nothing , k_a_val = 10.0 )
6
+ # model + problem parameters
7
+ (aₐ, aₙ, bₙ, cₙ, dₙ, aₘ, bₘ, cₘ, dₘ) = Float32 .([- 0.3 , 19.373 , - 31.023 , - 9.717 , - 1.948 , 40.44 , - 64.015 , 2.922 , - 11.803 ])
8
+ (m, I_yy, S, d, ωₐ, ζₐ) = Float32 .([204.02 , 247.439 , 0.0409 , 0.2286 , 150.0 , 0.7 ])
9
+ (g, ρ₀, H, γₐ, Rₐ, T₀, λ) = Float32 .([9.8 , 1.225 , 8435.0 , 1.4 , 286.0 , 288.15 , 0.0065 ])
10
+ (a_max, α_max, δ_max, δ̇_max, q_max, M_max, h_max) = Float32 .([100.0 , deg2rad (30 ), deg2rad (25 ), 1.5 , deg2rad (60 ), 4 , 11E3 ])
11
+
12
+ (k_a, k_q, k_δ, k_δ̇, k_R) = Float32 .([k_a_val, 0.0 , 0.01 , 0.1 , 1E-4 ])
13
+
14
+ # dynamic model
15
+ dim_x = 7
16
+ function dynamics_plant (t, x, u)
17
+ (h, V, α, q, θ, δ, δ̇) = x
18
+ δ_c = u[1 ]
19
+
20
+ ρ = ρ₀ * exp (- h / H)
21
+ Vₛ = sqrt (γₐ * Rₐ * (T₀ - λ * h))
22
+ M = V / Vₛ
23
+ γ = θ - α
24
+ Q = 0.5f0 * ρ * V^ 2
25
+
26
+ C_A = aₐ
27
+ C_N = aₙ * α^ 3 + bₙ * α * abs (α) + cₙ * (2.0f0 - M / 3.0f0 ) * α + dₙ * δ
28
+ C_M = aₘ * α^ 3 + bₘ * α * abs (α) + cₘ * (- 7.0f0 + 8.0f0 * M / 3.0f0 ) * α + dₘ * δ
29
+
30
+ α̇ = Q * S / m / V * (C_N * cos (α) - C_A * sin (α)) + g / V * cos (γ) + q
31
+
32
+ dx = [V * sin (γ);
33
+ Q * S / m * (C_N * sin (α) + C_A * cos (α)) - g * sin (γ);
34
+ α̇;
35
+ Q * S * d / I_yy * C_M;
36
+ q;
37
+ δ̇;
38
+ - ωₐ^ 2 * (δ - δ_c) - 2.0f0 * ζₐ * ωₐ * δ̇]
39
+ return dx
40
+ end
41
+
42
+ dim_x_c = 2
43
+ function dynamics_controller (t, x_c, y, r, p_NN, policy_NN)
44
+ (a_z, h, V, M, α, q, γ) = y
45
+ a_z_cmd = r[1 ]
46
+ x_int = x_c[1 ]
47
+ x_ref = x_c[2 ]
48
+
49
+ y_NN = (K_A, K_I, K_R) = policy_NN ([ abs (α) / α_max; M / M_max; h / h_max], p_NN)
50
+
51
+ # dx_ref = [-36.6667f0 -13.8889f0; 8.0f0 0.0f0] * x_ref + [4.0f0; 0.0f0] * a_z_cmd
52
+ # a_z_ref = [-1.0083f0 3.4722f0] * x_ref
53
+ dx_ref = (a_z_cmd - x_ref) / 0.2f0
54
+ a_z_ref = x_ref
55
+
56
+ dx_c = [K_A * (a_z_cmd - a_z) + q + (a_z_cmd + g * cos (γ)) / V;
57
+ dx_ref]
58
+ u = [K_I * x_int + K_R * q;
59
+ a_z_ref]
60
+ return dx_c, u, y_NN
61
+ end
62
+
63
+ function dynamics_sensor (t, x)
64
+ (h, V, α, q, θ, δ, _) = x
65
+
66
+ ρ = ρ₀ * exp (- h / H)
67
+ Vₛ = sqrt (γₐ * Rₐ * (T₀ - λ * h))
68
+ M = V / Vₛ
69
+ Q = 0.5f0 * ρ * V^ 2
70
+ γ = θ - α
71
+
72
+ C_A = aₐ
73
+ C_N = aₙ * α^ 3 + bₙ * α * abs (α) + cₙ * (2.0f0 - M / 3.0f0 ) * α + dₙ * δ
74
+
75
+ a_z = Q * S / m * (C_N * cos (α) - C_A * sin (α))
76
+ y = [a_z;
77
+ h;
78
+ V;
79
+ M;
80
+ α;
81
+ q;
82
+ γ]
83
+ return y
84
+ end
85
+
86
+ # cost definition
87
+ function cost_running (t, x, y, u, r)
88
+ q = x[4 ]
89
+ δ̇ = x[7 ]
90
+ a_z = y[1 ]
91
+ δ_c = u[1 ]
92
+ a_z_ref = u[2 ]
93
+ a_z_cmd = r[1 ]
94
+ return k_a * ((a_z - a_z_ref) / (1.0f0 + abs (a_z_cmd)))^ 2 + k_δ̇ * (δ̇ / δ̇_max)^ 2 + k_δ * (δ_c / δ_max)^ 2 + k_q * (q / q_max)^ 2
95
+ end
96
+
97
+ function cost_terminal (x_f, r)
98
+ # a_z_cmd = r[1]
99
+ # y = dynamics_sensor(3.0f0, x_f)
100
+ # a_z = y[1]
101
+ # return ((a_z - a_z_cmd) / (1.0f0 + a_z_cmd))^2
102
+ return 0.0f0
103
+ end
104
+
105
+ function cost_regularisor (p_NN)
106
+ return k_R * norm (p_NN)^ 2
107
+ # return 0.0f0
108
+ end
109
+
110
+ # NN construction
111
+ dim_NN_hidden = 10
112
+ dim_NN_input = 3
113
+ dim_K = 3
114
+ K_lb = Float32 .(0.001 * ones (3 ))
115
+ K_ub = Float32[4 , 0.2 , 2 ]
116
+ policy_NN = FastChain (
117
+ FastDense (dim_NN_input, dim_NN_hidden, tanh),
118
+ FastDense (dim_NN_hidden, dim_NN_hidden, tanh),
119
+ FastDense (dim_NN_hidden, dim_K),
120
+ (x, p) -> (K_ub - K_lb) .* σ .(x) .+ K_lb
121
+ )
122
+
123
+ # scenario definition
124
+ ensemble = [ (; x₀ = Float32[h₀; V₀; zeros (5 )], r = Float32[a_z_cmd])
125
+ for h₀ = 5E3 : 1E3 : 8E3
126
+ for V₀ = 7E2 : 1E2 : 9E2
127
+ for a_z_cmd = - 1E2 : 2.5E1 : 1E2 ]
128
+ t_span = Float32 .((0.0 , 3.0 ))
129
+ t_save = t_span[1 ]: Δt_save: t_span[2 ]
130
+
131
+ scenario = (; ensemble = ensemble, t_span = t_span, t_save = t_save, dim_x = dim_x, dim_x_c = dim_x_c)
132
+
133
+ # NN training
134
+ (result, fwd_ensemble_sol, loss_history) = CTPG_train (dynamics_plant, dynamics_controller, dynamics_sensor, cost_running, cost_terminal, cost_regularisor, policy_NN, scenario; sense_alg = InterpolatingAdjoint (autojacvec = ReverseDiffVJP (true )), ensemble_alg = EnsembleThreads (), maxiters_1 = maxiters_1, maxiters_2 = maxiters_2, opt_2 = BFGS (initial_stepnorm = 0.0001 ), i_nominal = 1 , p_NN_0 = p_NN_0, progress_plot = false )
135
+
136
+ return result, policy_NN, fwd_ensemble_sol, loss_history
137
+ end
138
+
139
+ # # execute optimisation and simulation
140
+ @time (result, policy_NN, fwd_ensemble_sol, loss_history) = main (1000 , 1000 , 0.01f0 ; k_a_val = 100.0 )
141
+
142
+ # re-execute optimisation and simulation
143
+ # p_NN_prev = result.u
144
+ # @time (result, policy_NN, fwd_ensemble_sol, loss_history) = main(1, 1000, 0.01f0; k_a_val = 100.0, p_NN_0 = p_NN_prev)
145
+
146
+ # # save results
147
+ jldsave (" DS_autopilot.jld2" ; result, fwd_ensemble_sol, loss_history)
148
+ # p_NN_base = result.u
149
+ # jldsave("p_NN_loss_base.jld2"; p_NN_base, loss_history)
150
+
151
+ # plot simulation results
152
+ # (result, fwd_ensemble_sol, loss_history) = load(".jld2", "result", "fwd_ensemble_sol", "loss_history")
153
+
154
+ x_names = [" \$ h\$ " " \$ V\$ " " \$\\ alpha\$ " " \$ q\$ " " \$\\ theta\$ " " \$\\ delta\$ " " \$\\ dot{\\ delta}\$ " ]
155
+ vars_x = 1 : 6 # [1,2,3, (1,2), (2,3)]
156
+ u_names = [" \$\\ delta_{c}\$ " ]
157
+ vars_u = 1
158
+ y_names = [" \$ a_{z}\$ " ]
159
+ vars_y = 1
160
+ y_NN_names = [" \$ K_A\$ " " \$ K_I\$ " " \$ K_R\$ " ]
161
+ vars_y_NN = 1 : 3
162
+
163
+ (f_x, f_u, f_y, f_y_NN, f_L) = view_result ([], fwd_ensemble_sol, loss_history; x_names = x_names, vars_x = vars_x, u_names = u_names, vars_u = vars_u, y_names = y_names, vars_y = vars_y, y_NN_names = y_NN_names, vars_y_NN = vars_y_NN, linealpha = 0.6 )
164
+
165
+ # plot gain surfaces
166
+ # (p_NN_base) = load("p_NN_loss_base.jld2", "p_NN_base")
167
+ # (a_max, α_max, δ_max, δ̇_max, q_max, M_max, h_max) = Float32.([100.0, deg2rad(30), deg2rad(25), 1.5, deg2rad(60), 4, 11E3])
168
+
169
+ # dim_NN_hidden = 10
170
+ # dim_NN_input = 3
171
+ # dim_K = 3
172
+ # K_lb = Float32.(0.001*ones(3))
173
+ # K_ub = Float32[4, 0.2, 2]
174
+ # policy_NN = FastChain(
175
+ # FastDense(dim_NN_input, dim_NN_hidden, tanh),
176
+ # FastDense(dim_NN_hidden, dim_NN_hidden, tanh),
177
+ # FastDense(dim_NN_hidden, dim_K),
178
+ # (x, p) -> (K_ub - K_lb) .* σ.(x) .+ K_lb
179
+ # )
180
+
181
+ # h = 5000.0
182
+ # α_list = 0:1E-3:deg2rad(45)
183
+ # M_list = 0.5:0.1:3.0
184
+
185
+ # func_K_A(α, M) = policy_NN([abs(α) / α_max; M / M_max; h / h_max], p_NN_base)[1]
186
+ # func_K_I(α, M) = policy_NN([abs(α) / α_max; M / M_max; h / h_max], p_NN_base)[2]
187
+ # func_K_R(α, M) = policy_NN([abs(α) / α_max; M / M_max; h / h_max], p_NN_base)[3]
188
+
189
+ # f_K_A = plot(α_list, M_list, func_K_A, st=:surface, label = :false, zlabel = "\$K_{A}\$", xlabel = "\$\\left|\\alpha\\right|\$", ylabel = "\$M\$")
190
+ # display(f_K_A)
191
+ # savefig(f_K_A, "f_K_A.pdf")
192
+
193
+ # f_K_I = plot(α_list, M_list, func_K_I, st=:surface, label = :false, zlabel = "\$K_{I}\$", xlabel = "\$\\left|\\alpha\\right|\$", ylabel = "\$M\$")
194
+ # display(f_K_I)
195
+ # savefig(f_K_I, "f_K_I.pdf")
196
+
197
+ # f_K_R = plot(α_list, M_list, func_K_R, st=:surface, label = :false, zlabel = "\$K_{R}\$", xlabel = "\$\\left|\\alpha\\right|\$", ylabel = "\$M\$")
198
+ # display(f_K_R)
199
+ # savefig(f_K_R, "f_K_R.pdf")
0 commit comments