-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathproject1.jl
3741 lines (3114 loc) · 130 KB
/
project1.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
### A Pluto.jl notebook ###
# v0.20.4
using Markdown
using InteractiveUtils
# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error).
macro bind(def, element)
#! format: off
quote
local iv = try Base.loaded_modules[Base.PkgId(Base.UUID("6e696c72-6542-2067-7265-42206c756150"), "AbstractPlutoDingetjes")].Bonds.initial_value catch; b -> missing; end
local el = $(esc(element))
global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : iv(el)
el
end
#! format: on
end
# ╔═╡ 14964632-98d8-4a2f-b2f6-e3f28b558803
# ╠═╡ show_logs = false
using StanfordAA228V
# ╔═╡ 173388ab-207a-42a6-b364-b2c1cb335f6b
# ╠═╡ show_logs = false
begin
import MarkdownLiteral: @mdx
using ProgressLogging
using Test
using Base64
using PlutoUI
using Distributions
using Random
using Plots
using ReverseDiff
using Optim
using Parameters
using BSON
using GridInterpolations
using LinearAlgebra
default(fontfamily="Computer Modern", framestyle=:box) # LaTeX-style plotting
md"> _Additional package management._"
end
# ╔═╡ 117d0059-ce1a-497e-8667-a0c2ef20c632
md"""
# Project 1: Finding the most-likely failure
_Please wait until the entire notebook is finished loading before proceeding (you may get temporary errors)._
"""
# ╔═╡ f8612f4e-8519-4ee3-ac09-80b7c126b238
highlight(md"""_See the three **"⟶ Task"** sections below for where to fill out the algorithms._""")
# ╔═╡ da5b4000-0bce-4fc2-be85-dada21264ca3
textbook_details([
"Chapter 4. _Falsification through Optimization_",
"Chapter 5. _Falsification through Planning_"])
# ╔═╡ 0456a732-2672-4108-a241-db9ae879a913
# ╔═╡ 6e8ab7c9-fb49-4d89-946d-c7d7588c199a
md"""
## Julia/Pluto tips
Useful tips you may be interested in regarding Julia and Pluto.
"""
# ╔═╡ fe044059-9102-4e7f-9888-d9f03eec69ff
html_expand("Expand for general Julia/Pluto tips.", [
html"<h2hide>Julia packages</h2hide>",
md"""
Feel free to use as many external Julia packages as you like. Running `using PackageName` will automatically install it in this notebook.
[List of Julia packages.](https://juliapackages.com/)""",
html"<h2hide>Dependent cells</h2hide>",
md"""
Unlike Jupyter notebooks, Pluto has _dependent cells_. Meaning, if one cell uses the variable `x` and another cell defines `x`, then the first cell will _re-run_ when `x` is changed. This means there is no "hidden global state".
See the [Pluto README](https://github.com/fonsp/Pluto.jl?tab=readme-ov-file) for details/examples.""",
html"<h2hide>New cells</h2hide>",
md"""
You can create as many new cells anywhere as you like. Click the `+` icon on the right or hit `CTRL+Enter` within an existing cell to create a new one below it.
- **Important**: Please do not modify/delete any existing cells.""",
html"<h2hide>Running code</h2hide>",
md"""
After editing a cell, you can run it several ways:
1. To run the current cell: `<SHIFT+ENTER>`
1. To run the current cell and create a new one below: `<CTRL+S>` or `<CMD+S>`
1. To run all cells with unsaved changes: `<CTRL+S>` or `<CMD+S>`
""",
html"<h2hide>Multiple lines of code in a cell</h2hide>",
md"""
To put multple lines of code in a single cell in Pluto, wrap with `begin` and `end`:
```julia
begin
x = 10
y = x^2
end
```""",
html"<h2hide>Locally scoped cells</h2hide>",
md"""
Use Julia's `let` block to create locally scoped variables:
```julia
ℓ = let d = get_depth(sys)
p = NominalTrajectoryDistribution(sys, d)
τ = rollout(sys, d)
logpdf(p, τ)
end
```
The last line of code in the `let` block will be returned and assigned to the globally scoped `ℓ` variable in this case.
This way, you can reuse variable names such as `τ` without affecting other cells that may also use that name in global scope.
You could also just define a new function:
```julia
function my_test(sys)
d = get_depth(sys)
p = NominalTrajectoryDistribution(sys, d)
τ = rollout(sys, d)
return logpdf(p, τ)
end
ℓ = my_test(sys)
```
""",
html"<h2hide>Suppress cell output</h2hide>",
md"""
To suppress the Pluto output of a cell, add a semicolon `;` at the end.
```julia
x = 10;
```
or
```julia
begin
x = 10
y = x^2
end;
```
""",
html"<h2hide>Underscore as digit separator</h2hide>",
md"""
You can use the underscore `_` as a convenient digit separator:
```julia
1000000 == 1_000_000 # true
100000 == 100_000 # true
10000 == 10_000 # true
1000 == 1_000 # true
```
[Link to Julia docs](https://docs.julialang.org/en/v1/manual/integers-and-floating-point-numbers/#:~:text=The%20underscore).
""",
html"<h2hide>Unicode symbols</h2hide>",
md"""
You can use Unicode—and even emojis 🙃—as variable and function names. Here are some common ones we use throughout this course:
| Unicode | Code |
|:-------:|:----:|
| `τ` | `\tab` |
| `ψ` | `\psi` |
| `ℓ` | `\ell` |
| `π` | `\pi` |
| `σ` | `\sigma` |
| `Σ` | `\Sigma` |
| `θ` | `\theta` |
| `ω` | `\omega` |
| `²` | `\^2` |
| `₂` | `\_2` |
| `🍕` | `\:pizza:` |
To enter them into cells, type the above "**Code**" and hit `<TAB><TAB>` (or `<TAB><ENTER>`). Feel free to use any Unicode/emojis to your hearts desire.
See the Julia docs for more examples: [https://docs.julialang.org/en/v1/manual/unicode-input/](https://docs.julialang.org/en/v1/manual/unicode-input/)
"""
])
# ╔═╡ a21612a1-1092-4892-9132-629833e7c867
# ╔═╡ ec776b30-6a30-4643-a22c-e071a365d50b
md"""
## Hints
Expand the sections below for some helpful hints.
"""
# ╔═╡ 18754cc6-c089-4245-ad10-2848594e49b4
html_expand("Expand for useful interface functions.", [
html"<h2hide>Useful interface functions</h2hide>",
md"""
The following functions are provided by `StanfordAA228V.jl` that you may use.
""",
html"<h3hide><code>NominalTrajectoryDistribution</code></h3hide>",
md"""
**`NominalTrajectoryDistribution(sys::System, d::Int)::TrajectoryDistribution`** — Returns the nominal trajectory distribution for the system `sys` over depth `d`.
- Use this to evaluate the likelihood of the trajectory (using `logpdf` below).
""",
html"<h3hide><code>logpdf</code></h3hide>",
md"""
**`logpdf(p::TrajectoryDistribution, τ::Vector)::Float64`** — Evaluate the log probability density of the trajectory `τ` using the trajectory distribution `p`.
- Use `logpdf` instead of `pdf` for numerical stability.
""",
html"<h3hide><code>rollout</code></h3hide>",
md"""
**`rollout(sys::System; d)::Array`** — Run a single rollout of the system `sys` to a depth of `d`.
- `τ` is written as `\tau<TAB>` in code.
```julia
function rollout(sys::System; d=1)
s = rand(Ps(sys.env))
τ = []
for t in 1:d
o, a, s′ = step(sys, s) # For each rollout call, step is called d times.
push!(τ, (; s, o, a))
s = s′
end
return τ
end
```
""",
html"<h3hide><code>isfailure</code></h3hide>",
md"""
**`isfailure(ψ, τ)::Bool`** — Using the specification `ψ`, check if the trajector `τ` led to a failure.
- `ψ` is written as `\psi<TAB>` in code.
"""])
# ╔═╡ e888241c-b89f-4db4-ac35-6d826ec4c36c
html_expand("Expand if using optimization-based falsification.", [
html"<h2hide>Robustness and gradients</h2hide>",
md"""
Robustness can be a useful metric to find failures. If the robustness is $\le 0$, this indicates a failure.
- To take a gradient of _robustness_ w.r.t. a trajectory `τ`, you can use `ReverseDiff` like so (where we already load the `ReverseDiff` package for you):
```julia
function robustness_gradient(sys, ψ, τ)
𝐬 = [step.s for step in τ]
f(x) = robustness_objective(x, sys, ψ)
return ReverseDiff.gradient(f, 𝐬)
end
```
- For the `robustness_objective` function of:
```julia
function robustness_objective(input, sys, ψ; smoothness=1.0)
s, 𝐱 = extract(sys.env, input)
τ = rollout(sys, s, 𝐱)
𝐬 = [step.s for step in τ]
return robustness(𝐬, ψ.formula, w=smoothness)
end
```
- You can then evaluate the robustness gradient of a single trajectory like so:
```julia
τ = rollout(sys_small)
robustness_gradient(sys_small, ψ_small, τ)
```
- **However**, your objective is not quite to minimize robustness.
- **Hint**: You also want to _maximize likelihood_ (i.e., minimize negative likelihood).
""",
html"<h2hide>Optimization-based falsification</h2hide>",
md"""
- If you are using **Optim.jl**, the following options may be helpful (especially `f_calls_limit` for gradient free methods, `g_calls_limit` (typically n÷2) for gradient-based methods, and `iterations`): [https://julianlsolvers.github.io/Optim.jl/v0.9.3/user/config/](https://julianlsolvers.github.io/Optim.jl/v0.9.3/user/config/)
- Optim also requires an initial guess `x0`, you can use the following for each environment (see Example 4.5 in the textbook):
```julia
x0 = initial_guess(sys::SmallSystem) # SimpleGaussian
x0 = initial_guess(sys::MediumSystem) # InvertedPendulum
x0 = initial_guess(sys::LargeSystem) # CollisionAvoidance
initial_guess(sys::SmallSystem) = [0.0]
initial_guess(sys::MediumSystem) = zeros(84)
initial_guess(sys::LargeSystem) = [rand(Normal(0,100)), 0 0, 40, zeros(39)...]
```
- To explain where these numbers came from:
- `SmallSystem`: the initial guess is $0$ for the only search parameter: the initial state.
- `MediumSystem`: the initial guess is $d \times |x| + |s_0| = 84$ for $d = 41$, $|x| = 2$ (disturbance on both $\theta$ and $\omega$), and $|s_0| = 2$ for both components of the initial state.
- `LargeSystem`: the initial guess is $d \times |x| + |\{s_0^{(1)}, s_0^{(2)}\}| = 43$ for $d = 41$, $|x| = 1$ (disturbance is only on the environment), and $|\{s_0^{(1)}, s_0^{(2)}\}| = 2$ for searching only over the $h$ and $\dot{h}$ initial state variables, setting the initial $h$ to $h \sim \mathcal{N}(0, 100)$, the initial $t_\text{col}$ to $40$ and the other initial state variables and initial disturbances to zero.
- Or you can write your own optimization algorithm :)
""",
html"<h2hide>Details on the <code>extract</code> function</h2hide>",
md"""
- The `extract` function is used to _extract_ the initial state `s` and the set of disturbances `𝐱` (written `\bfx<TAB>`) so that off-the-shelf optimization algorithms (e.g., from Optim.jl) can search over the required variables.
- The `SimpleGaussian` environment only searches over initial states and has no disturbances.
```julia
function extract(env::SimpleGaussian, input)
s = input[1] # Objective is simply over the initial state
𝐱 = [Disturbance(0,0,0)] # No disturbances for the SimpleGaussian
return s, 𝐱
end
```
- **Note**: We provide the `extract` algorithms for each of the environment types:
```julia
s, 𝐱 = extract(env::SimpleGaussian, input)
s, 𝐱 = extract(env::InvertedPendulum, input)
s, 𝐱 = extract(env::CollisionAvoidance, input)
```
""",
html"<h2hide>Differing <code>step</code> calls using Optim.jl</h2hide>",
md"""
Note that the number of function calls `f(x)` output by the Optim results when running `display(results)` may be different than the `stepcount()`.
This is because Optim counts the number of objective function calls `f` and the objective function may run `rollout` (i.e., mulitple calls to `step` based on depth `d`) multiple times.
This is not applicable for the small problem, as the depth is $d=1$."""
])
# ╔═╡ c4fa9af9-1a79-43d7-9e8d-2854652a4ea2
html_expand("Stuck? Expand for hints on what to try.", md"""
$(hint(md"Try fuzzing! See _Example 4.3_ in the textbook.
_Other techniques_: optimization or planning (or something entirely different!?)"))""")
# ╔═╡ 6bad6e8b-c021-41d2-afbb-bcd0242138dd
# ╔═╡ dba42df0-3199-4c31-a735-b6b514703d50
md"""
## Common errors
These are some common errors you may run into.
"""
# ╔═╡ a0a60728-4ee0-4fd0-bd65-c056956b9712
html_expand("Expand if you get an error <code>reducing over an empty collection</code>.", md"""
The following error may occur:
> **ArgumentError**: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
This is usually because there were no failures found and you are trying to iterate over an empty set. Example: `τs_failures` may be equal to `[]`, resulting in the error:
```julia
τ_most_likely = argmax(τ->logpdf(pτ, τ), τs_failures)
```
**Potential solution**: Try increasing `m` to sample more rollouts.
""")
# ╔═╡ b0a4461b-89d0-48ee-9bcf-b544b9f08154
html_expand("Expand if you're getting <code>NaN</code> likelihood errors.", md"""
Likelihoods or log-likelihoods equal to `NaN` may be a result of `log(pdf(p, τ))` due to numerical stability issues.
**Instead**, please use `logpdf(p, τ)` instead (better numerical stability).
""")
# ╔═╡ 109c3d27-2c23-48a7-9fd7-be8a1f359e55
html_expand("Expand if you're using <code>Normal</code> and/or <code>MvNormal</code>.", md"""
The univariate normal (Gaussian) distribution in Julia takes in a mean $\mu$ and standard deviation $\sigma$:
```julia
Normal(μ, σ)
```
Where the **multivariate** normal distribution takes in a mean vector $\mathbf{\mu}$ and the _covariance_ matrix $\mathbf{\Sigma}$:
```julia
MvNormal(𝛍, 𝚺)
```
Meaning, if you want a 2d diagonal multivariate normal with mean zero and standard deviation of $\sigma = 0.1$, then you can do:
```julia
MvNormal(zeros(2), 0.1^2*I)
```
where "`I`" comes from the `LinearAlgebra` module (already loaded for you).
""")
# ╔═╡ bc2f62f5-1330-46cd-bb81-411baa483488
html_expand("Expand if you're using <code>initial_state_distribution</code>, <code>disturbance_distribution</code>, or <code>depth</code>.", md"""
The `StanfordAA228V` module defines several functions that you **might be adding new methods to (i.e., new type signatures)**.
- `initial_state_distribution`
- `disturbance_distribution`
- `depth`
Say you're implementing fuzzing and you define a new `TrajectoryDistribution` type and want to create your own version of `disturbance_distribution` for your new type:
```julia
struct NewTrajectoryDistribution <: TrajectoryDistribution
some_parameter
end
```
Then you will need to use the Julia dot notation to add a method to `StanfordAA228V`:
```julia
function StanfordAA228V.disturbance_distribution(p::NewTrajectoryDistribution)
# some code
end
```
This code will add a new method for `disturbance_distribution` with the input type of `NewTrajectoryDistribution`. Make sure to add the `StanfordAA228V.` to the function names that you create which are _also_ defined in `StanfordAA228V`:
- See the [`StanfordAA228V.jl`](https://github.com/sisl/StanfordAA228V.jl/blob/d2357ba8cdaf680b207a261495d785456981c66d/src/StanfordAA228V.jl#L39-L41) file.
This is common in Julia where you need to use the funciton name qualified with the module name. Read more in the ["Namespace Management" section of the Julia docs.](https://docs.julialang.org/en/v1/manual/modules/#namespace-management)
""")
# ╔═╡ a46702a3-4a8c-4749-bd00-52f8cce5b8ee
html_half_space()
# ╔═╡ 17fa8557-9656-4347-9d44-213fd3b635a6
Markdown.parse("""
## Small system
The system is comprised of an `agent`, environment (`env`), and `sensor`.
""")
# ╔═╡ 22feee3d-4627-4358-9937-3c780b7e8bcb
sys_small = System(NoAgent(), SimpleGaussian(), IdealSensor());
# ╔═╡ fd8c851a-3a42-41c5-b0fd-a12085543c9b
Markdown.MD(
md"""
# 1️⃣ **Small**: 1D Gaussian
The small system is a simple 1D Gaussian system.
- There are no dynamics (rollout depth $d=1$).
- There are no disturbances.
- The (initial and only) state $s$ is sampled from $\mathcal{N}(0,1)$.
""",
depth_highlight(sys_small)
)
# ╔═╡ 6f3e24de-094c-49dc-b892-6721b3cc54ed
SmallSystem::Type = typeof(sys_small) # Type used for multiple dispatch
# ╔═╡ 45f7c3a5-5763-43db-aba8-41ef8db39a53
md"""
## Small environment
The environment is a standard normal (Gaussian) distribution $\mathcal{N}(0, 1)$.
"""
# ╔═╡ 9c1daa96-76b2-4a6f-8d0e-f95d26168d2b
ps_small = Ps(sys_small.env)
# ╔═╡ ab4c6807-5b4e-4688-b794-159e26a1599b
ψ_small = LTLSpecification(@formula □(s->s > -2));
# ╔═╡ 370a15eb-df4b-493a-af77-00914b4616ea
Markdown.parse("""
## Small specification \$\\psi\$
The specification \$\\psi\$ (written `\\psi<TAB>` in code) indicates what the system should do:
\$\$\\psi(\\tau) = \\square(s > $(ψ_small.formula.ϕ.c))\$\$
i.e., "the state \$s\$ in the trajectory \$\\tau\$ should _always_ (\$\\square\$) be greater than \$$(ψ_small.formula.ϕ.c)\$, anything else is a failure."
""")
# ╔═╡ 166bd412-d433-4dc9-b874-7359108c0a8b
Markdown.parse("""
A failure is highly unlikely given that the probability of failure is:
\$\$P(\\neg\\psi(\\tau)) = P(s < $(ψ_small.formula.ϕ.c)) \\approx $(round(cdf(ps_small, ψ_small.formula.ϕ.c), sigdigits=4))\$\$
where \$\\neg\\psi(\\tau)\$ indicates that the specification was violated and \$P\$ is the _cumulative distribution function_ (`cdf` in Julia).
""")
# ╔═╡ 9132a200-f63b-444b-9830-b03cf075021b
md"""
## Random baseline
The following function is a baseline random falsification algorithm that returns the trajectory that led to the most-likely failure.
"""
# ╔═╡ bb3b33e6-fd05-4631-b0bd-c71ef3dbee38
n_baseline_small = 100
# ╔═╡ cc11217f-e070-4d20-8ebe-18e7eb977487
highlight(md"""**Note**: You can access the number of `step` calls via `stepcount()`""")
# ╔═╡ a6603deb-57fa-403e-a2e5-1195ae7c016c
md"""
Here we plot $100$ states showing which ones were _successes_ and which ones were _failures_.
_(Note that since trajectories for the small problem have depth $1$, each trajectory holds a single state, so here you can think about each of the points below as individual trajectories)._
"""
# ╔═╡ e52ffc4f-947d-468e-9650-b6c67a57a62b
html_quarter_space()
# ╔═╡ 92f20cc7-8bc0-4aea-8c70-b0f759748fbf
Markdown.parse("""
## ⟶ **Task (Small)**: Most-likely failure
Please fill in the following `most_likely_failure` function.
""")
# ╔═╡ a003beb6-6235-455c-943a-e381acd00c0e
start_code()
# ╔═╡ c494bb97-14ef-408c-9de1-ecabe221eea6
end_code()
# ╔═╡ e2418154-4471-406f-b900-97905f5d2f59
html_quarter_space()
# ╔═╡ 1789c8b5-b314-4aba-ad44-555be9a85984
md"""
# 📊 Small Tests
We'll automatically test your `most_likely_failure(::SmallSystem, ψ)` function below.
**Note**: The next three tests are _only_ local validation tests.
_The **graded** tests to be submitted to Gradescope are located [below](#graded-test)._
"""
# ╔═╡ 535261e3-4cb3-4b0b-954d-7452b2a91b5d
md"""
## Different failure threshold
Let's test a different failure threshold.
"""
# ╔═╡ 052cc2e3-ca8a-4043-9a7d-7947a7f1fd0c
md"""
## Random failure threshold
In most cases, we don't know the _failure distribution_. If we did, we could just sample from it!
In this test, we make sure that your algorithm is robust to random failure thresholds.
"""
# ╔═╡ ce99f0cc-5fe8-42c2-af78-ac7211b6b699
@bind rerun_rand_small Button("Click to rerun random test.")
# ╔═╡ 7910c15c-a231-4a0f-a4ed-1fe0b52f62c7
@bind γ Slider(-3:0.1:3, default=0, show_value=true)
# ╔═╡ cbc3a060-b4ec-4572-914c-e07880dd3537
md"""
_You can also click the slider then use the arrow keys for finer control._
"""
# ╔═╡ fda151a1-5069-44a8-baa1-d7903bc89797
html_space()
# ╔═╡ d18c2105-c2af-4dda-8388-617aa816a567
Markdown.parse("""
## Medium system
An inverted pendulum comprised of a `ProportionalController` with an `AdditiveNoiseSensor`.
""")
# ╔═╡ 77637b5e-e3ce-4ecd-90fc-95611af18002
sys_medium = System(
ProportionalController([-15.0, -8.0]),
InvertedPendulum(),
AdditiveNoiseSensor(MvNormal(zeros(2), 0.1^2*I))
);
# ╔═╡ 8c78529c-1e00-472c-bb76-d984b37235ab
Markdown.MD(
md"""
# 2️⃣ **Medium**: Inverted Pendulum
The medium system is a swinging inverted pendulum.
- It uses a proportional controller to keep it upright.
- The state is comprised of the angle $\theta$ and angular velocity $\omega$ making $s = [\theta, \omega]$
- Actions are left/right adjustments in the range $[-2, 2]$
- Disturbances $x$ are treated as additive noise: $x \sim \mathcal{N}(\mathbf{0}, 0.1^2I)$
""",
depth_highlight(sys_medium)
)
# ╔═╡ c4c0328d-8cb3-41d5-9740-0197cbf760c2
MediumSystem::Type = typeof(sys_medium) # Type used for multiple dispatch
# ╔═╡ b1e9bd40-a401-4630-9a1f-d61b276e72f7
md"""
## Medium specification $\psi$
The inverted pendulum specification $\psi$ indicates what the system should do:
$$\psi(\tau) = \square\big(|\theta| < \pi/4\big)$$
i.e., "the absolute value of the pendulum angle $\theta$ (first element of the state $s$) in the trajectory $\tau$ should _always_ ($\square$) be less than $\pi/4$, anything else is a failure."
"""
# ╔═╡ fe272c1b-421c-49de-a513-80c7bcefdd9b
ψ_medium = LTLSpecification(@formula □(s -> abs(s[1]) < π / 4));
# ╔═╡ a16cf110-4afa-4792-9d3f-f13b24349886
md"""
## Medium example rollouts
Example rollouts of the pendulum system and their plot below.
"""
# ╔═╡ f005da72-d7b5-4f01-8882-ed4e2bdcf4bd
n_baseline_medium = 41_000
# ╔═╡ d75f34d3-384c-486b-b648-61ef8fd52167
Markdown.parse("""
**Large likelihood values.** \$\\,\$ _It's perfectly normal for the likelihood to be extremely large, \$\\exp(\\ell) \\gg 1\$, this is because we're dealing with probablity **density** functions which will **integrate** to one. Don't be alarmed._
_This is particularly apparent when the distribution has **small variance**. Here's an example at \$x = 0\$:_
```julia
pdf(Normal(0, 1e-15), 0) # $(round(pdf(Normal(0, 1e-15), 0), sigdigits=3))
```
*For the inverted pendulum, the `AdditiveNoiseSensor` is a multivariate Gaussian with mean zeros and diagonal standard deviation of \$\\sigma = \\mathit{0.1}\$. Say the disturbances were \$\\mathit{x_t = [\\!0,0]}\$ for all \$d = \\mathit{41}\$ steps, the trajectory likelihood would be:*
\$\$\\prod_{t=1}^{41} p(x_t) \\quad \\text{where} \\quad x_t \\sim \\mathcal{N}(\\mathbf{0}, 0.1^2I)\$\$
*For the disturbances \$\\mathbf{x} = \\big\\{[0,0], \\ldots, [0,0]\\big\\}\$ (the most-likely values of \$x_t\$), we get the following likelihood:*
```julia
prod(pdf(MvNormal(zeros(2), 0.1^2*I), [0,0]) for t in 1:41) # $(round(prod(pdf(MvNormal(zeros(2), 0.1^2*I), [0,0]) for t in 1:41), sigdigits=3))
```
_Note that this ignores the initial state distribution._
_But we tend to work with **log likelihoods** to avoid numerical issues with these large magnitude numbers._
\$\$\\sum_{t=1}^{41} \\log p(x_t) \\quad \\text{where} \\quad x_t \\sim \\mathcal{N}(\\mathbf{0}, 0.1^2I)\$\$
```julia
sum(logpdf(MvNormal(zeros(2), 0.1^2*I), [0,0]) for t in 1:41) # $(round(sum(logpdf(MvNormal(zeros(2), 0.1^2*I), [0,0]) for t in 1:41), digits=3))
```
""")
# ╔═╡ bac5c489-553c-436f-b332-8a8e97126a51
html_quarter_space()
# ╔═╡ 1da9695f-b7fc-46eb-9ef9-12160246018d
Markdown.parse("""
## ⟶ **Task (Medium)**: Most-likely failure
Please fill in the following `most_likely_failure` function.
""")
# ╔═╡ 0606d827-9c70-4a79-afa7-14fb6b806546
start_code()
# ╔═╡ 759534ca-b40b-4824-b7ec-3a5c06cbd23e
end_code()
# ╔═╡ 7987c20d-68e8-441b-bddc-3f0ae7c3591d
html_quarter_space()
# ╔═╡ da2d692a-8378-435e-bd6b-c0e65caef542
md"""
# 📊 Medium Test
We'll automatically test your `most_likely_failure(::MediumSystem, ψ)` function below.
"""
# ╔═╡ 60ab8107-db65-4fb6-aeea-d4978aed77bd
html_space()
# ╔═╡ 7d054465-9f80-4dfb-9b5f-76c3977de7cd
Markdown.parse("""
## Large system
An aircraft collision avoidance system that uses an interpolated lookup-table policy.
""")
# ╔═╡ 1ec68a39-8de9-4fd3-be8a-26cf7706d1d6
begin
local grid, Q = load_cas_policy(joinpath(@__DIR__, "cas_policy.bson"))
cas_agent = InterpAgent(grid, Q)
cas_env = CollisionAvoidance(Ds=Normal(0, 1.5))
cas_sensor = IdealSensor()
sys_large = System(cas_agent, cas_env, cas_sensor)
LargeSystem::Type = typeof(sys_large) # Type used for multiple dispatch
end
# ╔═╡ 9f739929-1cd3-4935-b229-ae3aeac7e131
begin
global ThisProject = Project1
max_steps(sys::SmallSystem) = 20
max_steps(sys::MediumSystem) = 1_000
max_steps(sys::LargeSystem) = 10_000
end;
# ╔═╡ 60f72d30-ab80-11ef-3c20-270dbcdf0cc4
begin
function try_max_steps(sys)
try
return "\$n = $(format(max_steps(sys); latex=true))\$"
catch
return "**LOADING...**"
end
end
Markdown.parse("""
**Task**: Efficiently find likely failures using \$n\$ total function calls to the system `step` function.
- **Small system**: 1D Gaussian \$\\mathcal{N}(0,1)\$. With $(try_max_steps(sys_small)) `step` calls.
- **Medium system**: Swinging inverted pendulum. With $(try_max_steps(sys_medium)) `step` calls.
- **Large system**: Aircraft collision avoidance system (CAS). With $(try_max_steps(sys_large)) `step` calls.
Your job is to write the following function that returns the failure trajectory `τ` (i.e., a `Vector` of \$(s,a,o,x)\$ tuples) with the highest likelihood you found:
```julia
most_likely_failure(sys, ψ; n)::Vector{NamedTuple}
```
""")
end
# ╔═╡ d566993e-587d-4aa3-995b-eb955dec5758
html_expand("Expand for baseline implementation using <code>DirectFalsification</code>.", [
html"<h2hide>Using <code>DirectFalsification</code> algorithm</h2hide>",
Markdown.parse("""
We could instead use the `DirectFalsification` algorithm for the small system where instead of using the `NominalTrajectoryDistribution`, we evaluate the pdf directly on the initial state distribution `ps_small`:
```julia
struct DirectFalsification
d # depth
m # number of samples
end
function falsify(alg::DirectFalsification, sys, ψ)
d, m = alg.d, alg.m
τs = [rollout(sys, d=d) for i in 1:m]
return filter(τ->isfailure(ψ, τ), τs)
end
alg = DirectFalsification(1, $(max_steps(sys_small)))
τ_failures = falsify(alg, sys_small, ψ_small)
ℓτ = maximum(s->pdf(ps_small, s[1].s), τ_failures)
```
**Note**: _But we want to use the `NominalTrajectoryDistribution` to keep the algorithm general for the medium/large problems that **do** have disturbances._
""")])
# ╔═╡ c2ae204e-dbcc-453a-81f5-791ba4be39db
@tracked function most_likely_failure_baseline(sys, ψ; n=max_steps(sys), full=false)
d = get_depth(sys)
m = n ÷ d # Get num rollouts, \div for ÷
pτ = NominalTrajectoryDistribution(sys, d) # Trajectory distribution
τs = [rollout(sys, pτ; d) for _ in 1:m] # Rollout with pτ, m*d steps
τs_failures = filter(τ->isfailure(ψ, τ), τs) # Filter to get failure trajs.
τ_most_likely = argmax(τ->logpdf(pτ, τ), τs_failures) # Most-likely failure traj
return full ? (τ_most_likely, τs) : τ_most_likely # Return MLF, or all trajs.
end
# ╔═╡ 254956d0-8f58-4e2b-b8a9-5dd10dd074a2
function run_baseline(sys::System, ψ; n, seed=4)
Random.seed!(seed)
τ, τs = most_likely_failure_baseline(sys, ψ; n, full=true)
d = get_depth(sys)
p = NominalTrajectoryDistribution(sys, d)
ℓ = logpdf(p, τ)
n = stepcount()
return (τ=τ, τs=τs, ℓ=ℓ, n=n) # return these variables as a NamedTuple
end
# ╔═╡ 3385fcb3-8b93-4da8-ba75-77877cc77ce4
baseline_small_results = run_baseline(sys_small, ψ_small; n=n_baseline_small);
# ╔═╡ 73da2a56-8991-4484-bcde-7d397214e552
Markdown.parse("""
### Baseline results (small)
\$\$\\begin{align}
\\ell_\\text{baseline} &= $(round(baseline_small_results.ℓ, digits=3))\\tag{failure log-likelihood} \\\\
n_\\text{steps} &= $(baseline_small_results.n) \\tag{number of \\texttt{step} calls}
\\end{align}\$\$
Reminder that the number of `step` calls \$n\$ is equal to the number of rollouts \$m\$ for the small system. This is because the rollout depth is \$d=1\$.
""")
# ╔═╡ 77a6e704-33e8-4241-84f0-0e58c29c06ef
baseline_medium_results = run_baseline(sys_medium, ψ_medium; n=n_baseline_medium);
# ╔═╡ 7ef66a50-6acc-474f-b406-7b27a7b18510
Markdown.parse("""
\$\$\\begin{align}
\\ell_\\text{baseline} &= $(round(baseline_medium_results.ℓ; digits=3))\\tag{failure log-likelihood} \\\\
n_\\text{steps} &= $(format(baseline_medium_results.n; latex=true)) \\tag{number of \\texttt{step} calls \$d\\times m\$}
\\end{align}\$\$
""")
# ╔═╡ 42456abf-4930-4b01-afd1-fce3b4881e28
baseline_details(sys_small; n_baseline=n_baseline_small, descr="simple Gaussian", max_steps)
# ╔═╡ fc2d34da-258c-4460-a0a4-c70b072f91ca
@small function most_likely_failure(sys::SmallSystem, ψ; n=max_steps(sys))
# TODO: WRITE YOUR CODE HERE
end
# ╔═╡ 307afd9c-6dac-4a6d-89d7-4d8cabfe3fe5
Markdown.MD(
md"""
$(@bind rerun_small LargeCheckBox(text="⟵ Click to re-run the <code>SmallSystem</code> evaluation."))""",
Markdown.parse("""
↑ This will re-run **`most_likely_failure(::SmallSystem, ψ)`** and re-save **`$(get_filename(sys_small, ThisProject))`**
_Uncheck this to load results from the file._
""")
)
# ╔═╡ 772cf17e-0fdb-470e-9f12-9480af811edd
baseline_details(sys_medium; n_baseline=n_baseline_medium, descr="pendulum", max_steps)
# ╔═╡ cb7b9b9f-59da-4851-ab13-c451c26117df
@medium function most_likely_failure(sys::MediumSystem, ψ; n=max_steps(sys))
# TODO: WRITE YOUR CODE HERE
end
# ╔═╡ 38f26afd-ffa5-48d6-90cc-e3ec189c2bf1
Markdown.MD(
md"""
$(@bind rerun_medium LargeCheckBox(text="⟵ Click to re-run the <code>MediumSystem</code> evaluation."))""",
Markdown.parse("""
↑ This will re-run **`most_likely_failure(::MediumSystem, ψ)`** and re-save **`$(get_filename(sys_medium, ThisProject))`**
_Uncheck this to load results from the file._
""")
)
# ╔═╡ be8c37e8-45db-4198-b0b9-d287e73fb818
try
submission_details(@bind(directory_trigger, OpenDirectory(@__DIR__)), ThisProject, [SmallSystem, MediumSystem, LargeSystem])
catch end
# ╔═╡ 0c520f93-49ce-45eb-899d-a31105d856c8
if directory_trigger
@info "Opening local directory..."
sleep(1)
end
# ╔═╡ aa0c4ffc-d7f0-484e-a1e2-7f6f92a3a53d
Markdown.MD(
Markdown.parse("""
# 3️⃣ **Large**: Aircraft Collision Avoidance
The large system is an aircraft collision avoidance system (CAS).
- It uses an interpolated lookup-table policy.
- The state is comprised of the relative altitude (m) \$h\$, the relative vertical rate \$\\dot{h}\$ (m/s), the previous action \$a_\\text{prev}\$, and the time to closest point of approach \$t_\\text{col}\$ (sec): \$s = [h, \\dot{h}, a_\\text{prev}, t_\\text{col}]\$
- Actions are \$a \\in [-5, 0, 5]\$ vertical rate changes.
- Disturbances \$x\$ are applied to \$\\dot{h}\$ as environment noise: \$x \\sim \\mathcal{N}(0, 1.5)\$
- Finite horizon (i.e., rollout depth) of \$d=$(get_depth(sys_large))\$ for \$t_\\text{col}\$ from \$40-0\$ seconds.
"""),
depth_highlight(sys_large)
)
# ╔═╡ d23f0299-981c-43b9-88f3-fb6e07927498
md"""
## Large environment
The collision avoidance system has disturbances applied to the relative vertical rate variable $\dot{h}$ of the state (i.e., environment disturbances).
$$\dot{h} + x \quad \text{where} \quad x \sim \mathcal{N}(0, 1.5)$$
"""
# ╔═╡ 641b92a3-8ff2-4aed-8482-9fa686803b68
cas_env.Ds
# ╔═╡ be426908-3fee-4ecd-b054-2497ce9a2e50
md"""
## Large specification $\psi$
The collision avoidance system specification $\psi$ indicates what the system should do:
$$\psi(\tau) = \square_{[41]}\big(|h| > 50\big)$$
i.e., "the absolute valued relative altitude $h$ (first element of the state $s$) in the trajectory $\tau$ should _always_ ($\square$) be greater than $50$ meters at the end of the encounter ($t=41$), anything else is a failure."
"""
# ╔═╡ 258e14c4-9a2d-4515-9a8f-8cd96f31a6ff
ψ_large = LTLSpecification(@formula □(41:41, s->abs(s[1]) > 50));
# ╔═╡ 3328d818-391a-440a-8f1b-f2b7f3e00958
n_baseline_large = 410_000
# ╔═╡ 35434537-9b9c-4528-b58c-420d01813598
baseline_details(sys_large; n_baseline=n_baseline_large, descr="CAS", max_steps)
# ╔═╡ 06b14338-ea3b-45c8-bf6c-28b82db2ea70
baseline_large_results = run_baseline(sys_large, ψ_large; n=n_baseline_large);
# ╔═╡ 204feed7-cde8-40a8-b6b5-051a1c768fd9
Markdown.parse("""
\$\$\\begin{gather}
\\ell_\\text{baseline} = $(round(baseline_large_results.ℓ; digits=3))\\tag{failure log-likelihood} \\\\
n_\\text{steps} = $(format(baseline_large_results.n; latex=true)) \\tag{number of \\texttt{step} calls \$d\\times m\$}
\\end{gather}\$\$
""")
# ╔═╡ e3d6fdf1-3a9e-446b-8482-49d6f64b652e
html_quarter_space()
# ╔═╡ 23fd490a-74d2-44b4-8a12-ea1460d95f85
Markdown.parse("""
## ⟶ **Task (Large)**: Most-likely failure
Please fill in the following `most_likely_failure` function.
""")
# ╔═╡ 18a70925-3c2a-4317-8bbc-c2a096ec56d0
start_code()
# ╔═╡ 3471a623-16af-481a-8f66-5bd1e7890188
@large function most_likely_failure(sys::LargeSystem, ψ; n=max_steps(sys))
# TODO: WRITE YOUR CODE HERE
end
# ╔═╡ 4c5210d6-598f-4167-a6ee-93bceda7223b
end_code()
# ╔═╡ 2ba2d3a2-3f6c-4d5f-8c45-8d00947f6e05
html_quarter_space()
# ╔═╡ ea2d7eb7-d576-415c-ac4c-fea7f90de637
md"""
# 📊 Large Test
We'll automatically test your `most_likely_failure(::LargeSystem, ψ)` function below.
"""
# ╔═╡ 7fe1c3d7-469c-47d9-9d46-e5b8b263edb9
Markdown.MD(
md"""
$(@bind rerun_large LargeCheckBox(text="⟵ Click to re-run the <code>LargeSystem</code> evaluation."))""",
Markdown.parse("""
↑ This will re-run **`most_likely_failure(::LargeSystem, ψ)`** and re-save **`$(get_filename(sys_large, ThisProject))`**
_Uncheck this to load results from the file._
""")
)
# ╔═╡ 74aeca7b-0658-427f-8c02-d093a0d725ee
html_half_space()
# ╔═╡ 6d5c805b-330c-4b04-a51c-15e674352b1b
html_quarter_space()
# ╔═╡ 860ec509-3a86-4842-9471-6b1a0b8f366d
md"""
## Comparing likelihoods
Since the likelihoods across the three problems vary widely in range, we compute the log-likelihoods of your trajectories and take the difference of logs between the _most-likely trajectory_ (i.e., the mean trajectory $\tau_\text{mean}$). The most-likely trajectory is simply the trajectory rolled-out with the mean initial state and mean disturbances.
The score for problem $k$ is then computed as:
$$\begin{gather}
\text{score}_k = \log \left( \frac{p(\tau)}{p(\tau_\text{mean})} \right) = \log p(\tau) - \log p(\tau_\text{mean})
\end{gather}$$
_The maximum possible score is $0$ (but is impossible as the mean trajectories are not failures for these systems)._
"""
# ╔═╡ 54741d81-39e0-4a47-b84d-c41c8eb7611b
function score(sys::System, τ)
ps = NominalTrajectoryDistribution(sys, get_depth(sys))
τ_mean = mean_rollout(sys, ps)
ℓ_mean = logpdf(ps, τ_mean)
ℓ = logpdf(ps, τ)
return ℓ - ℓ_mean
end
# ╔═╡ 6559cf16-a474-4533-a2c7-ccbc02480a76
md"""
Since the small system runs several tests, we take the average score over the tests:
$$\begin{gather}
\text{score}_\text{small} = \mathbb{E}_i \left[ \log\left( \frac{p(\tau_i)}{p(\tau_\text{mean})} \right) \right]
\end{gather}$$
To balance out the difficulty, we use weights $\mathbf{w} = [1,2,3]$ (normalized to sum to one):
$$\bar{w_i} = \frac{w_i}{\sum_j w_j}$$
"""
# ╔═╡ cfdba748-45d5-4eaa-97b3-fdc9fe7e4333
𝐰 = [1,2,3]
# ╔═╡ 6beda870-0cb0-40f5-9531-fa3e2f7bb020
md"""
The final score on the leaderboard is then a weighted sum:
$$\begin{gather}
\mathbf{s} = \big[\text{score}_\text{small},\, \text{score}_\text{medium},\, \text{score}_\text{large} \big] \\
\text{score} = \mathbf{w}^\top\mathbf{s}
\end{gather}$$
"""
# ╔═╡ 5c3d24f6-0106-444c-b7df-89bba8c01b37
function leaderboard_scores(systems::Vector{<:System}, τs; 𝐰=ones(length(τs)))
score_small = mean(score(systems[1], τ) for τ in τs[1])
score_medium = score(systems[2], τs[2])
score_large = score(systems[3], τs[3])
𝐬 = [score_small, score_medium, score_large]
𝐰 = 𝐰 ./ sum(𝐰)
return 𝐰'𝐬
end
# ╔═╡ 4edc5933-9457-4c7c-8456-a26974e0587e
html_half_space()
# ╔═╡ 38b64ee2-5372-4374-80e8-fbf203021a61
begin
function GridInterpolations.interpolants(grid::RectangleGrid, x::AbstractVector)
if any(isnan, x)
throw(DomainError("Input contains NaN!"))
end
cut_counts = grid.cut_counts
cuts = grid.cuts
# Reset the values in index and weight:
num_points = 2^dimensions(grid)
index = Vector{Int}(undef, num_points)
index2 = Vector{Int}(undef, num_points)
weight = Vector{eltype(x)}(undef, num_points)
weight2 = Vector{eltype(x)}(undef, num_points)
# Note: these values are set explicitly because we have not verified that the logic below is independent of the initial values. See discussion in PR #47. These can be removed if it can be proved that the logic is independent of the initial values.
index .= 1
index2 .= 1
weight .= zero(eltype(weight))
weight2 .= zero(eltype(weight2))
weight[1] = one(eltype(weight))
weight2[1] = one(eltype(weight2))
l = 1
subblock_size = 1
cut_i = 1
n = 1
for d = 1:length(x)
coord = x[d]
lasti = cut_counts[d] + cut_i - 1
ii = cut_i
if coord <= cuts[ii]
i_lo, i_hi = ii, ii
elseif coord >= cuts[lasti]
i_lo, i_hi = lasti, lasti
else
while cuts[ii] < coord
ii = ii + 1
end
if cuts[ii] == coord
i_lo, i_hi = ii, ii
else
i_lo, i_hi = (ii - 1), ii