LearningToControlClass/class01/class01_intro.jl at 627af29b4e5e6b87aa3003cf481b4eaf2f548399 · LearningToOptimize/LearningToControlClass · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
### A Pluto.jl notebook ###
# v0.20.15

using Markdown
using InteractiveUtils

# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error).
# Fallback `@bind`: evaluates `element`, assigns a default value derived from it
# to the global named by `def`, and yields the element itself as the result.
macro bind(def, element)
    #! format: off
    return quote
        # Fetch `Bonds.initial_value` from the AbstractPlutoDingetjes module if it is
        # present in `Base.loaded_modules`; if that lookup throws, fall back to a
        # function that returns `missing` for any element.
        local iv = try Base.loaded_modules[Base.PkgId(Base.UUID("6e696c72-6542-2067-7265-42206c756150"), "AbstractPlutoDingetjes")].Bonds.initial_value catch; b -> missing; end
        # Evaluate the widget expression in the caller's scope (hence `esc`).
        local el = $(esc(element))
        # Use `Base.get(el)` when a matching method exists, otherwise `iv(el)`.
        global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : iv(el)
        # The element is the value of the expanded expression.
        el
    end
    #! format: on
end

# ╔═╡ 13b12c00-6d6e-11f0-3780-a16e73360478
begin
	# Directory containing this notebook file. It anchors both the package
	# environment activated below and the image assets loaded in later cells.
	class_dir = @__DIR__
	import Pkg
	# Activate the environment that lives next to this file. Using `class_dir`
	# instead of "." keeps the activation independent of the process working
	# directory, so the file also works when run as a script from elsewhere.
	Pkg.activate(class_dir)
	Pkg.instantiate()
	# Pkg.status()  # uncomment to inspect the resolved environment
	using PlutoUI
	using Random
	using LinearAlgebra
	using HypertextLiteral
	using PlutoTeachingTools
	using ShortCodes, MarkdownLiteral
	import Images: load
end

# ╔═╡ b6ba1231-2942-4f06-8252-22f02553bb57
using CairoMakie

# ╔═╡ 29df2037-456f-4f98-9e32-71037e3d76c4
using ForwardDiff

# ╔═╡ 52712a4b-8c4b-4637-943d-fdb0f5e9e944
using InfiniteOpt, JuMP, Ipopt

# ╔═╡ ec473e69-d5ec-4d6a-b868-b89dadb85705
ChooseDisplayMode()

# ╔═╡ 8d7a34ef-5a2d-41a8-ac55-39ab00d7e432
md"
| | | |
|-----------:|:--|:------------------|
|  Lecturer   | : | Rosemberg, Andrew |
|  Date   | : | 28 of July, 2025 |
"

# ╔═╡ ced1b968-3ba6-4e58-9bcd-bbc6bee2b93c
md"#### Reference Material"

# ╔═╡ 97994ed8-5606-46ef-bd30-c5343c1d99cf
begin
	MarkdownLiteral.@markdown(
"""

[^cmu]: Zachary Manchester et al. [Optimal Control and Reinforcement Learning at Carnegie Mellon University - CMU 16-745](https://optimalcontrol.ri.cmu.edu/)

[^OptProx]: Van Hentenryck, P., 2024. [Fusing Artificial Intelligence and Optimization with Trustworthy Optimization Proxies](https://www.siam.org/publications/siam-news/articles/fusing-artificial-intelligence-and-optimization-with-trustworthy-optimization-proxies/). Collections, 57(02).

[^ArmManip]: Guechi, E.H., Bouzoualegh, S., Zennir, Y. and Blažič, S., 2018. [MPC control and LQ optimal control of a two-link robot arm: A comparative study](https://www.mdpi.com/2075-1702/6/3/37). Machines, 6(3), p.37.

[^ZachMIT]: Zachary Manchester talk at MIT - [MIT Robotics - Zac Manchester - Composable Optimization for Robotic Motion Planning and Control](https://www.youtube.com/watch?v=eSleutHuc0w&ab_channel=MITRobotics).

[^Hespanha]: Hespanha, J.P., 2018. Linear systems theory. Princeton university press.

"""
)
end

# ╔═╡ 1f774f46-d57d-4668-8204-dc83d50d8c94
md"# Intro - Optimal Control and Learning

In this course, we are interested in problems with the following structure:

```math
\begin{equation}
\!\!\!\!\!\!\!\!\min_{\substack{(\mathbf u_1,\mathbf x_1)\\\mathrm{s.t.}}}
\!\underset{%
   \phantom{\substack{(\mathbf u_1,\mathbf x_1)\\\mathrm{s.t.}}}%
   \!\!\!\!\!\!\!\!\!\!(\mathbf u_1,\mathbf x_1)\in\mathcal X_1(\mathbf x_0)%
}{%
   \!\!\!\!c(\mathbf x_1,\mathbf u_1)%
}
+\mathbb{E}_1\Bigl[
   \quad \cdots

  \;+\;\mathbb{E}_t\Bigl[
    \min_{\substack{(\mathbf u_t,\mathbf x_t)\\\mathrm{s.t.}}}
    \!\underset{%
       \phantom{\substack{(\mathbf u_t,\mathbf x_t)\\\mathrm{s.t.}}}%
       \!\!\!\!(\mathbf u_t,\mathbf x_t)\in\mathcal X_t(\mathbf x_{t-1},w_t)%
    }{%
       \!\!\!\!\!\!\!\!\!\!c(\mathbf x_t,\mathbf u_t)%
    }
    +\mathbb{E}_{t+1}[\cdots]
\Bigr].
\end{equation}
```
which minimizes a first stage cost function $c(\mathbf{x}_1,
\mathbf{u}_1)$ and the expected value of future costs over possible
values of the exogenous stochastic variable $\{w_{t}\}_{t=2}^{T} \in
\Omega$.

Here, $\mathbf{x}_0$ is the initial system state and the
control decisions $\mathbf{u}_t$ are obtained at every period $t$
under a feasible region defined by the incoming state
$\mathbf{x}_{t-1}$ and the realized uncertainty $w_t$. $\mathbb{E}_t$ represents the expected value over future uncertainties $\{w_{\tau}\}_{\tau=t}^{T}$. This
optimization program assumes that the system is entirely defined by
the incoming state, a common modeling choice in many frameworks (e.g.,
MDPs). This is without loss of generality,
since any information can be appended in the state. The system
constraints can be generally posed as:

```math
\begin{align}
    &\mathcal{X}_t(\mathbf{x}_{t-1}, w_t)=
    \begin{cases}
        f(\mathbf{x}_{t-1}, w_t, \mathbf{u}_t) = \mathbf{x}_t \\
        h(\mathbf{x}_t, \mathbf{u}_t) \geq 0
    \end{cases}
\end{align}
```
"

# ╔═╡ a0f71960-c97c-40d1-8f78-4b1860d2e0a2
md"""
where the outgoing state of the system $\mathbf{x}_t$ is a
transformation based on the incoming state, the realized uncertainty,
and the control variables. In the Markov Decision Process (MDP) framework, we refer to $f$ as the "transition kernel" of the system. State and
control variables are restricted further by additional constraints
captured by $h(\mathbf{x}_t, \mathbf{u}_t) \geq 0$.  We
consider policies that map the past information into decisions: $\pi_t : (\mathbf{x}_{t-1}, w_t) \rightarrow \mathbf{x}_t$. In
period $t$, an optimal policy is given by the solution of the dynamic
equations:

```math
\begin{align}
    V_{t}(\mathbf{x}_{t-1}, w_t) = &\min_{\mathbf{x}_t, \mathbf{u}_t} \quad  \! \! c(\mathbf{x}_t, \mathbf{u}_t) + \mathbf{E}_{t+1}[V_{t+1}(\mathbf{x}_t, w_{t+1})]    \\
    &   \text{ s.t. } \quad\mathbf{x}_t  = f(\mathbf{x}_{t-1}, w_t, \mathbf{u}_t) \nonumber         \\
    &  \quad \quad \quad \! \! h(\mathbf{x}_t, \mathbf{u}_t)  \geq 0. \nonumber
\end{align}
```
```math
\implies \pi_t^{*}(\mathbf{x}_{t-1}, w_t) \in \{\mathbf{x}_t \;|\; \exists \mathbf{u}_t \;:\; c(\mathbf{x}_t, \mathbf{u}_t) + \mathbf{E}_{t+1}[V_{t+1}(\mathbf{x}_t, w_{t+1})] = V_{t}(\mathbf{x}_{t-1}, w_t) \}
```

"""

# ╔═╡ 1d7092cd-0044-4d38-962a-ce3214c48c24
md"""
Function $V_{t}(\mathbf{x}_{t-1}, w_t)$ is referred to as the value function. To find the optimal policy for the $1^{\text{st}}$ stage, we need to find the optimal policy for the entire horizon $\{t=2,\cdots,T\}$ or at least estimate the "optimal" value function.
"""

# ╔═╡ 60ba261a-f2eb-4b45-ad6d-b6042926ccab
load(joinpath(class_dir, "indecision_tree.png"))

# ╔═╡ 15709f7b-943e-4190-8f40-0cfdb8772183
md"""
Notice that the number of "nodes" to be evaluated (either decisions or their cost) grows exponentially with the number of stages. This is the *Curse of dimensionality*
in stochastic programming.

"""

# ╔═╡ 5d7a4408-21ff-41ec-b004-4b0a9f04bb4f
question_box(md"Can you name a few ways to try and/or solve this problem?")

# ╔═╡ 7e487ebc-8327-4f3e-a8ca-1e07fb39991a
md"""
### Solution Methods

There are a few ways to solve these problems:

```math
(\mathbf{x}_{t-1}, w_t)\xrightarrow[\pi_t^{*}(\mathbf{x}_{t-1}, w_t)]{
\begin{align}
    &\min_{\mathbf{x}_t, \mathbf{u}_t} \quad  \! \! c(\mathbf{x}_t, \mathbf{u}_t) + \mathbf{E}_{t+1}[V_{t+1}(\mathbf{x}_t, w_{t+1})]    \\
    &   \text{ s.t. } \quad\mathbf{x}_t  = f(\mathbf{x}_{t-1}, w_t, \mathbf{u}_t) \nonumber         \\
    &  \quad \quad \quad \! \! h(\mathbf{x}_t, \mathbf{u}_t)  \geq 0. \nonumber
\end{align}
} (\mathbf{x}_t^{*}, \mathbf{u}_t^{*})
```

**Exact Methods:**
 - Deterministic Equivalent: Explicitly model all decisions of all possible scenarios. (Good Luck!)
 - Stochastic Dual Dynamic Programming, Progressive Hedging, ... (Hard but doable for some class of problems.)

**Approximate Methods**:
 - Approximate Dynamic Programming, (model-free and model-based) Reinforcement Learning, Two-Stage Decision Rules, ...
 - **Optimization Proxies**:

```math
\theta^{\star}
\;=\;
\operatorname*{arg\,min}_{\theta \in \Theta}
\;
\mathbb{E}\Bigl[\bigl\|\,\pi_t^{\ast}-\pi_t(\,\cdot\,;\theta)\bigr\|_{\mathcal F}\Bigr],
```

"""

# ╔═╡ bd623016-24ce-4c10-acb3-b2b80d4facc8
md"[^OptProx]"

# ╔═╡ 2d211386-675a-4223-b4ca-124edd375958
@htl """

<img src="https://www.siam.org/media/k2hls5wb/figure1.jpg">

"""

# ╔═╡ 45275d44-e268-43cb-8156-feecd916a6da
# ╠═╡ skip_as_script = true
#=╠═╡
Foldable(md"#### LearningToOptimize Project", @htl """
<div style="
  border:1px solid #ccc;
  border-radius:6px;
  padding:1rem;
  font-size:0.9rem;
  max-width:760px;
  line-height:1.45;
">

  <!-- ─────────────────────── header ─────────────────────── -->
  <h2 style="margin-top:0">LearningToOptimize&nbsp;Organization</h2>

  <p>
    <strong>LearningToOptimize&nbsp;(L2O)</strong> is a collection of open-source tools
    focused on the emerging paradigm of <em>amortized optimization</em>—using machine-learning
    methods to accelerate traditional constrained-optimization solvers.
    <em>L2O is a work-in-progress; existing functionality is considered experimental and may
    change.</em>
  </p>

  <!-- ─────────────────── repositories table ──────────────── -->
  <h3>Open-Source&nbsp;Repositories</h3>

  <table style="border-collapse:collapse;width:100%">
    <tbody>
      <tr>
        <td style="padding:4px 6px;vertical-align:top;">
          <a href="https://github.com/LearningToOptimize/LearningToOptimize.jl"
             target="_blank">LearningToOptimize.jl</a>
        </td>
        <td style="padding:4px 6px;">
          Flagship Julia package that wraps data generation, training loops and evaluation
          utilities for fitting surrogate models to parametric optimization problems.
        </td>
      </tr>

      <tr>
        <td style="padding:4px 6px;vertical-align:top;">
          <a href="https://github.com/andrewrosemberg/DecisionRules.jl"
             target="_blank">DecisionRules.jl</a>
        </td>
        <td style="padding:4px 6px;">
          Build decision rules for multistage stochastic programs, as proposed in
          <a href="https://arxiv.org/pdf/2405.14973" target="_blank"><em>Efficiently
          Training Deep-Learning Parametric Policies using Lagrangian Duality</em></a>.
        </td>
      </tr>

      <tr>
        <td style="padding:4px 6px;vertical-align:top;">
          <a href="https://github.com/LearningToOptimize/L2OALM.jl"
             target="_blank">L2OALM.jl</a>
        </td>
        <td style="padding:4px 6px;">
          Implementation of the primal-dual learning method <strong>ALM</strong>,
          introduced in
          <a href="https://ojs.aaai.org/index.php/AAAI/article/view/25520" target="_blank">
          <em>Self-Supervised Primal-Dual Learning for Constrained Optimization</em></a>.
        </td>
      </tr>

      <tr>
        <td style="padding:4px 6px;vertical-align:top;">
          <a href="https://github.com/LearningToOptimize/L2ODLL.jl"
             target="_blank">L2ODLL.jl</a>
        </td>
        <td style="padding:4px 6px;">
          Implementation of the dual learning method <strong>DLL</strong>,
          proposed in
          <a href="https://neurips.cc/virtual/2024/poster/94146" target="_blank">
          <em>Dual Lagrangian Learning for Conic Optimization</em></a>.
        </td>
      </tr>

      <tr>
        <td style="padding:4px 6px;vertical-align:top;">
          <a href="https://github.com/LearningToOptimize/L2ODC3.jl"
             target="_blank">L2ODC3.jl</a>
        </td>
        <td style="padding:4px 6px;">
          Implementation of the primal learning method <strong>DC3</strong>, as described in
          <a href="https://openreview.net/forum?id=V1ZHVxJ6dSS" target="_blank">
          <em>DC3: A Learning Method for Optimization with Hard Constraints</em></a>.
        </td>
      </tr>

      <tr>
        <td style="padding:4px 6px;vertical-align:top;">
          <a href="https://github.com/LearningToOptimize/BatchNLPKernels.jl"
             target="_blank">BatchNLPKernels.jl</a>
        </td>
        <td style="padding:4px 6px;">
          GPU kernels that evaluate objectives, Jacobians and Hessians for
          <strong>batches</strong> of
          <a href="https://github.com/exanauts/ExaModels.jl" target="_blank">ExaModels</a>,
          useful when defining loss functions for large-batch ML predictions.
        </td>
      </tr>

      <tr>
        <td style="padding:4px 6px;vertical-align:top;">
          <a href="https://github.com/LearningToOptimize/BatchConeKernels.jl"
             target="_blank">BatchConeKernels.jl</a>
        </td>
        <td style="padding:4px 6px;">
          GPU kernels for batched cone operations (projections, distances, etc.),
          enabling advanced architectures such as repair layers.
        </td>
      </tr>

      <tr>
        <td style="padding:4px 6px;vertical-align:top;">
          <a href="https://github.com/LearningToOptimize/LearningToControlClass"
             target="_blank">LearningToControlClass</a>
        </td>
        <td style="padding:4px 6px;">
          Course repository for <em>Special Topics on Optimal Control &amp; Learning</em>
          (Fall 2025, Georgia Tech).
        </td>
      </tr>
    </tbody>
  </table>

  <!-- ─────────────── datasets and weights ──────────────── -->
  <h3 style="margin-top:1.25rem;">Open Datasets and Weights</h3>

  <p>
    The
    <a href="https://huggingface.co/LearningToOptimize" target="_blank">
    LearningToOptimize&nbsp;🤗 Hugging Face organization</a>
    hosts datasets and pre-trained weights that can be used with L2O packages.
  </p>

</div>
""")
  ╠═╡ =#

# ╔═╡ c08f511e-b91d-4d17-a286-96469c31568a
md"## Example: Robotic Arm Manipulation"

# ╔═╡ b3129bcb-c24a-4faa-a5cf-f69ce518ea87
begin
	load(joinpath(class_dir, "nlp_robot_arm.png"))
end

# ╔═╡ c1f43c8d-0616-4572-bb48-dbb71e40adda
md"""
[^ArmManip]

The tip of the second link is computed using the direct geometric model:

```math
p(\theta_{1},\theta_{2}) \;=\;
\begin{cases}
x = L_{1}\,\sin\theta_{1} \;+\; L_{2}\,\sin\!\bigl(\theta_{1}+\theta_{2}\bigr),\\[6pt]
y = L_{1}\,\cos\theta_{1} \;+\; L_{2}\,\cos\!\bigl(\theta_{1}+\theta_{2}\bigr).
\end{cases}
\tag{1}
```
"""

# ╔═╡ 57d896ca-221a-4cfc-b37a-be9898fac923
begin
md"""

**State**
```math
  \mathbf{x}_t=\begin{bmatrix}\theta_{1,t}&\theta_{2,t}&\dot\theta_{1,t}&\dot\theta_{2,t}\end{bmatrix}^{\!\top}
```

**Control**
```math
  \mathbf{u}_t=\boldsymbol\tau_t=\begin{bmatrix}\tau_{1,t}&\tau_{2,t}\end{bmatrix}^{\!\top}
```

**Dynamics** (Euler sample time Δt)
```math
  \mathbf{x}_{t+1}=f_d(\mathbf{x}_t,\mathbf{u}_t)
  \;\;\equiv\;
  \begin{bmatrix}
  \boldsymbol\theta_t+\Delta t\,\dot{\boldsymbol\theta}_t\\[2pt]
  \dot{\boldsymbol\theta}_t+\Delta t\,\mathcal{M}^{-1}(\boldsymbol\theta_t)\bigl(B(\boldsymbol\theta_t)\boldsymbol\tau_t + F(w_t) - C(\boldsymbol\theta_t,\dot{\boldsymbol\theta}_t)\bigr)
  \end{bmatrix}
```

**Stage cost**

```math
c(\mathbf{x}_t,\mathbf{u}_t)=
\underbrace{\|p(\boldsymbol\theta_t)-p_{\text{target}}\|_2^{2}}_{\text{tracking}}
+\;\lambda_\tau\|\boldsymbol\tau_t\|_2^{2}\;,
\qquad \lambda_\tau>0 .
```

Terminal cost
$V_T(\mathbf{x}_T)=\|p(\boldsymbol\theta_T)-p_{\text{target}}\|_2^{2}$.

**Constraints**

```math
h(\mathbf{x}_t,\mathbf{u}_t)\ge 0\;:\;
\begin{cases}
\theta_{\min}\le\boldsymbol\theta_t\le\theta_{\max} &\text{(joint limits)}\\
|\dot{\boldsymbol\theta}_t|\le\dot\theta_{\max} &\text{(velocity limits)}\\
|\boldsymbol\tau_t|\le\tau_{\max} &\text{(actuator limits)}
\end{cases}
```

"""
end

# ╔═╡ 52005382-177b-4a11-a914-49a5ffc412a3
section_outline(md"A Crash Course:",md" (Continuous-Time) Dynamics
")

# ╔═╡ 8ea866a6-de0f-4812-8f59-2aebec709243
md"

The general form for the Continuous-Time Dynamics of a smooth system:

```math
\dot{x} = f(x,u) \quad \text{First-Order Ordinary Differential Equation (ODE)}
```
where
```math
\begin{cases}
f: \mathbb{R}^{n} \times \mathbb{R}^{m} \rightarrow \mathbb{R}^{n} & \text{Dynamics} \\
x \in \mathbb{R}^{n} & \text{State} \\
u \in \mathbb{R}^{m} & \text{Control} \\
\dot{x} \in \mathbb{R}^{n} & \text{Time derivative of } x \\
\end{cases}
```
"

# ╔═╡ 2be161cd-2d4c-4778-adca-d45f8ab05f98
Foldable(md"What would $F=ma$ be?", md"""

A $2^{\text{nd}}$--Order ODE! But we can always write them as $1^{\text{st}}$--Order.
For a mechanical system:

```math
x=\begin{bmatrix}
q \\
\dot{q}=v
\end{bmatrix} \implies
\dot{x}=\begin{bmatrix}
v \\
\dot{v}=a
\end{bmatrix}
```
where
```math
\begin{cases}
q & \text{Configuration/Pose} \\
v & \text{Velocity/Angular-Velocity}
\end{cases}
```

**$q$ is not always a vector -- but a `Lie group / Differentiable Manifold`. Examples?**

Even if $q$ is not a vector, $v$ is!

""")

# ╔═╡ b452ee52-ee33-44ad-a980-6a6e90954ee1
md"State $x$ is everything you need to define to determine how your system will progress through time--The initial conditions / time-varying constraints of your problem.
"

# ╔═╡ 9f62fae9-283c-44c3-8d69-29bfa90faf29
md"### Example: Pendulum"

# ╔═╡ baa3993c-96b0-474e-b5b4-f9eaea642a49
"""
    pendulum(θ_deg = 60; L = 4, fsize = (520, 450), _xlims = nothing, _ylims = (-5, 5))

Draw a schematic pendulum hanging from a hatched ceiling and return the figure.

# Arguments
- `θ_deg`: rod angle in degrees, converted with `deg2rad`; `0` draws the rod
  straight down along the dashed vertical reference.

# Keywords
- `L`: rod length in axis units.
- `fsize`: figure size passed to `Figure(size = ...)`.
- `_xlims`, `_ylims`: axis limits; a limit is only applied when it is not `nothing`.

# Returns
The `Figure` containing the drawing.
"""
function pendulum(θ_deg = 60; L = 4, fsize = (520, 450), _xlims=nothing, _ylims=(-5, 5))
    θ_rad     = deg2rad(θ_deg)
    origin    = Point2f(0, 0)
    bob       = Point2f(-L*sin(θ_rad), -L*cos(θ_rad))   # rod tip
    rod_angle = -π/2 - θ_rad    # polar angle of the rod (-150° for the default θ = 60°)

    # A square, decoration-free axis: no ticks, no grid, no spines.
    axis_opts = (;
        aspect        = 1,
        xticksvisible = false,
        yticksvisible = false,
        xgridvisible  = false,
        ygridvisible  = false,
    )
    fig = Figure(size = fsize)
    ax  = Axis(fig[1, 1]; axis_opts...)
    hidespines!(ax)
    isnothing(_ylims) || ylims!(ax, _ylims)
    isnothing(_xlims) || xlims!(ax, _xlims)

    # Ceiling: one thick horizontal line plus short hatch marks above it.
    lines!(ax, [-5, 5], [0, 0]; linewidth = 3)
    for hatch_x in -4.5:1:4.5
        lines!(ax, [hatch_x, hatch_x], [0, 0.4]; linewidth = 2)
    end

    # Dashed vertical reference through the pivot.
    lines!(ax, [0, 0], [0, -L - 1]; linestyle = :dash)

    # Rod from the pivot to the bob, with its "ℓ" label offset from the rod.
    lines!(ax, [origin[1], bob[1]], [origin[2], bob[2]]; linewidth = 3)
    label_pos = 0.6 .* (origin .+ bob) .+ Point2f(0.25, 0.5)
    text!(ax, label_pos; text = "ℓ", fontsize = 18)

    # Angle arc swept from the vertical reference toward the rod.
    arc_radius = 0.2L
    arc_angles = range(-π/2, rod_angle; length = 60)
    lines!(ax, arc_radius .* cos.(arc_angles), arc_radius .* sin.(arc_angles); linewidth = 2)
    text!(ax, Point2f(arc_radius*0.05, -0.9arc_radius); text = "θ", fontsize = 18)

    # Bob: a white disc with an outline, labelled "m" at its centre.
    scatter!(ax, [bob]; markersize = 55, color = :white, strokewidth = 3)
    text!(ax, bob; text = "m", align = (:center, :center))

    return fig
end

# ╔═╡ 9ec1f918-ff16-4a94-b75f-4b07e2931d4c
@bind θ PlutoUI.Slider(0:1:360, default = 60, show_value = x-> "θ = $(x)")

# ╔═╡ 2f42d32e-8e53-458a-816e-292861a8b8ef
pendulum(θ)

# ╔═╡ ab369bb9-ecce-4c7b-b082-d6ae49beafe8
Foldable(md"How do we write the dynamics?", md"""

The $2^{\text{nd}}$--Order ODE:
```math
m \cdot l^{2} \cdot \ddot{\theta} + m \cdot g \cdot l \cdot \sin(\theta) = u
```
where
```math
\begin{cases}
m & \text{Mass} \\
l & \text{Length of the pole} \\
\theta & \text{Pole angular position} \\
g & \text{Gravity} \\
u & \text{Torque exerted at axis}
\end{cases}
```

""")

# ╔═╡ bd1b6301-0b4d-4f94-81bb-e0267792aca0
Foldable(md"How to write it as a $1^{\text{st}}$--Order ODE?", md"""

```math
x=\begin{bmatrix}
\theta \\
\dot{\theta}
\end{bmatrix} \implies
\dot{x}=\begin{bmatrix}
\dot{\theta} \\
\ddot{\theta}
\end{bmatrix} =
\begin{bmatrix}
\dot{\theta} \\
\frac{-g \sin(\theta)}{l} + \frac{u}{ml^{2}}
\end{bmatrix}
```
**Angles are not in** $\mathbb{R}$! In fact:
```math
\begin{cases}
e^{i\theta} \in S^{1} & \text{Configuration in the Circle Group} \\
\dot{\theta} \in \mathbb{R} \\
x \in S^{1} \times \mathbb{R} & \text{Cylinder}
\end{cases}
```

""")

# ╔═╡ 4d598933-05a9-44fa-b5a7-f7e1c7afb094
md"## Control--Affine Systems

Non--linear Systems of the form:
```math
\dot{x} = \underbrace{f_{o}(x)}_{\text{drift}} +  \underbrace{B(x)}_{\text{input Jacobian}}u
```

 $\implies$ Non--linear in the state but affine in the input/control.

Control--Affine Systems are common in many mechanical systems.

"

# ╔═╡ 5f408845-7870-425b-af53-b9e2a8d0c2ea
Foldable(md"Pendulum?", md"""

```math
f_{o}(x)=\begin{bmatrix}
\dot{\theta} \\
\frac{-g \sin(\theta)}{l}
\end{bmatrix},\quad
B(x)=\begin{bmatrix}
0 \\
\frac{1}{ml^{2}}
\end{bmatrix}
```
""")

# ╔═╡ 962b427e-3712-4b7f-b971-5c29be434dca
Foldable(md"What happens if $B(x)$ is full rank?", md"""

Habemus a fully--actuated system! We can easily solve for $u$:

```math
u = B(x)^{-1}(\dot{x} - f_{o}(x))
```

> A system where the number of actuators (or control inputs) is equal to the number of degrees of freedom (DOF) of the system.

See **Feedback linearization** approaches.
""")

# ╔═╡ f10927fe-d392-4374-bad1-ab5ac85b8116
md"## Manipulator Dynamics

```math
\begin{cases}
M(q) \dot{v} + C(q,v) = B(q)u + F \\
\dot{q} = G(q)v & \text{(Velocity Kinematics)}
\end{cases} \qquad \qquad \qquad \qquad \qquad \qquad \qquad
```
```math
\qquad \implies
\qquad \dot{x} = f(x,u) =\begin{bmatrix}
G(q)v \\
M(q)^{-1}(B(q)u + F - C(q,v))
\end{bmatrix}
```
where
```math
\begin{cases}
M(q) & \text{Mass Matrix / Generalized Inertia Tensor} \\
C(q,v) & \text{Dynamics Bias (Coriolis, Gravity)} \\
B(q) & \text{Input Jacobian} \\
F & \text{External Forces}
\end{cases}
```
"

# ╔═╡ b8b206ef-cdc5-4cc9-9b55-70d711ba2a9e
Foldable(md"Pendulum?", md"""

```math
M(q) = ml^2, \; C(q,v) = mgl\sin(\theta), \; B=I, \; G=I
```
""")

# ╔═╡ a09de9e4-7ecc-4d23-9135-384077f0c03f
Foldable(md"All mechanical systems can be written this way. Why?", md"""

Manipulator Dynamics Equations are a way of rewriting the Euler--Lagrange equations.


> The equations were discovered in the 1750s by Swiss mathematician Leonhard Euler and Italian mathematician Joseph-Louis Lagrange.

""")

# ╔═╡ 5a691d10-44f7-4d44-a2c9-a7d4d720b7cc
begin
md"""
#### 🚀 Detour: The Principle of Least Action 🚀

In the calculus of variations and classical mechanics, the Euler–Lagrange equations are a system of second-order ordinary differential equations whose solutions are stationary points of the given action functional:

```math
\mathcal{S}[q(\cdot)] \;=\;
\int_{t_0}^{t_f} L\!\bigl(q(t),\; \dot q(t)\bigr)\,dt,
```

In classical mechanics:

```math
L = \underbrace{\frac{1}{2} v^{\top}M(q)v}_{\text{Kinetic Energy}} - \underbrace{U(q)}_{\text{Potential Energy}}
```

"""
end

# ╔═╡ f3d155c6-5384-481a-8373-582e753ea8d6
question_box(md"What can you say about $M(q)$? When do we have a problem inverting it?")

# ╔═╡ ee5c5e2e-e9f1-4f94-95c9-21d506281ae1
md"""
A curve $q^\star(t)$ is physically realised iff it is a stationary
point of the action $\mathcal{S}$:

```math
\delta\mathcal{S}=0
\;\;\Longrightarrow\;\;
\frac{d}{dt}\!\bigl(\tfrac{\partial L}{\partial\dot q}\bigr)
- \frac{\partial L}{\partial q}=0
\quad\Longrightarrow\quad
M(q)\,\ddot q + C(q,\dot q)\,\dot q + \nabla U(q)=0 .
```

"""

# ╔═╡ b9aeab8a-f8ea-4310-8568-5d6bda0bb4d3
question_box(md"Can you derive the stationary condition?")

# ╔═╡ 30a013a8-c02e-4816-af0d-9280473c916b
md"""
In most cases:
```math
q^{*} \in \arg \min_{q}
\int_{t_0}^{t_f} L\!\bigl(q(t),\; \dot q(t)\bigr)\,dt,
```

Now, suppose the configuration must satisfy a *gap function*
$\phi(q)\ge 0$ (e.g. **contact with the ground**, obstacle avoidance, joint
limits).
The variational problem becomes

```math
q^{*} \;\in\;
\arg\!\min_{q(\cdot)}
\int_{t_0}^{t_f} L\!\bigl(q(t),\dot q(t)\bigr)\,dt
\quad\text{s.t.}\quad
\phi\!\bigl(q(t)\bigr)\;\ge 0 \;\;\;\forall\,t.
```

Let $t_k = t_0 + k\,\Delta t$ with $k=0,\dots,N$ and
$q_k \approx q(t_k)$.
Using the midpoint rule we approximate the action by

```math
S_N(q_{0:N})
\;=\;
\sum_{k=0}^{N-1}
L\!\Bigl(
      \tfrac12\bigl(q_k+q_{k+1}\bigr),\;
      \tfrac{q_{k+1}-q_k}{\Delta t}
\Bigr)\,\Delta t,
```

and obtain the finite‐dimensional problem

```math
\begin{aligned}
\min_{q_1,\dots,q_{N}}
& \; S_N(q_{0:N}) \\[4pt]
\text{s.t.}\;&\;
   \phi(q_{k+1}) \;\ge 0,
   \qquad k = 0,\dots,N-1.
\end{aligned}
```

"""

# ╔═╡ 2cc57795-717a-46f0-9bb5-67b601a766de
begin
	# Remote image URLs used by the `<img>` cell further down, which picks
	# `gif_url` when the `playing` checkbox is ticked and `still_url` otherwise.
	gif_url   = "https://raw.githubusercontent.com/dojo-sim/Dojo.jl/main/docs/src/assets/animations/atlas_drop.gif"
	still_url = "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQkrtL7TCGzNxFlXIqYHW_cFP9pfLscwd7vLSH09nfRFEQCqX_J"
	# The block's value is an empty Markdown string — presumably so this
	# definitions-only cell renders no visible output.
	md""
end

# ╔═╡ 59f6167d-796c-4844-89c0-c796fb59aa2e
Columns(md"[^ZachMIT]", md"▶/⏸$(@bind playing CheckBox(default=false))")

# ╔═╡ 58c2e1f2-819d-40fc-8e92-03a1a3019a3d
Columns(md"""
$(load(joinpath(class_dir, "rocket_physics.png")))

#### Dojo.jl

A differentiable physics engine for robotics that simulates systems using optimization.

- [ArXiv preprint](https://arxiv.org/abs/2203.00806)
- [GitHub](https://github.com/dojo-sim/Dojo.jl)

"""
,
@htl """
<img src="$(playing ? gif_url : still_url)"
	 width="800" height="600"
	 style="object-fit:contain;" />
"""
)

# ╔═╡ 70690e72-c31e-4c91-b211-35c74d1d9973
warning_box(md"But in general we need a *ReFeynman* of these equations!")

# ╔═╡ 5f35a169-887f-477f-b010-167627f7ce4c
md"## (State–Space) Linear Systems

A system is **Continuous Linear** (CLTV / CLTI) if it can be written as:

```math
\dot{x} = A_{t}x + B_{t}u
```

In state–space literature we often also see an **Output Equation**:
```math
y = C_{t}x + D_{t}u
```

but we will neglect it for now.
"

# ╔═╡ 5c8f6256-e818-4aa1-aea0-02422df8f77c
Foldable(md" When do we have a Time--Invariant (TI) vs Time--Variant (TV)?", md"""

When (A,B) are constant we have an LTI system; otherwise it is LTV.

""")

# ╔═╡ a3f47dad-3cfa-4f6d-9dc6-d4b09d209f86
md"
**Non--Linear Systems are often approximated by Linear Systems (locally).**
"

# ╔═╡ e860d92b-cc8f-479b-a0fc-e5f7a11ae1fd
Foldable(md" $\dot{x} = f(x,u) \; \implies \; A=? \; B=?$", md"""

Suppose now that we apply our dynamics equation to an input:

```math
u(t) = u_{eq} + \delta u(t), \quad t \ge 0
```
where $u_{eq}$ is a fixed input and $\delta u(t)$ is a perturbation function such that the input is close
but not equal to $u_{eq}$ and similarly we perturb the initial condition:

```math
x(0) = x_e + \delta x(0)
```

We will define the deviation from the reference state as:
```math
\delta x(t) = x(t) - x_e, \quad t \ge 0
```

To determine the evolution of $\delta x(t)$, we can expand the dynamics around the reference point using a Taylor expansion:

```math
\dot{\delta x}(t) = f(x_e + \delta x(t), u_{eq} + \delta u(t))
```
```math
=\frac{\partial f}{\partial x}\bigg|_{(x_e, u_{eq})} \delta x(t) + \frac{\partial f}{\partial u}\bigg|_{(x_e, u_{eq})} \delta u(t) + \mathcal{O}(\|\delta x\|^2) + \mathcal{O}(\|\delta u\|^2)
```

Considering just the first-order terms we obtain:

```math
A= \frac{\partial f}{\partial x}\bigg|_{(x_e,u_{eq})}
, \quad B= \frac{\partial f}{\partial u}\bigg|_{(x_e,u_{eq})}
```

**Attention!** The linearization describes perturbations around the reference $(x_e,u_{eq})$; it is valid only while $\|\delta x\|$ and $\|\delta u\|$ remain small.

""")

# ╔═╡ bb4bfa72-bf69-41f5-b017-7cbf31653bae
Foldable(md"Why approximate? What happens to the optimal control problem?", md"""

The problem becomes convex!!

""")

# ╔═╡ 2936c97e-a407-4e56-952f-0a2dfb7acf83
md"""## Equilibria

An **Equilibrium** point $(x_{\mathrm{eq}},u_{\mathrm{eq}})$ is one at which the system is and will remain at "rest":

```math
\dot{x} = f(x_{\mathrm{eq}},u_{\mathrm{eq}}) = 0
```

The root of the dynamic equations!

In this case,

```math
x(t) = x_{\mathrm{eq}}, \; u(t) = u_{\mathrm{eq}} \; \forall t
```
"""

# ╔═╡ 1a154d04-2b33-43b6-9cb6-accd935de7b7
Foldable(md"Pendulum?", md"""

```math
\dot{x} =
\begin{bmatrix}
\dot{\theta} \\
\frac{-g \sin(\theta)}{l}
\end{bmatrix}=
\begin{bmatrix}
0 \\
0
\end{bmatrix}
\implies
\begin{cases}
\dot{\theta} = 0 & \text{No Velocity} \\
\theta = 0, \; \pi, \dots
\end{cases}
```
$([pendulum(0; fsize=(250,250), L=4), pendulum(180; fsize=(250,250), L=4)])
""")

# ╔═╡ 593e2764-7e77-4756-ae62-cfc3eb039444
question_box(md"### Can I use control to move the equilibria?")

# ╔═╡ 17939d59-1ba1-483c-864c-fed049b54151
Columns(md"""

How about if I want $\theta = \pi / 2$ ?

```math
\begin{cases}
\theta = \pi / 2 \\
\dot{x} =
\begin{bmatrix}
\dot{\theta} \\
\frac{-g \sin(\theta)}{l} + \frac{u}{ml^{2}}
\end{bmatrix}=
\begin{bmatrix}
0 \\
0
\end{bmatrix}
\end{cases}
```
```math
\implies \frac{u}{ml^{2}} = \frac{g \sin(\pi / 2)}{l}
```
```math
\implies u = m\,g\,l
```
""",
pendulum(90; fsize=(250,250), L=4)
)


# ╔═╡ aa63e35d-13dd-4910-b2fd-be017cda4b55
md"
In general, we get a root finding problem in u:

```math
f(x^{*},u) = 0
```

> You can see control as changing a vector field into a chosen dynamics[^cmu]
"

# ╔═╡ b180beb7-9606-4332-8e94-cd4546b4bc59
md"""
## Stability of Equilibria

When will the system stay "near" an equilibrium point under perturbations?
"""

# ╔═╡ 0e29ab58-e56c-4f54-aa2a-3152034ddc0c
md"### 1--D System"

# ╔═╡ d0d251ec-4ea9-417a-90c2-3f19f4b75aa8
md"""
 Outer points: $(@bind var1 CheckBox()) | Inner: $(@bind var2 CheckBox())
"""

# ╔═╡ 4f69216c-fc31-45d5-9699-c774f9f77a24
begin
	import Plots: plot, hline!, vline!, plot!
	f(x) = x^3 - 3*x
	plt = plot(range(-2.2,2.2, 1000),f, label="ẋ = x³ - 3x", xlabel="x",
			   ylabel="ẋ")
	hline!(plt, [0], label="", color=:black, style=:dash)
	vline!(plt, [0], label="", color=:black, style=:dash)
	if var2
		plot!(plt, [0.5,0.1], [0.2, 0.2],arrow=true,color=:green,linewidth=2,label="")
		plot!(plt, [-0.5,-0.1], [0.2, 0.2],arrow=true,color=:green,linewidth=2,label="")
	end
	if var1
		plot!(plt, [1.9,2.4], [0.2, 0.2],arrow=true,color=:red,linewidth=2,label="")
		plot!(plt, [1.6,1.1], [0.2, 0.2],arrow=true,color=:red,linewidth=2,label="")
		plot!(plt, [-1.9,-2.4], [0.2, 0.2],arrow=true,color=:red,linewidth=2,label="")
		plot!(plt, [-1.6,-1.1], [0.2, 0.2],arrow=true,color=:red,linewidth=2,label="")
	end