Skip to content

Commit d712067

Browse files
committed
Create cnn.tex
1 parent 76b037b commit d712067

1 file changed

Lines changed: 359 additions & 0 deletions

File tree

doc/src/week4/Latexslides/cnn.tex

Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
\documentclass{beamer}
2+
\usetheme{Madrid}
3+
4+
\usepackage{amsmath,amssymb,amsfonts,bm,mathtools}
5+
\usepackage{physics}
6+
\usepackage{bbm}
7+
\usepackage{mathrsfs}
8+
9+
\title{The Mathematics of Convolutional Neural Networks}
10+
\subtitle{Operator Theory, Symmetry, PDE Limits, and Expressivity}
11+
\author{Morten Hjorth-Jensen}
12+
\date{Spring 2026}
13+
14+
\begin{document}
15+
16+
%=========================================================
17+
\begin{frame}
18+
\titlepage
19+
\end{frame}
20+
21+
%=========================================================
22+
\section*{Lecture 1: Convolution as an Operator}
23+
24+
%---------------------------------------------------------
25+
\begin{frame}{Linear Shift-Invariant Operators}
26+
Let $T_a f(x)=f(x-a)$ on $\ell^2(\mathbb{Z}^d)$.
27+
28+
\textbf{Definition:}
29+
$L$ is shift-invariant if
30+
\[
31+
L T_a = T_a L.
32+
\]
33+
34+
\textbf{Theorem (Structure theorem):}
35+
Every bounded linear shift-invariant operator
36+
\[
37+
L:\ell^2(\mathbb{Z}^d)\to\ell^2(\mathbb{Z}^d)
38+
\]
39+
is convolution with some kernel $k$; boundedness on $\ell^2$ corresponds to $\hat k\in L^\infty$ (the condition $k\in\ell^1$ is sufficient but not necessary).
40+
41+
\[
42+
(Lf)(x)=\sum_y k(x-y)f(y).
43+
\]
44+
\end{frame}
45+
46+
%---------------------------------------------------------
47+
\begin{frame}{Proof Sketch}
48+
Let $\delta_0$ be Kronecker delta.
49+
50+
Define
51+
\[
52+
k(x)=L\delta_0(x).
53+
\]
54+
55+
Using shift invariance:
56+
\[
57+
L\delta_y = T_y L\delta_0 = k(\cdot - y).
58+
\]
59+
60+
For general $f$,
61+
\[
62+
Lf=\sum_y f(y)L\delta_y
63+
=\sum_y f(y)k(\cdot - y).
64+
\]
65+
66+
Hence $Lf=f*k$.
67+
\end{frame}
68+
69+
%---------------------------------------------------------
70+
\begin{frame}{Fourier Diagonalization}
71+
Fourier transform:
72+
\[
73+
\hat f(\omega)=\sum_{x\in\mathbb{Z}^d} f(x)e^{-i\omega\cdot x}.
74+
\]
75+
76+
Then
77+
\[
78+
\widehat{f*k}=\hat k \hat f.
79+
\]
80+
81+
Thus convolution operators are diagonal in Fourier basis.
82+
83+
Eigenfunctions:
84+
\[
85+
e^{i\omega x}
86+
\]
87+
with eigenvalues $\hat k(\omega)$.
88+
\end{frame}
89+
90+
%=========================================================
91+
\section*{Lecture 2: Variational and Functional View}
92+
93+
%---------------------------------------------------------
94+
\begin{frame}{CNN Layers as Nonlinear Operators}
95+
A CNN layer:
96+
\[
97+
\Phi(f)=\sigma(K*f).
98+
\]
99+
100+
Interpret as operator on Banach space
101+
\[
102+
\Phi: L^p(\Omega)\to L^p(\Omega).
103+
\]
104+
\end{frame}
105+
106+
%---------------------------------------------------------
107+
\begin{frame}{Variational Interpretation}
108+
Consider functional
109+
\[
110+
\mathcal{J}(f)=\int_\Omega |f(x)|^2 dx.
111+
\]
112+
113+
Convolution acts as quadratic form:
114+
\[
115+
\langle f, K*f\rangle
116+
=
117+
\int \hat k(\omega)|\hat f(\omega)|^2 d\omega.
118+
\]
119+
120+
Thus kernels define spectral penalties.
121+
\end{frame}
122+
123+
%---------------------------------------------------------
124+
\begin{frame}{Expressivity Theorem}
125+
\textbf{Theorem:}
126+
Deep convolutional networks with ReLU activation approximate, uniformly on compact sets, any continuous translation-equivariant map
127+
\[
128+
F:C(\Omega)\to C(\Omega).
129+
\]
130+
131+
Idea:
132+
\begin{itemize}
133+
\item Finite receptive fields
134+
\item Increasing depth expands support
135+
\end{itemize}
136+
\end{frame}
137+
138+
%=========================================================
139+
\section*{Lecture 3: Continuous Limit and Neural PDEs}
140+
141+
%---------------------------------------------------------
142+
\begin{frame}{Continuous Convolution}
143+
In continuum:
144+
\[
145+
(K*f)(x)=\int K(x-y)f(y)dy.
146+
\]
147+
148+
For small kernels:
149+
\[
150+
K(x)=\delta(x)+\epsilon \kappa(x).
151+
\]
152+
153+
Then
154+
\[
155+
K*f=f+\epsilon \kappa*f.
156+
\]
157+
\end{frame}
158+
159+
%---------------------------------------------------------
160+
\begin{frame}{Diffusion Limit}
161+
If $\kappa$ is symmetric, localized, and has zero mean (so the zeroth-moment mass term vanishes and the second moment dominates):
162+
163+
\[
164+
\kappa*f \approx c \Delta f.
165+
\]
166+
167+
Thus one layer:
168+
\[
169+
f_{l+1}=f_l + \epsilon c \Delta f_l.
170+
\]
171+
172+
In deep limit:
173+
\[
174+
\partial_t f = c \Delta f.
175+
\]
176+
177+
CNN approximates diffusion PDE.
178+
\end{frame}
179+
180+
%---------------------------------------------------------
181+
\begin{frame}{Neural ODE Limit}
182+
Let depth $\to\infty$, step $\to0$:
183+
184+
\[
185+
f_{l+1}-f_l = h \Phi(f_l)
186+
\]
187+
188+
Continuous limit:
189+
\[
190+
\partial_t f = \Phi(f).
191+
\]
192+
193+
Thus CNN approximates nonlinear PDE.
194+
\end{frame}
195+
196+
%=========================================================
197+
\section*{Lecture 4: Renormalization Group View}
198+
199+
%---------------------------------------------------------
200+
\begin{frame}{Hierarchical Representation}
201+
CNN layers increase receptive field.
202+
203+
Analogy:
204+
\[
205+
\text{RG step: integrate short-scale modes}
206+
\]
207+
208+
Pooling:
209+
\[
210+
f(x) \mapsto f(2x)
211+
\]
212+
213+
acts as coarse-graining.
214+
\end{frame}
215+
216+
%---------------------------------------------------------
217+
\begin{frame}{Scale Decomposition}
218+
Fourier perspective:
219+
220+
Early layers: high frequency filtering.
221+
222+
Deeper layers: low frequency structure.
223+
224+
This mirrors Wilsonian RG:
225+
\[
226+
\Lambda \to \Lambda/b.
227+
\]
228+
\end{frame}
229+
230+
%=========================================================
231+
\section*{Lecture 5: Group Equivariance}
232+
233+
%---------------------------------------------------------
234+
\begin{frame}{General Group Convolution}
235+
For group $G$:
236+
237+
\[
238+
(f * k)(g)=\int_G f(h)k(h^{-1}g)dh.
239+
\]
240+
241+
Equivariant if
242+
\[
243+
\Phi(L_g f)=L_g\Phi(f).
244+
\]
245+
\end{frame}
246+
247+
%---------------------------------------------------------
248+
\begin{frame}{Representation Theory}
249+
Decompose
250+
\[
251+
L^2(G)=\bigoplus_{\pi} V_\pi.
252+
\]
253+
254+
Equivariant operators block-diagonal in irreducible representations.
255+
256+
Explains:
257+
\begin{itemize}
258+
\item SO(3) CNNs
259+
\item Gauge equivariant nets
260+
\end{itemize}
261+
\end{frame}
262+
263+
%=========================================================
264+
\section*{Lecture 6: Fourier Neural Operators}
265+
266+
%---------------------------------------------------------
267+
\begin{frame}{FNO Definition}
268+
FNO layer:
269+
\[
270+
f_{l+1}(x)=\sigma\left(W f_l(x)+\mathcal{F}^{-1}\!\left(R(\omega)\,\hat f_l(\omega)\right)(x)\right).
271+
\]
272+
273+
Truncated Fourier modes.
274+
275+
Global operator learning.
276+
\end{frame}
277+
278+
%---------------------------------------------------------
279+
\begin{frame}{Comparison to CNN}
280+
CNN:
281+
\[
282+
\text{local kernel}
283+
\]
284+
285+
FNO:
286+
\[
287+
\text{global spectral multiplier}
288+
\]
289+
290+
CNN approximates local PDEs.
291+
292+
FNO approximates integral operators.
293+
\end{frame}
294+
295+
%=========================================================
296+
\section*{Lecture 7: Optimization and Stability}
297+
298+
%---------------------------------------------------------
299+
\begin{frame}{Gradient of Convolution}
300+
Let $g=K*f$ and let $\delta=\dfrac{\partial\mathcal{L}}{\partial g}$ denote the upstream (backpropagated) gradient. Then
\[
\frac{\partial \mathcal{L}}{\partial K}
=
\tilde f * \delta,
\qquad
\tilde f(x)=f(-x),
\]
i.e.\ the cross-correlation of the input with the backpropagated error.
305+
306+
Adjoint:
307+
\[
308+
\tilde K(x)=K(-x).
309+
\]
310+
\end{frame}
311+
312+
%---------------------------------------------------------
313+
\begin{frame}{Stability}
314+
If
315+
\[
316+
\|\hat k\|_\infty < 1
317+
\]
318+
319+
then convolution is contraction.
320+
321+
Deep nets stable under spectral normalization.
322+
\end{frame}
323+
324+
%=========================================================
325+
\section*{Exercises}
326+
327+
%---------------------------------------------------------
328+
\begin{frame}{Exercise 1}
329+
Prove that every bounded translation-invariant operator on $\ell^2$ is convolution.
330+
\end{frame}
331+
332+
%---------------------------------------------------------
333+
\begin{frame}{Exercise 2}
334+
Show diffusion limit of symmetric kernel explicitly via Taylor expansion.
335+
\end{frame}
336+
337+
%---------------------------------------------------------
338+
\begin{frame}{Exercise 3}
339+
Derive continuous depth limit leading to reaction-diffusion PDE.
340+
\end{frame}
341+
342+
%---------------------------------------------------------
343+
\begin{frame}{Exercise 4}
344+
Show that group convolution preserves equivariance.
345+
\end{frame}
346+
347+
%---------------------------------------------------------
348+
\begin{frame}{Summary}
349+
CNNs are:
350+
\begin{itemize}
351+
\item Convolution operators
352+
\item Spectral filters
353+
\item Nonlinear PDE discretizations
354+
\item Hierarchical renormalization flows
355+
\item Symmetry-preserving operator learners
356+
\end{itemize}
357+
\end{frame}
358+
359+
\end{document}

0 commit comments

Comments
 (0)