% probability-theory/inputs/lecture_13.tex
\lecture{13}{2023-05}{}
%The difficult part is to show \autoref{levycontinuity}.
%This is the last lecture, where we will deal with independent random variables.
We have seen that
if $X_1, X_2,\ldots$ are i.i.d.~with $ \mu = \bE[X_1]$,
$\sigma^2 = \Var(X_1)$,
then $\frac{\sum_{i=1}^{n} (X_i - \mu)}{\sigma \sqrt{n} } \xrightarrow{(d)} \cN(0,1)$.
\begin{question}
What happens if $X_1, X_2,\ldots$ are independent, but not identically distributed? Do we still have a CLT?
\end{question}
\begin{theorem}[Lindeberg CLT]
\label{lindebergclt}
Assume $X_1, X_2, \ldots,$ are independent (but not necessarily identically distributed) with $\mu_i = \bE[X_i] < \infty$ and $\sigma_i^2 = \Var(X_i) < \infty$.
Let $S_n = \sqrt{\sum_{i=1}^{n} \sigma_i^2}$
and assume that
\[\lim_{n \to \infty} \frac{1}{S_n^2} \sum_{i=1}^{n} \bE\left[
(X_i - \mu_i)^2 \One_{|X_i - \mu_i| > \epsilon S_n}
\right] = 0\]
for all $\epsilon > 0$
(\vocab{Lindeberg condition}\footnote{``The truncated variance is negligible compared to the variance.''}).
Then the CLT holds, i.e.~
\[
\frac{\sum_{i=1}^n (X_i - \mu_i)}{S_n} \xrightarrow{(d)} \cN(0,1).
\]
\end{theorem}
\begin{theorem}[Lyapunov condition]
\label{lyapunovclt}
Let $X_1, X_2,\ldots$ be independent, $\mu_i = \bE[X_i] < \infty$,
$\sigma_i^2 = \Var(X_i) < \infty$
and $S_n \coloneqq \sqrt{\sum_{i=1}^n \sigma_i^2}$.
Assume that, for some $\delta > 0$,
\[
\lim_{n \to \infty} \frac{1}{S_n^{2+\delta}}
\sum_{i=1}^{n} \bE\left[ |X_i - \mu_i|^{2 + \delta} \right] = 0
\]
(\vocab{Lyapunov condition}).
Then the CLT holds.
\end{theorem}
\begin{remark}
The Lyapunov condition implies the Lindeberg condition.
(Exercise).
\end{remark}
We will not prove \autoref{lindebergclt} or \autoref{lyapunovclt}
in this lecture. However, they are quite important.
We will now sketch the proof of \autoref{levycontinuity};
details can be found in the notes.\notes
\begin{definition}
Let $(X_n)_n$ be a sequence of random variables.
The distribution of $(X_n)_n$ is called
\vocab[Distribution!tight]{tight} (dt. ``straff''),
if
\[
\lim_{a \to \infty} \sup_{n \in \N} \bP[|X_n| > a] = 0.
\]
\end{definition}
\begin{example}+[Exercise 8.1]
\todo{Copy}
\end{example}
A generalized version of \autoref{levycontinuity} is the following:
\begin{theorem}[Generalized version of L\'evy's continuity theorem]
\label{genlevycontinuity}
Suppose we have random variables $(X_n)_n$ such that
$\bE[e^{\i t X_n}] \xrightarrow{n \to \infty} \phi(t)$ for all $t \in \R$
for some function $\phi$ on $\R$.
Then the following are equivalent:
\begin{enumerate}[(a)]
\item The distribution of $X_n$ is tight.
\item $X_n \xrightarrow{(d)} X$ for some real-valued random variable $X$.
\item $\phi$ is the characteristic function of $X$.
\item $\phi$ is continuous on all of $\R$.
\item $\phi$ is continuous at $0$.
\end{enumerate}
\end{theorem}
\todo{Proof of \autoref{genlevycontinuity} (Exercise 8.2)}
\begin{example}
Let $Z \sim \cN(0,1)$ and $X_n \coloneqq n Z$.
We have $\phi_{X_n}(t) = \bE[e^{\i t X_n}] = e^{-\frac{1}{2} t^2 n^2} \xrightarrow{n \to \infty} \One_{\{t = 0\} }$.
$\One_{\{t = 0\}}$ is not continuous at $0$.
By \autoref{genlevycontinuity}, $X_n$ can not converge to a real-valued
random variable.
Exercise: $X_n \xrightarrow{(d)} \overline{X}$,
where $\bP[\overline{X} = \infty] = \frac{1}{2} = \bP[\overline{X} = -\infty]$.
Similar examples are $\mu_n \coloneqq \delta_n$ and
$\mu_n \coloneqq \frac{1}{2} \delta_n + \frac{1}{2} \delta_{-n}$.
\end{example}
\begin{example}
Suppose that $X_1, X_2,\ldots$ are i.i.d.~with $\bE[X_1] = 0$.
Let $\sigma^2 \coloneqq \Var(X_i)$.
Then the distribution of $\frac{S_n}{\sqrt{n}}$, where $S_n \coloneqq X_1 + \ldots + X_n$, is tight:
\begin{IEEEeqnarray*}{rCl}
\bE\left[ \left( \frac{S_n}{\sqrt{n}} \right)^2 \right] &=&
\frac{1}{n} \bE[ (X_1+ \ldots + X_n)^2]\\
&=& \sigma^2.
\end{IEEEeqnarray*}
For $a > 0$, by Chebyshev's inequality, % TODO
we have
\[
\bP\left[ \left| \frac{S_n}{\sqrt{n}} \right| > a \right] \leq \frac{\sigma^2}{a^2} \xrightarrow{a \to \infty} 0.
\]
verifying \autoref{genlevycontinuity}.
\end{example}
\begin{example}
Suppose $C$ is a random variable which is \vocab[Cauchy distribution]{Cauchy distributed}, i.e.~$C$
has probability density $f_C(x) = \frac{1}{\pi} \frac{1}{1 + x^2}$.
\begin{figure}[H]
\centering
\begin{tikzpicture}
\begin{axis}[samples=100, smooth]
\addplot[] { (1/3.14159265358979323846) * (1 / ( 1 + x * x))};
\end{axis}
\end{tikzpicture}
\caption{Probability density function of $C$}
\end{figure}
We know that $\bE[|C|] = \infty$.
We have $\phi_C(t) = \bE[e^{\i t C}] = e^{-|t|}$.
Suppose $C_1, C_2, \ldots, C_n$ are i.i.d.~Cauchy distributed
and let $S_n \coloneqq C_1 + \ldots + C_n$.
Exercise: $\phi_{\frac{S_n}{n}}(t) = e^{-|t|} = \phi_{C_1}(t)$, thus $\frac{S_n}{n} \sim C$.
\end{example}
We will prove \autoref{levycontinuity} assuming
\autoref{lec10_thm1}.
\autoref{lec10_thm1} will be shown in the notes.\notes
We will need the following:
\begin{lemma}
\label{lec13_lem1}
Given a sequence $(F_n)_n$ of probability distribution functions,
there is a subsequence $(F_{n_k})_k$ of $F_n$
and a right continuous, non-decreasing function $F$,
such that $F_{n_k} \to F$ at all continuity points of $F$.
(We do not yet claim, that $F$ is a probability distribution function,
as we ignore $\lim_{x \to \infty} F(x)$ and $\lim_{x \to -\infty} F(x)$ for now).
\end{lemma}
\begin{lemma}
\label{s7e1}
Let $\mu \in M_1(\R)$, $A > 0$ and $\phi$ the characteristic function of $\mu$.
Then $\mu\left( (-A,A) \right) \ge \frac{A}{2} \left| \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) d t \right| - 1$.
\end{lemma}
\begin{refproof}{s7e1}
We have
\begin{IEEEeqnarray*}{rCl}
\int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) \dif t
&=& \int_{-\frac{2}{A}}^{\frac{2}{A}} \int_{\R} e^{\i t x} \mu(\dif x) \dif t\\
&=& \int_{\R} \int_{-\frac{2}{A}}^{\frac{2}{A}} e^{\i t x} \dif t \mu(\dif x)\\
&=& \int_{\R} \int_{-\frac{2}{A}}^{\frac{2}{A}} \cos(t x) \dif t \mu(\dif x)\\
&=& \int_{\R} \frac{2 \sin\left( \frac{2x}{A}\right) }{x} \mu(\dif x).\\
\end{IEEEeqnarray*}
Hence
\begin{IEEEeqnarray*}{rCl}
\frac{A}{2}\left|\int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) \dif t\right|
&=& \left| A \int_{\R} \frac{\sin\left( \frac{2x}{A} \right) }{x} \mu(\dif x)\right|\\
&=& 2\left| \int_{\R} \sinc\left( \frac{2x}{A} \right) \mu(\dif x)\right|\\
&\le& 2 \left[ \int_{|x| < A} \underbrace{\left|\sinc\left( \frac{2x}{A} \right) \right|}_{\le 1} \mu(\dif x)
+ \int_{|x| \ge A} \left|\sinc\left( \frac{2x}{A} \right)\right| \mu(\dif x) \right]\\
&\le& 2 \left[ \mu\left( (-A,A) \right)
+ \frac{A}{2} \int_{|x| \ge A} \frac{\left|\sin(2x / A)\right|}{|x|} \mu(\dif x) \right]\\
&\le& 2 \left[ \mu\left( (-A,A) \right)
+ \frac{A}{2} \int_{|x| \ge A} \frac{1}{A} \mu(\dif x) \right]\\
&\le& 2 \mu((-A,A)) + \mu((-A,A)^c)\\
&=& 1 + \mu((-A,A)).
\end{IEEEeqnarray*}
\end{refproof}
\begin{refproof}{levycontinuity}
``$\implies$'': If $\mu_n \implies \mu$,
then by definition
$\int f \dif \mu_n \to \int f \dif \mu$
for all $f \in C_b$.
Since $x \to e^{\i t x}$ is continuous and bounded,
it follows that $\phi_n(t) \to \phi(t)$
for all $t \in \R$.
``$\impliedby$'':
% Step 1:
\begin{claim}
\label{levyproofc1}
Given $\epsilon > 0$ there exists $A > 0$ such that
$\liminf_n \mu_n\left( (-A,A) \right) \ge 1 - 2 \epsilon$.
\end{claim}
\begin{refproof}{levyproofc1}
If $f$ is continuous, then
\[
\frac{1}{2\eta} \int_{x - \eta}^{x + \eta} f(t) \dif t \xrightarrow{\eta \downarrow 0} f(x).
\]
Applying this to $\phi$ at $t = 0$, one obtains:
\begin{equation}
\left| \frac{A}{4} \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) dt - 1 \right| < \frac{\epsilon}{2}
\label{levyproofc1eqn1}
\end{equation}
\begin{claim}
For $n$ large enough, we have
\begin{equation}
\left| \frac{A}{4} \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) d t - 1\right| < \epsilon.
\label{levyproofc1eqn2}
\end{equation}
\end{claim}
\begin{subproof}
Apply dominated convergence.
\end{subproof}
So to prove $\mu_n\left( (-A,A) \right) \ge 1 - 2 \epsilon$,
apply \autoref{s7e1}.
It suffices to show that
\[
\frac{A}{2} \left| \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) \dif t\right| - 1 \ge 1 - 2\epsilon
\]
or
\[
1 - \frac{A}{4} \left|\int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) dt \right| \le \epsilon,
\]
which follows from \autoref{levyproofc1eqn2}.
\end{refproof}
% Step 2
By \autoref{lec13_lem1}
there exists a right continuous, non-decreasing $F $
and a subsequence $(F_{n_k})_k$ of $(F_n)_n$ where $F_n$ is
the probability distribution function of $\mu_n$,
such that $F_{n_k}(x) \to F(x)$ for all $x$ where $F$ is continuous.
\begin{claim}
\[
\lim_{x \to -\infty} F(x) = 0
\]
and
\[
\lim_{x \to \infty} F(x) = 1,
\]
i.e.~$F$ is a probability distribution function.\footnote{This does not hold in general!}
\end{claim}
\begin{subproof}
We have
\[
\mu_{n_k}\left( (- \infty, x] \right) = F_{n_k}(x) \to F(x).
\]
Again, given $\epsilon > 0$, there exists $A > 0$, such that
$\mu_{n_k}\left( (-A,A) \right) > 1 - 2 \epsilon$ (\autoref{levyproofc1}).
Hence $F(x) \ge 1 - 2 \epsilon$ for $x > A $
and $F(x) \le 2\epsilon$ for $x < -A$.
This proves the claim.
\end{subproof}
Since $F$ is a probability distribution function, there exists
a probability measure $\nu$ on $\R$ such that $F$ is the distribution
function of $\nu$.
Since $F_{n_k}(x) \to F(x)$ at all continuity points $x$ of $F$,
by \autoref{lec10_thm1} we obtain that
2023-05-23 18:02:54 +02:00
$\mu_{n_k} \overset{k \to \infty}{\implies} \nu$.
Hence
$\phi_{\mu_{n_k}}(t) \to \phi_\nu(t)$, by the other direction of that theorem.
But by assumption,
$\phi_{\mu_{n_k}}(\cdot ) \to \phi_{\mu}(\cdot )$, so $\phi_{\mu}(\cdot) = \phi_{\nu}(\cdot )$.
By \autoref{charfuncuniqueness}, we get $\mu = \nu$.
We have shown, that $\mu_{n_k} \implies \mu$ along a subsequence.
We still need to show that $\mu_n \implies \mu$.
\begin{fact}
Suppose $(a_n)_n$ is a bounded sequence in $\R$,
such that every convergent subsequence converges to the same $a \in \R$.
Then $a_n \to a$.
\end{fact}
\begin{subproof}
\notes
\end{subproof}
Assume that $\mu_n$ does not converge to $\mu$.
By \autoref{lec10_thm1}, pick a continuity point $x_0$ of $F$,
such that $F_n(x_0) \not\to F(x_0)$.
Pick $\delta > 0$ and a subsequence $F_{n_1}(x_0), F_{n_2}(x_0), \ldots$
which are all outside $(F(x_0) - \delta, F(x_0) + \delta)$.
Then $\phi_{n_1}, \phi_{n_2}, \ldots \to \phi$.
Now, there exists a further subsequence $G_1, G_2, \ldots$ of $F_{n_i}$,
which converges.
$G_1, G_2, \ldots$ is a subsequence of $F_1, F_2,\ldots$.
However $G_1, G_2, \ldots$ is not converging to $F$,
as this would fail at $x_0$. This is a contradiction.
\end{refproof}
% IID is over now
\subsection{Summary}
What did we learn:
\begin{itemize}
\item How to construct product measures
\item WLLN and SLLN
\item Kolmogorov's three series theorem
\item Fourier transform, weak convergence and CLT
\end{itemize}