\documentclass[12pt]{article}
\usepackage{amsfonts}
%\documentstyle[12pt,amsfonts]{article}
%\documentstyle{article}

\setlength{\topmargin}{-.5in}
\setlength{\oddsidemargin}{0 in}
\setlength{\evensidemargin}{0 in}
\setlength{\textwidth}{6.5truein}
\setlength{\textheight}{8.5truein}
%\input ../basicmath/basicmathmac.tex
%
%\input ../adgeomcs/lamacb.tex
\input ../adgeomcs/mac.tex
\input ../adgeomcs/mathmac.tex

\def\fseq#1#2{(#1_{#2})_{#2\geq 1}}
\def\fsseq#1#2#3{(#1_{#3(#2)})_{#2\geq 1}}
\def\qleq{\sqsubseteq}

%
\begin{document}
\begin{center}
\fbox{{\Large\bf Spring, 2005 \hspace*{0.4cm} CIS 610}}\\
\vspace{1cm}
{\Large\bf Advanced Geometric Methods in Computer Science\\
Jean Gallier \\
\vspace{0.5cm}
Homework 1}\\[10pt]
February 2, 2005; Due February 21, 2005\\
\end{center}

``A problems'' are for practice only, and should not
be turned in.

\medskip\noindent
{\bf Problem A1.} 
Given a finite dimensional Euclidean space, $E$, 
if $U$ and $V$ are two orthogonal subspaces that span $E$, 
i.e., $E = U \oplus V$, we have the linear projections
$\mapdef{p_U}{E}{U}$ and $\mapdef{p_V}{E}{V}$. Recall:
since every $w\in E$ can be written uniquely as
$w = u + v$, with $u\in U$ and $v\in V$, we have
$p_U(w) = u$, $p_V(w) = v$ and 
$p_U(w) + p_V(w) = w$, for all $w\in E$.
We define the {\it orthogonal
reflection with respect to $U$ and parallel to $V$\/} as the linear
map, $s$, given by
\[
s(w) = 2p_U(w) - w =  w - 2p_V(w),
\]
for all $w\in E$.
Observe that $s\circ s = \id$, that $s$ is the identity on $U$ and $s = -\id$
on $V$.
When $U = H$ is a hyperplane,  $s$ is called a 
{\it hyperplane reflection\/}
(about $H$).

\medskip
(a)
If $w$ is any nonzero vector orthogonal to the hyperplane $H$, prove that
$s$ is given by
\[
s(x) = x - 2\frac{\lag x, w\rag}{\norme{w}^2} w,
\]
for all $x\in E$.
(Here, $\norme{w}^2 = \lag w, w\rag$.)

\medskip
(b)
In matrix form, if the vector $w$ is represented by the column vector $W$,
show that the matrix of the hyperplane reflection about the hyperplane
$K = \{w\}^{\perp}$ is
\[
I - 2\frac{W\transpos{W}}{\transpos{W}W}.
\]
Such matrices are called {\it Householder matrices\/}.

\medskip\noindent
{\bf Problem A2.} 
As in A1, let $E$ be a finite dimensional Euclidean space and
assume $E$ is nontrivial, i.e, $\dimm(E) \geq 1$.
Prove that if $u, v\in E$ are any two nonzero vectors and
$\norme{u} = \norme{v}$, then there is a hyperplane, $H$, so that the
reflection $s$ about $H$ sends $u$ to $v$ ($v = s(u)$)  and
if $u \not= v$, then this reflection is unique.


\vspace {0.25cm}\noindent
{\bf Problem A3.} 
Given a finite dimensional Euclidean space, $E$, recall that
a linear map, $\mapdef{f}{E}{E}$, is an {\it isometry\/} iff
\[
\lag f(u), f(v)\rag = \lag u, v\rag,
\quad\hbox{for all $u, v\in E$}.
\]

(a)
Prove that a linear map, $f$, is an isometry iff
\[
f^*\circ f = f\circ f^* = \id,
\]
where $f^*$ denotes the  adjoint of $f$.

\medskip
(b)
Note that an isometry, $f$, also preserves the Euclidean norm
$\norme{u} = \sqrt{\lag u, u\rag}$, i.e.,
$\norme{f(u)} = \norme{u}$, for all $u\in E$.

\medskip
Is the following converse true: If $f$ is a linear map and
$\norme{f(u)} = \norme{u}$, for all $u\in E$, then $f$ is an isometry?

\medskip
Is this statement still true if we do not assume that $f$ is linear?

\medskip
(c)
For {\it any\/} map, $\mapdef{f}{E}{E}$,
show that the condition
\[
\lag f(u), f(v)\rag = \lag u, v\rag,
\quad\hbox{for all $u, v\in E$}
\]
implies that $f$ is actually linear (remember, $E$ has finite dimension).

\vspace {0.25cm}\noindent
{\bf Problem A4.} 
Let $E$ and $F$ be normed vector spaces (as defined in the
transparencies, Section 3.1). 

\medskip
(a)
Check that
\[
\sup_{u \not= 0} \frac{\norme{Au}}{\norme{u}} =
\sup_{\norme{u} = 1} \norme{Au}.
\]
{\it Hint\/}. Use property (b) of a norm.

\medskip
(b)
Prove that a linear map, $\mapdef{A}{E}{F}$, is bounded iff it is
linear. (Again, property (b) of norms will be useful.)

\medskip
(c) Prove that every norm on $\reals^n$ or $\complex^n$ is continuous.

\medskip
(d) Two norms $\norme{\>}_1$ and $\norme{\>}_2$ are {\it equivalent\/} iff
there exist $c_1, c_2 > 0$ so that
$\norme{u}_1 \leq c_1\norme{u}_2$ and 
$\norme{u}_2 \leq c_2\norme{u}_1$, for all $u\in E$.
Prove that on a finite dimensional vector space, $E$, any two norms
are equivalent.

\medskip\noindent
{\it Hint\/}. If $E$ is finite-dimensional, then $E$ is isomorphic to
$\reals^n$ or to $\complex^n$. Use the fact that the $(n-1)$-sphere
\[
S^{n -1} = \{u\in E \mid \norme{u}_2 = 1\}
\]
is compact and consider the values of the functions
$x \mapsto \frac{\norme{x}_1}{\norme{x}_2}$ and
$x \mapsto \frac{\norme{x}_2}{\norme{x}_1}$ on $S^{n-1}$.

\medskip
(e) Use (d) to prove that if $E$ is finite-dimensional, then
every linear map, $\mapdef{A}{E}{F}$, is bounded ($E$ and $F$ are normed
vector spaces).

\vspace {0.5cm}\noindent
{\bf Problem A5.} 
Prove Proposition 3.1.6 of the transparencies.

\vspace {0.5cm}\noindent
{\bf Problem A6.} 
Given an $m\times n$ matrix, $A$, prove that its Frobenius norm, 
\[
\norme{A}_F = \sqrt{\sum_{i j} |a_{i\, j}|^2}
\]
satisfies
\[
\norme{A}_F = \sqrt{\mathrm{tr}(A^*A)} = \sqrt{\mathrm{tr}(AA^*)}
\]
where $\mathrm{tr}(B)$ is the trace of the square matrix $B$
(the sum of its diagonal elements).

\vspace {0.5cm}
``B problems'' must be turned in.

\vspace {0.25cm}\noindent
{\bf Problem B1 (30 pts).}
Prove Proposition 3.1.7:

\medskip\noindent
{\em 
Let $A$ be an $m\times n$ matrix (over $\reals$ or $\complex$) and
let $\mu_1\geq \mu_2 \geq \cdots \geq \mu_p$ be its singular values
(where $p = \min(m,n)$). Then, the following properties hold:
\begin{enumerate}
\item
$\norme{Au} \leq \norme{A}\norme{u}$, where $\norme{A}$ is
a subordinate norm and $\norme{Au}_2 \leq \norme{A}_F\norme{u}_2$,
where $\norme{A}_F$ is the Frobenius norm.
\item
$\norme{AB} \leq \norme{A}\norme{B}$, for a subordinate norm 
or the Frobenius norm.
\item
$\norme{UAV} = \norme{A}$, if $U$ and $V$ are orthogonal (or unitary)
and $\norme{\>}$ is the Frobenius norm or the  subordinate 
norm $\norme{\>}_2$.
\item
$\norme{A}_{\infty} = \max_i \sum_j |a_{i\, j}|$.
\item
$\norme{A}_{1} = \max_j \sum_i |a_{i\, j}|$.
\item
$\norme{A}_2 = \mu_1 = \sqrt{\lambda_{\mathrm{max}}(A^*A)}$,
where $\lambda_{\mathrm{max}}(A^*A)$ is the largest eigenvalue of
$A^*A$.
\item
$\norme{A}_F = \sqrt{\sum_{i = 1}^p \mu_i^2}$, where 
$p = \min(m,n)$.
\item
$\norme{A}_2 \leq \norme{A}_F \leq \sqrt{p}\norme{A}_2$. 
\end{enumerate}
In (4), (5), (6), (8), the matrix norms are the subordinate norms
induced by the corresponding norms ($\norme{\>}_{\infty}$,
$\norme{\>}_{1}$ and $\norme{\>}_{2}$) on $\reals^m$ and $\reals^n$.
}

\vspace {0.25cm}\noindent
{\bf Problem B2 (30 pts).}
Prove Proposition 3.1.8:

\medskip\noindent
{\em
Let $A$ be an $m\times n$ matrix of rank $r$
and let $VD\transpos{U} = A$ 
be an SVD for $A$. Write $u_i$ for the columns of $U$, $v_i$ for the
columns of $V$ and $\mu_1 \geq \mu_2 \geq \cdots \geq \mu_p$
for the singular values of $A$ ($p = \min(m, n)$).
Then, a matrix of rank $k < r$ closest to $A$
(in the $\norme{\>}_2$ norm) is given by
\[
A_k = \sum_{i = 1}^k \mu_i v_i \transpos{u_i} = 
V \mathrm{diag}(\mu_1, \ldots, \mu_k) \transpos{U}
\]
and $\norme{A - A_k}_2 = \mu_{k+1}$.
}

\medskip\noindent
{\it Hint\/}.
You will need to prove that if $B$ is any rank $k$ matrix ($k < r$)
then $\norme{A - B}_2 \geq \mu_{k+1}$.
Since $\mathrm{rk}(B) = k$, the kernel of $B$ has dimension $n - k$.
Note that the space spanned by $u_1, \ldots, u_{k+1}$ has
dimension $k + 1$; deduce that there must be a unit vector, $h$, in their
intersection and use
\[
\norme{A - B}_2 \geq \norme{(A - B)h}_2.
\]

\vspace {0.25cm}\noindent
{\bf Problem B3 (40 pts).}
(a) Prove Lemma 3.2.2:

\medskip\noindent
{\em
If $B$ is a symmetric positive semi-definite $d\times d$ matrix with eigenvalues
$\lambda_1 \geq \lambda_2 \geq \cdots \geq \lambda_d \geq 0$ and associated
eigenvectors $u_1, \ldots, u_d$, then
\[
\max_{x\not= 0} \frac{\transpos{x} B x}{\transpos{x}{x}} = \lambda_1
\]
(with the maximum attained for $x = u_1$)  and
\[
\max_{x\not= 0, x \in \{u_1, \ldots, u_k\}^{\perp}} 
\frac{\transpos{x} B x}{\transpos{x}{x}} = \lambda_{k+1}
\]
(with the maximum attained for $x = u_{k+1}$), where 
$1 \leq k \leq d - 1$.
}

\medskip
(b) Prove Theorem 3.2.3:

\medskip\noindent
{\em 
Let $X$ be an $n\times d$ matrix of data points, $X_1, \ldots, X_n$,
and let $\mu$ be the centroid of the $X_i$'s.
If $X - \mu = VD\transpos{U}$ is an SVD decomposition of $X - \mu$ and if 
the main diagonal of $D$ consists of the singular
values  $\mu_1 \geq \mu_2 \geq \cdots \geq \mu_d$, 
then a $k$th principal component of $X$ is given  by
\[
Y_k = (X - \mu)u_k = \hbox{$k$th column of $VD$},
\]
where $u_k$ is the $k$th column of $U$. Furthermore,
\[
\mathrm{var}(Y_k) = \frac{\mu_k^2}{n - 1}
\]
and $\mathrm{cov}(Y_h, Y_k) = 0$, whenever $h \not= k$.
}

\vspace {0.25cm}\noindent
{\bf Problem B4 (50 pts).}
The purpose of this problem is to prove that
given any self-adjoint linear map $\mapdef{f}{E}{E}$
(i.e., such that $\dual{f} = f$),
where $E$ is a Euclidean space of dimension $n\geq 3$,
given an orthonormal basis
$(\novect{e_1}, \ldots, \novect{e_n})$, there are $n-2$
isometries  $h_i$, hyperplane reflections or the identity,
such that the matrix of
$$h_{n-2} \circ \cdots \circ h_1
\circ f\circ
h_1 \circ \cdots \circ h_{n-2}$$
is a symmetric tridiagonal matrix.

\medskip
(1)
Prove that
for any isometry $\mapdef{f}{E}{E}$, we have $f = \dual{f} = f^{-1}$
iff $f\circ f = \id$.

\medskip
Prove that if $f$ and $h$ are  self-adjoint linear maps
($\dual{f} = f$ and $\dual{h} = h$), then $h\circ f\circ h$
is a self-adjoint linear map.

\medskip
(2)
Proceed by induction, taking inspiration from
the proof of the triangular decomposition given
in Lemma 7.3.1 of my book,
{\sl Geometric Methods and Applications\/}. 
Let $V_k$ be the subspace spanned by 
$(\novect{e_{k+1}}, \ldots, \novect{e_n})$.
For the base case, proceed  as follows. 

\medskip
Let 
$$f(\novect{e_1}) = a_1^0 \novect{e_1} + \cdots + a_n^0\novect{e_n},$$
and let
$$r_{1,\, 2} = \norme{a_2^0 \novect{e_2} + \cdots + a_n^0\novect{e_n}}.$$
Find an isometry $h_1$ (reflection or $\id$) such that
$$h_1(f(\novect{e_1}) - a_1^0\novect{e_1}) = r_{1,\, 2}\, \novect{e_2}.$$
Observe that 
$$\novect{w_1} =  r_{1,\, 2}\, \novect{e_2} +  a_1^0\novect{e_1} - f(\novect{e_1})
\in V_1,$$
and prove that $h_1(\novect{e_1}) = \novect{e_1}$, so that
$$h_1\circ f \circ h_1(\novect{e_1}) = a_1^0\novect{e_1} + r_{1,\, 2}\, \novect{e_2}.$$
Let $f_1 = h_1\circ f \circ h_1$.

\medskip
Assuming by induction that
$$f_k = h_k \circ \cdots \circ h_1
\circ f\circ
h_1 \circ \cdots \circ h_k$$
has a  tridiagonal matrix up to the $k$th row and column, $1\leq k \leq n - 3$, let
$$f_k(\novect{e_{k+1}}) = a^k_{k}\novect{e_{k}} + a^k_{k+1}\novect{e_{k+1}}
+ \cdots + a^k_{n}\novect{e_{n}},$$ 
and let
$$r_{k+1,\, k+2} = \norme{a^k_{k+2}\novect{e_{k+2}}
+ \cdots + a^k_{n}\novect{e_{n}}}.$$
Find an isometry $h_{k+1}$ (reflection or $\id$) such that
$$h_{k+1}(f_k(\novect{e_{k+1}}) - a^k_{k}\novect{e_{k}} - a^k_{k+1}\novect{e_{k+1}})
= r_{k+1,\, k+2}\, \novect{e_{k+2}}.$$
Observe that
$$\novect{w_{k+1}} =  r_{k+1,\, k+2}\, \novect{e_{k+2}} 
+ a^k_{k}\novect{e_{k}} + a^k_{k+1}\novect{e_{k+1}}
- f_k(\novect{e_{k+1}})\in V_{k+1},$$
and prove that
$h_{k+1}(\novect{e_{k}}) = \novect{e_{k}}$ and
$h_{k+1}(\novect{e_{k+1}}) = \novect{e_{k+1}}$, so that
$$h_{k+1}\circ f_k \circ h_{k+1}(\novect{e_{k+1}}) 
=   a^k_{k}\novect{e_{k}} + a^k_{k+1}\novect{e_{k+1}}
+ r_{k+1,\, k+2}\, \novect{e_{k+2}}.$$
Let $f_{k+1} = h_{k+1}\circ f_k \circ h_{k+1}$, 
and finish the proof.

\medskip
Do $f$ and $f_{n-2}$ have the same eigenvalues?
If so, explain why.

\medskip
(3)
Prove that given any symmetric $n\times n$-matrix $A$, there
are $n - 2$ matrices $H_1, \ldots, H_{n-2}$,
Householder matrices or the identity, such that
$$B = H_{n-2} \cdots H_1 A H_1 \cdots H_{n-2}$$
is a symmetric tridiagonal matrix.


\vspace {0.25cm}\noindent
{\bf Problem B5 (20 pts).}
As discussed in class, let $X$ be the $10\times 2$ centered matrix
consisting of the 
year of birth and the length of beard of our
ten mathematicians:

\[
\begin{tabular}{|l|c|r|} \hline
  Name                           & \hbox{year} &  {length} \\ \hline
 {Carl Friedrich Gauss}  & -51.4   & -5.6 \\ \hline
 {Camille Jordan} & 9.6   & 6.4 \\ \hline
 {Adrien-Marie Legendre}  & -76.4   & -5.6 \\ \hline
 {Bernhard Riemann}   & -2.4  & 9.4 \\ \hline
 {David Hilbert}   & 33.6   & -3.6 \\\hline
 {Henri Poincar\'e} & 25.6  & -0.6 \\ \hline
 {Emmy Noether} & 53.6 & -5.6 \\ \hline
 {Karl Weierstrass} & 13.4  & -5.6 \\ \hline
 {Eugenio Beltrami} & 6.6 & -3.6 \\ \hline
 {Hermann Schwarz} & 14.6 & 14.4 \\ \hline
\end{tabular}
\]

Compute the principal directions and the two PC's of $X$.
Plot your results
(You may use {\sl Matlab}, {\sl Mathematica}, etc.).
Can you conclude anything?

\vspace {0.25cm}\noindent
{\bf Problem B6 (40 pts).}
(a) Given a rotation matrix
%
%\medskip
\[R = \matta{\cos\theta}{-\sin\theta}
             {\sin\theta}{\cos\theta},\]
%
%\medskip\noindent
where $0 < \theta < \pi$, 
prove that there is a skew symmetric matrix
$B$ such that
\[R = (I - B)(I + B)^{-1}.\]

\medskip
(b) If $B$ is a skew symmetric $n\times n$ matrix, prove that
$\lambda I_n - B$ and $\lambda I_n + B$ 
are invertible for all $\lambda\not= 0$, and that
they commute.

\medskip
(c)
Prove that
\[R = (\lambda I_n - B)(\lambda I_n + B)^{-1}\]
is a rotation matrix that does not admit $-1$ as an
eigenvalue. (Recall, a rotation is an orthogonal matrix
$R$ with positive determinant, i.e., $\det(R) = 1$.)

\medskip
(d) Given any rotation matrix $R$ that does not admit $-1$
as an eigenvalue, prove  that there is a skew symmetric
matrix $B$ such that
\[R = (I_n - B)(I_n + B)^{-1}= (I_n + B)^{-1}(I_n - B).\]
This is known as the {\it Cayley representation\/} of rotations
(Cayley, 1846).
%\index{Cayley's representation of rotations}%

\medskip
(e) Given any rotation matrix $R$, prove 
that there is a skew symmetric
matrix $B$ such that
\[R = \left((I_n - B)(I_n + B)^{-1}\right)^2.\]


\vspace {0.25cm}\noindent
{\bf Problem B7 (20 pts).}
Given any hyperplane, $H$, in $\reals^m$ and given any point,
$x\in \reals^m$, the distance from $x$ to $H$ is defined by
\[
d(x,H) = \min_{h\in H} d(x,h),
\]
where $d(x,h)$ is the usual Euclidean distance in $\reals^m$.

\medskip
(a)
If the hyperplane, $H$, is given by the equation
\[
a_1x_1 + \cdots + a_mx_m + c = 0,
\]
then prove that
\[
d(x,H) = \frac{|a_1x_1 + \cdots + a_mx_m + c|}{\norme{a}},
\]
where $a = (a_1, \ldots, a_m)\not= 0$ and $x = (x_1, \ldots, x_m)$.

\medskip
(b)
Given a data set of $n$ points, $X_1, \ldots, X_n\in \reals^d$,
prove that a hyperplane, $H$, that best approximates 
$X_1, \ldots, X_n$ in the least squares sense is a hyperplane that
minimizes the sum of the square distances of each $X_i$ to $H$.


\vspace {0.5cm}\noindent
{\bf Extra Credit (40 pts).}
Write a computer program implementing the method of
Problem B4(3).


\vspace{0.5cm}\noindent
{\bf TOTAL: 230 + 40 points.}

\end{document}