\documentclass[12pt]{article}
\usepackage{e-jc}
\specs{P4.2}{21(4)}{2014}

%%% Standard Symbols %%%
\usepackage{xspace}
\usepackage{verbatim}
\usepackage{color}
\usepackage{graphicx}
\usepackage{tabularx}

\usepackage{amsmath}
\usepackage[showonlyrefs]{mathtools}
\usepackage{mathstyle}
\usepackage{breqn}
\usepackage{empheq}
\usepackage{amstext,amssymb,amsfonts}
\usepackage{nicefrac}
\usepackage{bm}

\newcommand{\arxiv}[1]{\href{http://arxiv.org/abs/#1}{\texttt{arXiv:#1}}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% New age reference packages:
% nameref: lets one refer to the title given to the theorem rather than
% its number.
% hyperref and cleveref are nice.
\usepackage{nameref}
\usepackage[pagebackref,colorlinks,linkcolor=black,filecolor = blue,
citecolor = black, urlcolor = blue,pdfstartview=FitH]{hyperref}
\usepackage[capitalize]{cleveref}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\usepackage{mdwlist}

%%% Theorems, Lemmas, etc.
\usepackage{amsthm}
\usepackage{thmtools}

\renewcommand{\qedsymbol}{$\square$}

\declaretheorem[within=section]{theorem}
\declaretheorem[sibling=theorem]{corollary}

\declaretheorem[sibling=theorem]{lemma}
\declaretheorem[sibling=theorem]{claim}

\declaretheorem[sibling=theorem]{definition}
\declaretheorem[sibling=theorem]{Lemma+Definition}
\declaretheorem[sibling=theorem]{remark}
\declaretheorem[sibling=theorem]{observation}
\declaretheorem[sibling=theorem]{conjecture}

\newenvironment{proofof}[1]{\noindent{\bf Proof
of #1:}}{\qed\bigskip}

\crefname{conjecture}{Conjecture}{Conjectures}
\crefname{claim}{Claim}{Claims}
\crefname{remark}{Remark}{Remarks}
\crefname{Lemma+Definition}{Lemma+Definition}{Lemma+Definition}

% useful things
\newcommand{\mrm}[1]{\mathrm {#1}}
\newcommand{\mv}[1]{\mathbf {#1}}
\newcommand{\msf}[1]{\mathsf {#1}}
\newcommand{\norm}[1]{\lVert#1\rVert}
\newcommand{\set}[1]{\left\{#1\right\}}

% bits
\newcommand{\bits}{\{0,1\}}

%%% Symbol styles:  choose as you like
\renewcommand{\leq}{\leqslant}
\renewcommand{\geq}{\geqslant}
\renewcommand{\ge}{\geqslant}
\renewcommand{\le}{\leqslant}
\renewcommand{\epsilon}{\varepsilon}
\newcommand{\eps}{\epsilon}
\newcommand{\psd}{\succeq 0}
\newcommand{\xx}{\mathbf{x}}
\newcommand{\yy}{\mathbf{y}}
\newcommand{\zz}{\mathbf{z}}
\newcommand{\ww}{\mathbf{w}}
\newcommand{\otilde}{\widetilde{O}}

%%% Reals, Naturals, Integers, etc. %%%

\newcommand{\R}{\mathbb{R}}
\newcommand{\C}{\mathbb{C}}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\T}{\mathbb{T}}
\newcommand{\U}{\mathbb{U}}

\newcommand{\cA}{\mathcal A}
\newcommand{\cB}{\mathcal B}
\newcommand{\cC}{\mathcal C}
\newcommand{\cD}{\mathcal D}
\newcommand{\cE}{\mathcal E}
\newcommand{\cF}{\mathcal F}
\newcommand{\cG}{\mathcal G}
\newcommand{\cH}{\mathcal H}
\newcommand{\cI}{\mathcal I}
\newcommand{\cJ}{\mathcal J}
\newcommand{\cK}{\mathcal K}
\newcommand{\cL}{\mathcal L}
\newcommand{\cM}{\mathcal M}
\newcommand{\cN}{\mathcal N}
\newcommand{\cO}{\mathcal O}
\newcommand{\cP}{\mathcal P}
\newcommand{\cQ}{\mathcal Q}
\newcommand{\cR}{\mathcal R}
\newcommand{\cS}{\mathcal S}
\newcommand{\cT}{\mathcal T}
\newcommand{\cU}{\mathcal U}
\newcommand{\cV}{\mathcal V}
\newcommand{\cW}{\mathcal W}
\newcommand{\cX}{\mathcal X}
\newcommand{\cY}{\mathcal Y}
\newcommand{\cZ}{\mathcal Z}

\newcommand{\bbA}{\mathbb A}
\newcommand{\bbB}{\mathbb B}
\newcommand{\bbS}{\mathbb S}
\newcommand{\bbR}{\mathbb R}
\newcommand{\bbZ}{\mathbb Z}
\newcommand{\bbI}{\mathbb I}
\newcommand{\bbQ}{\mathbb Q}
\newcommand{\bbP}{\mathbb P}
\newcommand{\bbE}{\mathbb E}
\newcommand{\bbT}{\mathbb T}

%%% Probability related macros %%%

% probability symbols
\newcommand{\Esymb}{{\bf E}}
\newcommand{\Isymb}{{\bf I}}
\newcommand{\Psymb}{{\bf Pr}}
\newcommand{\Vsymb}{\mathbb{V}}
\DeclareMathOperator*{\E}{\Esymb}
\DeclareMathOperator*{\Var}{\Vsymb}
\DeclareMathOperator*{\ProbOp}{\Psymb}
\DeclareMathOperator*{\I}{\Isymb}
\renewcommand{\Pr}{\ProbOp}
\newcommand{\Cov}{\mathbf{Cov}}

\newcommand{\Brac}[1]{\left[#1 \right]}
\newcommand{\Ex}[1]{\E\Brac{#1}}

\def\G{{\mathbb{G}}}
\def\depth{\mrm{depth}}

% Exponential function
\newcommand{\expo}[1]{{\mathsf{e}\left(#1\right)}}

\title{\bf\boldmath Limits of Boolean Functions on $\F_p^n$}
\author{Hamed Hatami\thanks{This author's research was supported by an NSERC, and an FQRNT grant.}\\
\small McGill University\\[-0.8ex]
\small Quebec, Canada\\
\small\tt hatami@cs.mcgill.ca\\
\and
Pooya Hatami\\
\small University of Chicago\\[-0.8ex]
\small Illinois, U.S.A.\\
\small\tt pooya@cs.uchicago.edu\\
\and
James Hirst\thanks{This author's research was partly supported by NSERC.}\\
\small McGill University\\[-0.8ex]
\small Quebec, Canada\\
\small\tt james.hirst@mail.mcgill.ca
}

\date{\dateline{Jun 9, 2014}{Sep 19, 2014}{Oct 2, 2014}\\
\small Mathematics Subject Classifications: 11B30}

\begin{document}
\maketitle

\begin{abstract}
We study sequences of functions of the form $\F_p^n \to \bits$ for varying $n$, and define a notion of convergence based on the induced distributions from restricting the functions to a random affine subspace. Using a decomposition theorem   and a recently proven equi-distribution theorem from higher-order Fourier analysis, we prove that the limits of such convergent sequences can be represented by certain measurable functions.  We also show that every such limit object arises as the limit of some sequence of functions. These results are in the spirit of similar results which have been developed for limits of graph sequences. A more general, albeit substantially more sophisticated, limit object was recently constructed by Bal\'azs Szegedy [Gowers norms, regularization and limits of functions on abelian groups, 2010. \arxiv{1010.6211}].
\end{abstract}

\section{Introduction}\label{sec:intro}


In limit theories of discrete structures, one often studies a large object by studying  its ``local statistics''.  More precisely, given a sampling rule that allows one to sample a random substructure, there is an induced probability measure on the set of possible small substructures. For example, given a graph $G$ and a positive integer $k$, one can select $k$ random vertices in $G$ and look at the subgraph induced by $G$ on these $k$ vertices. This induces a probability distribution on $k$-vertex graphs. Every such sampling rule leads to a notion of convergence. Namely, a sequence of structures is called convergent if these probability distributions converge. So, in the above example, a sequence of graphs is called convergent~\cite{LS06} if, for every $k$, the corresponding probability distributions on $k$-vertex graphs converges.

Let $p$ be a fixed prime, and denote $\F=\F_p$. In this article we are interested in functions from vector spaces over the field $\F$ to the set $\{0,1\}$, or equivalently the subsets of such vector spaces. Given a subset of $\F^n$, the local information we would like to work with is the distribution of small linear structures, e.g. arithmetic progressions, contained within the set. To this end, we use the sampling rule that considers the restriction of a given function to a random affine subspace. We will describe a restriction of $\F^n$ to an affine subspace of dimension $k$ by an affine transformation $A:\F^k \to \F^n$. Recall that an affine transformation $A$ is of the form $L+c$ where $L$ is linear and $c$ is a constant vector in $\F^n$. More precisely, given a function $f:\F^n \to \bits$ and a positive integer $k$, we select a random affine transformation $A:\F^k \to \F^n$ uniformly, and consider the random variable $Af: \F^k \to \bits$ defined as $Af : x \mapsto f(Ax)$. This induces a probability 
distribution on the set of functions $\{\F^k \to \bits\}$. A sequence of functions $\{f_i:\F^{n_i} \to \bits\}_{i \in \N}$ is called \emph{convergent} if these probability distributions converge for every $k$. 

It is worth mentioning that, in the context of this paper, it is simply more natural to work with affine subspaces rather than linear ones. This is partly because most substructures of interest (e.g., arithmetic progressions) are translation invariant and, more generally, invariant under affine transformations. Consequently, it is desirable to require that a function $f:\F^n \to \{0,1\}$ and its translations $f_a(x):=f(x+a)$ for $a \in \F^n$ have the same local statistics. This would have not been the case had we defined convergence by restricting to random linear subspaces. Additionally, since linear subspaces always contain the origin, the corresponding notion of  convergence is too sensitive to the values of the function at 0. For these reasons, we define  convergence via restricting to affine subspaces. However, the techniques of this paper can be extended to other similar notions of convergence.


Now, the purpose of this article is to describe a limit object for such convergent sequences of functions. Before we can state our results we need to recall some results from higher-order Fourier analysis. 


\section{Basic background}
Most of the material in this section is directly quoted from the full version of~\cite{BFHHL13}.

\paragraph{Notation}
For $d \in \mathbb{N} \cup \{\infty\}$, denote $[d]:=\{1,\ldots,d\}$ if $d <\infty$, and $[d]=\mathbb{N}$ otherwise.
We shorthand $\F=\F_p$ for a prime finite field. For $f:\F^n \to \C$ we denote $\|f\|_1 = \E[|f(x)|]$, $\|f\|_2^2 = \E[|f(x)|^2]$ where $x \in \F^n$ is chosen uniformly and $\|f\|_{\infty} = \max |f(x)|$. Note that $\|f\|_1 \le \|f\|_2 \le \|f\|_{\infty}$. The expression $o_{m}(1)$ denotes quantities which approach zero as $m$ grows. We shorthand $x \pm \eps$ for any quantity in $[x-\eps, x+\eps]$.

\subsection{Uniformity norms and non-classical polynomials}


\begin{definition}[Multiplicative Derivative]\label{multderiv}
Given a function $f: \F^n \to \C$ and an element $h \in \F^n$, define
the {\em multiplicative derivative in direction $h$} of $f$ to be the
function $\Delta_hf: \F^n \to \C$ satisfying $\Delta_hf(x) =
f(x+h)\overline{f(x)}$ for all $x \in \F^n$.
\end{definition}
The {Gowers norm} of order $d$ for a function $f$ is determined by the
expected multiplicative derivative of $f$ in $d$ random directions at
a random point.
\begin{definition}[Gowers norm]\label{gowers}
Given a function $f: \F^n \to \C$ and an integer $d \geq 1$, the {\em
  Gowers norm of order $d$}  for $f$ is given by
$$\|f\|_{U^d} = \left|\E_{h_1,\dots,h_d,x \in \F^n} \left[(\Delta_{h_1}
\Delta_{h_2} \cdots \Delta_{h_d}f)(x)\right]\right|^{1/2^d}.$$
\end{definition}
Note that as $\|f\|_{U^1}= |\Ex{f}|$ the Gowers norm of order $1$ is only a semi-norm. However for $d>1$, it is not difficult to show that
$\|\cdot\|_{U^d}$ is indeed a norm.

If $f = e^{2\pi i P/p}$ where $P: \F^n \to \F$ is a polynomial
of degree $< d$, then $\|f\|_{U^d} = 1$. If $d < p$ and $\|f\|_\infty \le 1$, then in fact, the
converse holds, meaning that any  function $f: \F^n \to \C$ satisfying
$\|f\|_\infty \le 1$ and $\|f\|_{U^d} = 1$ is of this form. But when $d \geq p$, the converse
is no longer true. In order to characterize functions $f : \F^n \to
 \C$ with $\|f\|_\infty \le 1$ and $\|f\|_{U^d}=1$, we need to define the notion of {\em
  non-classical polynomials}.


Non-classical polynomials might not necessarily be $\F$-valued. We need to
introduce some notation.
Let $\T$ denote the circle group $\R/\Z$. This is an Abelian group
with group operation denoted $+$. For an integer $k \geq 0$, let $\U_k$
denote the subgroup $\frac{1}{p^k} \Z/\Z \subseteq \T$. Let $\msf{e}: \T \to \C$ denote the character
$\expo{x} = e^{2\pi i x}$.
\begin{definition}[Additive Derivative]\label{addderiv}
Given a function\footnote{We try to adhere to the following convention: upper-case letters (e.g. $F$ and
  $P$) to denote functions mapping from $\F^n$ to $\T$ or to $\F$,
  lower-case   letters (e.g. $f$ and $g$) to denote functions mapping
  from $\F^n$ to $\C$, and upper-case Greek letters (e.g. $\Gamma$ and
$\Sigma$) to denote functions mapping  $\T^C$ to $\T$.} $P:
\F^n \to \T$ and an element $h \in \F^n$, define
the {\em additive derivative in direction $h$} of $f$ to be the
function $D_hP: \F^n \to \T$ satisfying $D_hP(x) = P(x+h) - P(x)$
for all $x \in \F^n$.
\end{definition}
\begin{definition}[Non-classical polynomials]\label{poly}
For an integer $d \geq 0$, a function $P: \F^n \to \T$ is said to be a
{\em non-classical polynomial of degree $\leq d$} (or simply a
{\em polynomial of degree $\leq d$}) if for all $h_1,
\dots, h_{d+1}, x \in \F^n$, it holds that
\begin{equation}\label{eqn:poly}
(D_{h_1}\cdots D_{h_{d+1}} P)(x) = 0.
\end{equation}
The {\em degree} of $P$ is the smallest $d$ for which the above holds.
A function $P : \F^n \to \T$ is said to be a {\em classical polynomial of degree
$\leq d$} if it is a non-classical polynomial of degree $\leq d$
whose image is contained in $\U_1$.
\end{definition}

It is a direct consequence of \cref{poly} that a function $f :
\F^n \to \C$ with $\|f\|_\infty \leq 1$ satisfies $\|f\|_{U^{d+1}} =
1$ if and only if $f = \expo{P}$ for a
(non-classical) polynomial $P: \F^n \to \T$ of degree $\leq d$.


The following lemma of Tao and Ziegler shows that a classical
polynomial $P$ of degree $d$ must always be of the form $\frac{1}{p}|Q(x)|$, where $Q : \F^n \to \F$ is a polynomial (in the usual sense) of
degree $d$, and $|\ \cdot\ |$ is the standard map from $\F$ to $\{0,1,\ldots,p-1\}$. This lemma also characterizes the structure of non-classical
polynomials.
\begin{lemma}[Lemma 1.7 in \cite{TZ12}]\label{struct}
Let $d \geq 1$ be an integer. A function $P: \F^n \to \T$ is a polynomial of degree $\leq d$ if and
only if $P$ can be represented as
%
\[
P(x_1,\dots,x_n) = \alpha + \sum_{\substack{0\leq d_1,\dots,d_n< p; k \geq 0: \\ 0 < \sum_i d_i \leq d - k(p-1)}} \frac{ c_{d_1,\dots, d_n,
  k} |x_1|^{d_1}\cdots |x_n|^{d_n}}{p^{k+1}} \mod 1,
\]
%
for a unique choice of $c_{d_1,\dots,d_n,k} \in \set{0,1,\dots,p-1}$
and $\alpha \in \T$.  The element $\alpha$ is called the {\em
  shift} of $P$, and the largest integer $k$ such that there
exist $d_1,\dots,d_n$ for which $c_{d_1,\dots,d_n,k} \neq 0$ is called
the {\em depth} of $P$. A depth $k$ polynomial $P$ takes values in a coset of the subgroup $\U_{k+1}$. Classical polynomials correspond to
polynomials with shift $0$ and depth $0$.
\end{lemma}

Note that \cref{struct} immediately implies the following important
observation:
\begin{remark}
If $Q: \F^n \to \T$ is a polynomial of degree $d$ and depth $k$, then
$pQ$ is a polynomial of degree $\max(d-p+1, 0)$ and depth $k-1$. In
other words, if $Q$ is classical, then $pQ$ vanishes, and otherwise,
its degree decreases by $p-1$ and its depth by $1$. Also, if $\lambda
\in [1, p-1]$ is an integer, then $\deg(\lambda Q) = d$ and
$\depth(\lambda Q) = k$.
\end{remark}
For convenience of exposition, we will assume throughout this
paper that the shifts of all polynomials are zero. This can be done
without affecting any of the results in this work. Hence, all
polynomials of depth $k$ take values in $\U_{k+1}$. By a \emph{$(d,k)$-polynomial} we mean a polynomial of degree $d$ and depth $k$.

\subsection{Rank}
We will often need to study  Gowers norms of exponentials of polynomials. As we describe below, if this analytic
quantity is non-negligible, then there is an algebraic explanation for this: it is possible to  decompose the polynomial as a function of a constant number of low-degree polynomials.  To state this rigorously, let us define the notion of {\em rank} of a polynomial.


\begin{definition}[Rank of a polynomial]
Given a polynomial $P : \F^n \to \T$ and an integer $d > 1$, the {\em $d$-rank} of
$P$, denoted $\msf{rank}_d(P)$, is defined to be the smallest integer
$r$ such that there exist polynomials $Q_1,\dots,Q_r:\F^n \to \T$ of
degree $\leq d-1$ and a function $\Gamma: \T^r \to \T$ satisfying
$P(x) = \Gamma(Q_1(x),\dots, Q_r(x))$. If $d=1$, then
$1$-rank is defined to be $\infty$ if $P$ is non-constant and $0$
otherwise.

The {\em rank} of a polynomial $P: \F^n \to \T$ is its $\deg(P)$-rank. We say $P$ is $r$-regular if $\msf{rank}(P) \geq r$.
\end{definition}

Note that for integer $\lambda \in [1, p-1]$,  $\mrm{rank}(P) =
\mrm{rank}(\lambda P)$. We will also define the following, weaker, analytic notion of uniformity for a polynomial. 

\begin{definition}[Uniformity]
Let $\epsilon > 0$ be a real. A degree $d$ polynomial $P:\F^n \to \T$ is said to be $\epsilon$-uniform if
\[ \|\expo{P}\|_{U^d} \ < \epsilon \]
\end{definition}

The following theorem of Tao and Ziegler relates rank to uniformity by showing that high rank polynomials have small
Gowers norms.

\begin{theorem}[Theorem 1.20 of \cite{TZ12}]\label{arank}
For any $\eps > 0$ and integer $d > 0$, there exists an
integer $r = r_{\ref{arank}}(d,\eps)$ such that the following is
true. For any polynomial $P : \F^n \to \T$ of degree $\leq d$, if
$\|\expo{P}\|_{U^d} \geq \eps$, then $\msf{rank}_{d}(P) \leq r$.
\end{theorem}

This immediately implies that a sufficiently regular polynomial will be uniform.

\begin{corollary}\label{cor:uni}
Let $\epsilon$, $d$, and $r=r_{\ref{arank}}(d,\epsilon)$ be as in \cref{arank}. Every $r$-regular polynomial of degree $d$ is also $\epsilon$-uniform.
\end{corollary}

Finally, we need to extend our definitions of rank and uniformity to a collection of polynomials. In particular, we will need these when the collection comes from a polynomial factor.

\begin{definition}[Rank and Regularity]\label{rfact}
A polynomial factor $\cB$ defined by a sequence of polynomials $P_1,\ldots,P_C:\F^n \to \T$ with respective depths $k_1,\ldots,k_C$ is said to have rank $r$ if $r$ is the least integer for which there exist weights  $(\lambda_1,\ldots,\lambda_C) \in \Z^C$, with $(\lambda_1 \emph{ mod } p^{k_1 + 1},\ldots,\lambda_C \emph{ mod } p^{k_C + 1}) \neq 0^C$, such that $\msf{rank}_d\left(\sum_{i=1}^C \lambda_i P_i\right) \leq r$, where $d = \max_i \deg(\lambda_i P_i)$.

Given a polynomial factor $\cB$ and a function $r:\Z_{>0} \to \Z_{>0}$, we say that $\cB$ is $r$-regular if $\cB$ is of rank larger than $r(|\cB|)$.
\end{definition}

\begin{definition}[Uniform Factor]\label{ufact}
Let $\epsilon > 0$ be a real. A polynomial factor $\cB$ defined by a sequence of polynomials $P_1,\ldots,P_C:\F^n \to \T$ with respective depths $k_1,\ldots,k_C$ is said to be $\epsilon$-uniform if for every collection $(\lambda_1,\ldots,\lambda_C) \in \Z^C$, with at least one $\lambda_i$ \emph{mod} $p^{k_i + 1}$ not 0, we have
\[ \left\| \expo{\sum_{i=1}^C \lambda_i P_i} \right\|_{U^d} \ < \epsilon, \]
where $d = \max_i \deg(\lambda_i P_i)$.
\end{definition}

\begin{remark}\label{rem:Cuni}
Let $\epsilon:\N \to \R_{+}$ be an arbitrary non-increasing function. Similar to \cref{cor:uni}, it also follows from \cref{arank} that an $r$-regular, degree $d$ factor $\cB$ is also $\epsilon(|\cB|)$-uniform when $r = r_{\ref{arank}}(d,\epsilon(\cdot))$.
\end{remark}

The following decomposition theorem is one of the main tools in higher-order Fourier analysis. 

\begin{theorem}[Strong Decomposition Theorem; Theorem 4.4 of \cite{BFL12}]\label{thm:decomp}
Suppose $\delta > 0$ and $d \geq 1$ are integers. Let $\eta: \N
\to \R^+$ and $r: \N \to \N$ be arbitrary non-decreasing functions. Then there exists  $C =
C_{\ref{thm:decomp}}(\delta,\eta,r,d)$ such that the following holds.

Given $f: \F^n \to [0,1]$, there
exist three functions $f_1, f_2, f_3: \F^n \to
\R$ and a polynomial factor  $\cB$ of
degree at most $d$ and complexity at most $C$ such that the following conditions hold:
\begin{itemize*}
\item[(i)]
$f=f_1+f_2+f_3$.
\item[(ii)]
$f_1 = \E[f|\cB]$.
\item[(iii)]
$\|f_2\|_{U^{d+1}} \leq 1/\eta(|\cB|)$.
\item[(iv)]
$\|f_3\|_2 \leq \delta$.
\item[(v)]
$f_1$ and $f_1 + f_3$ have range $[0,1]$; $f_2$ and $f_3$ have range $[-1,1]$.
\item[(vi)]
$\cB$ is $r$-regular.
\end{itemize*}
\end{theorem}

\begin{remark}
As in \cref{rem:Cuni}, taking $r = r_{\ref{arank}}(d,\epsilon(\cdot))$, we obtain a decomposition where now condition $(vi)$ reads that the factor $\cB$ is $\epsilon(|\cB|)$-uniform, where $\epsilon:\N \to \R_{+}$ is an arbitrary non-increasing function.
\end{remark}

\subsection{Complexity of systems of linear forms}
In this article, a \emph{linear form} in $k$ variables is a vector $L=(\lambda_1,\ldots,\lambda_k) \in \F^k$ regarded as a linear function from $V^k$ to $V$  for any vector space $V$ over $\F$: If $\mv{x}=(x_1,\ldots,x_k) \in V^k$, then $L(\mv{x}) := \lambda_1 x_1+\ldots+\lambda_k x_k$. A linear form $L = (\lambda_1, \lambda_2, \dots, \lambda_k)$ is said to be {\em affine} if $\lambda_1 =1$. Affine linear forms are important in this work for the following reason. If $\{L_1,\ldots,L_m\}  \subseteq \F^k$ and $A:\F^k \to \F^n$ is a uniform random affine transformation, then $(AL_1,\ldots,AL_m)$ has the same distribution as $(x_0+L_1(x_1,\ldots,x_k),\ldots,x_0+L_m(x_1,\ldots,x_k))$ where $x_0,\ldots,x_k$ are uniform and independent random vectors in $\F^n$. The vectors $(x_0+L_1(x_1,\ldots,x_k), \ldots, x_0+L_m(x_1,\ldots,x_k))$ can be considered as the application of affine linear forms to the variables $x_0,\ldots,x_k$.  On the other hand  if $\{L_1,\ldots,L_m\}  \subseteq \F^k$ are all affine linear forms, 
and $A:\F^k \to \F^n$ is a uniform random affine transformation, then $(AL_1,\ldots,AL_m)$ has the same distribution as $(L_1(x_1,\ldots,x_k),\ldots,L_m(x_1,\ldots,x_k))$ where $x_1,\ldots,x_k$ are uniform and independent random vectors in $\F^n$.


A \emph{system of linear forms} in $k$ variables is a finite set $\cL \subseteq \F^k$ of   linear forms  in $k$ variables. A system of linear forms is called \emph{affine} if it consists only of affine linear forms. For the reasons described in the previous paragraph we will mainly be interested in affine systems of linear forms. 


Given a function $f:\F^n \to \C$ and a system of linear forms $\cL=\{L_1,\ldots,L_m\} \subseteq \F^\ell$, define
\begin{equation}
\label{eq:avg_function}
t_\cL(f) = \E_{x_1,\ldots,x_\ell}\left[\prod_{L \in \cL} f(L(x_1,\ldots,x_\ell))\right].
\end{equation}


\begin{definition}\label{def:truecomplexity}
A system of linear forms $\cL = \{L_1,\dots,L_m\} \subseteq \F^\ell$  is said to be of true complexity at most $d$ if there exists a function $\delta:\R_+ \to \R_+$ such that $\lim_{\epsilon \to 0} \delta(\epsilon)=0$ and
$$\left|\E_{x_1,\dots,x_\ell \in \F^n} \left[ \prod_{i=1}^m
    f_i(L_i(x_1,\dots,x_\ell))\right] \right| \leq \min_i \delta(\|f_i\|_{U^{d+1}}),$$
for all $f_1, \dots, f_m : \F^n \to [-1,1]$.
\end{definition}


\begin{remark}
\label{rem:boundcomplexity}
It is known~\cite{GW10} that the true complexity of an affine system linear forms $\cL = \{L_1,\dots,L_m\}$ is at most $m$. 
\end{remark}

The following lemma shows that for a proper decomposition $f=f_1+f_2+f_3$, the average $t_\cL(f)$ is a good approximation of $t_\cL(f_1)$. 

\begin{lemma}
\label{lem:decompapprox}
Let $\mathcal{L} = \{L_1,\dots,L_m\}$ be a system of linear forms of true complexity at most $d$ and $\epsilon>0$ be a constant. Let $f:\F^n \to \bits$ be decomposed to $f=f_1+f_2+f_3$ according to \cref{thm:decomp}. Then
$$ |t_\cL(f) - t_\cL(f_1)| \le \epsilon,$$
provided that $\delta$ is sufficiently small and $\eta$ and $r$ grow sufficiently fast.
\end{lemma}
\begin{proof}
We can expand $t_\cL(f) = t_\cL(f_1 + f_2 + f_3)$ as
%
\[ \sum_{(i_j)_{j \in [m]} \in [3]^m} \E \left[ \prod_{j=1}^m f_{i_j}(L_j(x_1,\ldots,x_\ell)) \right]. \]
%
Most of the terms in this sum are negligible: If any $i_j = 2$, then from the decomposition theorem and the definition of true complexity we get that the summand is at most $\delta'(1/\eta(|\cB|))$, where $\delta'$ is from \cref{def:truecomplexity}. Additionally, if some $i_j = 3$, then an application of Cauchy-Schwarz bounds the summand by $\delta$. The only other term is precisely $t_\cL(f_1)$, so we get
%
\[ |t_\cL(f) - t_\cL(f_1)| \leq 3^m \max\{\delta'(1/\eta(|\cB|)),\delta\} = o_{\eta, r, \delta}(1). \]
\end{proof}


\section{Main results}

We  study the following notions of convergence. Given a function $f:\F^n \to \bits$ and an affine system of linear forms $\cL \subseteq \F^k$, we can select a random affine  transformation $A:\F^k \to \F^n$ uniformly, and then consider the restriction of $Af$ to the set $\cL$. This induces a probability distribution $\mu_f(\cL)$ over the set of functions $\{\cL \to \bits\}$.

\begin{definition}
A sequence of functions $\{f_i:\F^{n_i} \to \{0,1\}\}_{i \in \N}$ is called $d$-convergent if for every $k$ and every $\cL \subseteq \F^k$ of true complexity at most $d$, the probability distributions $\mu_{f_i}(\cL)$ converge.
\end{definition}

It follows from~\cref{rem:boundcomplexity} that $\{f_i:\F^{n_i} \to \{0,1\}\}_{i \in \N}$ is convergent if it is $d$-convergent for every $d$. Thus in the sequel, the term $\infty$-convergence means convergence. Our first goal is to find a representation for the limit of a $d$-convergent sequence.

\begin{definition}\label{def:GroupGd}
For every  $d \in \N \cup \{\infty\}$, denote $\G_d = \prod_{j=1}^d \prod_{k=0}^{\lfloor \frac{j-1}{p-1}\rfloor} \U_{k+1}^\N$. So every element in this group is of the form $a=(a_{j,k,i} :  j \in [d], k \in [0,\lfloor \frac{j-1}{p-1}\rfloor], i \in \N )$, with each $a_{j,k,i} \in \U_{k+1}$. 

A $d$-limit object is a measurable function $\Gamma:  \G_d \to [0,1]$.
\end{definition}

To be precise, $\G_d$ is a compact Abelian group with the topology generated by the open (clopen, even) sets which are the subsets of $\G_d$ obtained by fixing finitely many coordinates. The unique Haar probability measure on  $\G_d$ can be obtained by specifying it on these sets (in the obvious way) and extending it to $\G_d$ via Carath\'{e}odory's extension theorem.

In order to prove that $d$-limit objects correspond to the limits of $d$-convergent sequences, we need to define the probability distribution that a $d$-limit object induces on $\{\cL \to \bits\}$. First we need  another definition.


\begin{definition}[Consistency]\label{consistent}
Let $\cL = \{L_1, \dots, L_m\}$ be a system of linear forms.
A sequence of elements $b_1, \dots, b_m \in \T$ is said to be {\em
  $(d,k)$-consistent with $\cL$} if there exists a $(d,k)$-polynomial $P$ and a point $x$ such that $P(L_i(x))=b_i$ for every $i \in [m]$.

Given vectors $\mv{d}=(d_1, \dots, d_C)\in \Z_{>0}^C$ and $\mv{k} = (k_1,\dots,k_C) \in \Z_{\geq 0}^C$, a
sequence of vectors $\mv{b}_1, \dots, \mv{b}_m \in \T^C$ is said to be {\em $(\mv{d},\mv{k})$-consistent with $\cL$} if for every $i \in [C]$, the
elements $\mv{b}_{1}(i),\dots, \mv{b}_{m}(i)$ are $(d_i,k_i)$-consistent with $\cL$.
\end{definition}
If $\cB$  is a polynomial factor, the term {\em $\cB$-consistent with $\cL$} is a synonym for  {\em $(\mv{d},\mv{k})$-consistent with $\cL$} where  $\mv{d}=(d_1, \dots, d_C)$ and $\mv{k} = (k_1,\dots,k_C)$ are respectively the degree and depth sequences of polynomials defining $\cB$.  
  
A sequence of elements $\mv{b}_1, \dots, \mv{b}_m \in \G_d$ is \emph{consistent} with $\cL$ if for every $j \in [d]$,  $k \in [0,\lfloor \frac{j-1}{p-1}\rfloor]$, and $i \in \N$, the elements $\mv{b}_1(j,k,i),\ldots,\mv{b}_m(j,k,i)$ are $(j,k)$-consistent with $\cL$.

Consider a highly uniform polynomial factor $\cB$ of degree $d > 0$, defined by the collection of polynomials $P_1,\ldots,P_C:\F^n \to \T$, with respective degrees $d_1,\ldots,d_C$ and depths $k_1,\ldots,k_C$. Let $\cL = \{L_1,\ldots,L_m\}$ be an affine system of linear forms on $\ell$ variables. We are interested in the distribution of the random matrix
%
\begin{equation}\label{eq:plmatrix}
\begin{pmatrix}
P_1(L_1(X)) & P_2(L_1(X)) & \cdots & P_C(L_1(X)) \\
P_1(L_2(X)) & P_2(L_2(X)) & \cdots & P_C(L_2(X)) \\
\vdots & \vdots &  & \vdots \\
P_1(L_m(X)) & P_2(L_m(X)) & \cdots & P_C(L_m(X))
\end{pmatrix},
\end{equation}
%
where $X$ is a uniform random variable taking values in $\left(\F^n\right)^\ell$. Note that by the definition of consistency, for every $i \in [C]$, the $i$-th column of this matrix is always $(d_i,k_i)$-consistent with $\cL$. To prove our main result we will use the following equi-distribution theorem from \cite{BFHHL13}, which shows that if the factor $\cB$ is sufficiently uniform, then the distribution of \cref{eq:plmatrix} can be made arbitrarily close to uniform over matrices satisfying the consistency condition above.

\begin{theorem}[See Theorem~3.10 of \cite{BFHHL13}]\label{thm:affequid}
Given $\eps > 0$, let $\cB$ be an $\epsilon$-uniform polynomial factor of degree $d > 0$ and complexity $C$, that is defined by a tuple of
polynomials $P_1, \dots, P_C: \F^n
\to \T$ having respective degrees $d_1, \dots, d_C$ and respective
depths $k_1, \dots, k_C$.
Let $\cL=\{L_1, \dots, L_m\}$ be an affine system of linear forms on $\ell$ variables.

Suppose $\mv{b}_1, \dots, \mv{b}_m \in \T^{C}$  are atoms of $\cB$ that are
$\cB$-consistent with $\cL$. Then
$$
\Pr_{x_1, \dots, x_\ell}[\cB(L_j(x_1, \dots, x_\ell)) = \mv{b}_{j}~
 \forall j \in [m]] =
\frac{1}{K} \pm \eps,
$$
where $K$ is the number of  tuples $(\mv{b}_1, \dots, \mv{b}_m)$ that are
$\cB$-consistent with $\cL$. 
\end{theorem}


Consider a $d$-limit object $\Gamma:  \G_d \to [0,1]$. For an affine system of linear forms $\cL = \{L_1, \dots, L_m\}$,  select $\mv{b}_1, \dots, \mv{b}_m \in \G_d$ at random (according to the Haar measure on $\G_d$, defined following \cref{def:GroupGd}), conditioned on being consistent with $\cL$. Then define the random function $g:\cL \to \bits$ by setting  $g(L_i)=1$ with probability $\Gamma(\mv{b}_i)$ and $g(L_i)=0$ with probability $1-\Gamma(\mv{b}_i)$ independently for every $i \in [m]$. This introduces a probability measure $\mu_\Gamma(\cL)$ on the set of functions $\{\cL \to \bits\}$.

We say that a sequence of functions $\{f_i: \F^{n_i} \to \bits\}_{i \in \N}$ \emph{$d$-converges} to $\Gamma$ if for every affine system of linear forms $\cL$ of true complexity at most $d$, the probability measures $\mu_{f_i}(\cL)$ converge to $\mu_\Gamma(\cL)$.


\begin{theorem}[Main Theorem]
\label{thm:limit}
For every $d \in \N \cup \{\infty\}$, every $d$-convergent sequence  $d$-converges to a  $d$-limit object. On the other hand every $d$-limit object is the limit of a $d$-convergent sequence.
\end{theorem}

\section{Proof of the Main Theorem}
For a measurable $\Gamma:\G_d \to \C$, similar to (\ref{eq:avg_function}), we can define
$$t_\cL(\Gamma) := \E \left[\prod_{i \in [m]} \Gamma(\mv{b}_i)\right],$$
where $\mv{b}_1, \dots, \mv{b}_m \in \G_d$ are selected at random, conditioned on being consistent with $\cL$.

We start with a simple and standard observation that allows us to work with the averages $t_\cL(\cdot)$ rather than the the distributions $\mu_\cL$. 
\begin{observation}
For every $d \in \N \cup \{ \infty \}$, a sequence $\{f_i: \F^{n_i} \to \bits\}_{i \in \N}$  $d$-converges to a $d$-limit object $\Gamma$ if and only if  for every affine system of linear forms $\cL$ of true complexity at most $d$ we have $\lim_{i \to \infty} t_\cL(f_i) = t_\cL(\Gamma)$. 
\end{observation}
\begin{proof}
The forward direction here is clear: The value of $t_\cL(f_i)$ is determined by the distribution $\mu_{f_i}(\cL)$.

For the other direction, write $\mu := \mu_{f_i}(\cL)$ for some $i$ and some affine $\cL$ of true complexity at most $d$. We can identify $\mu$ with a function $\mu:\{0,1\}^{\cL} \rightarrow [0,1]$. Then, applying the Fourier transform (for the group $\F_2^{\cL}$), we can write
%
\[ \mu(x) = \sum_{S \subseteq \cL} \hat{\mu}(S) \chi_S(x), \]
%
so that $\mu$ is determined by the $\hat{\mu}(S)$. However, we have
%
\[ \hat{\mu}(S) = \E_{y\in \{0,1\}^{\cL}} \left[ \mu(y) \chi_S(y) \right] = \E_y \left[ \mu(y) (-1)^{\sum_{i \in S}y_i} \right] = \E_y \left[ \mu(y) \prod_{i \in S} (1-2y_i) \right]. \]
%
Expanding this product, we see that $\hat{\mu}(S)$ is a linear combination of terms of the form
%
\[ \E_y \left[\mu(y) \prod_{i \in S'} y_i\right] = t_{S'}(f_i), \]
%
where $S' \subseteq S \subseteq \cL$. Thus the distribution $\mu$ is determined by the values $t_{\cL'}(f_i)$, where $\cL' \subseteq \cL$ (and hence $\cL'$ has true complexity $\leq d$).
\end{proof}

\cref{thm:limit} follows from \cref{lem:firstpart} and~\cref{lem:secondpart}. 

\begin{lemma}
\label{lem:firstpart}
Let  $\{f_i: \F^{n_i} \to \bits\}_{i \in \N}$ be a $d$-convergent sequence. There exists a $d$-limit object $\Gamma$ such that $\lim_{i \to \infty} t_\cL(f_i)=t_\cL(\Gamma)$ for every affine system of linear forms $\cL$ of true complexity at most $d$.
\end{lemma}
\begin{proof}
Consider a decreasing sequence $\{\epsilon_i\}_{i \in \N}$ of positive reals tending to $0$. Let the parameters $\delta_i$, $\eta_i$, and $r_i$ be chosen as required by~\cref{lem:decompapprox} so that for every affine system of linear forms $\cL = \{L_1,\dots,L_m\}$ of true complexity at most $d$, if $i$ is sufficiently large, then the following holds:  
\begin{itemize}
\item[(i)]  $|t_\cL(f_i) - t_\cL(f_i^1)| \le \epsilon_i$ where $f_i = f_i^1 + f_i^2 + f_i^3$ is decomposed according to \cref{thm:decomp} with the parameters $\delta_i$, $\eta_i$, and $r_i$, and degree $d_i$, where $d_i=d$ if $d<\infty$, and $d_i=i$ if $d=\infty$.

\item[(ii)] The assertion of \cref{thm:affequid} is true with $\eps=\epsilon_i p^{-i d_iC}$ when applied to the factor $\cB$ in the decomposition  $f_i = f_i^1 + f_i^2 + f_i^3$. Here $C$ is the complexity of $\cB$.   
\end{itemize}

Decompose each $f_i$  as $f_i = f_i^1 + f_i^2 + f_i^3$ according to \cref{thm:decomp} with the above mentioned parameters. 
We have $f_i^1(x) = \tilde{\Gamma}_i(P_1^{i}(x),\ldots,P_C^{i}(x))$ for some function $\tilde{\Gamma}_i$ and polynomials $P_1^i,\ldots,P_C^i$. Considering the degrees and the depths of the polynomials, the function $\tilde{\Gamma}_i$ corresponds naturally to a $d$-limit object $\Gamma_i$: Indeed let $\phi: [C] \to \N \times \N \times \N$ be any injective map that satisfies $\phi(t)=(\deg(P_t^i),\depth(P_t^i),t)$ for every $t \in [C]$. Define $\pi: \G_d \to \T^C$  as $\pi:\mv{b} \to (\mv{b}(\phi(1)),\ldots,\mv{b}(\phi(C)))$, and let $\Gamma_i(\mv{b}) := \tilde{\Gamma}_i(\pi(\mv{b}))$ for every $\mv{b} \in \G_d$. 

Let $\cL = \{L_1,\dots,L_m\}$ be an affine system of linear forms of true complexity at most $d$, and let $i$ be sufficiently large.  
We will show that  $|t_\cL(f_i^1) - t_\cL(\Gamma_i)| \le \epsilon_i$. Choose $\mv{b}_1, \dots, \mv{b}_m \in \G_d$ randomly, conditioned on being consistent with $\cL$.  Since consistency is defined coordinate-wise, it follows that $(\pi(\mv{b}_1),\ldots,\pi(\mv{b}_m))$ is distributed uniformly conditioned on being $\cB$-consistent with $\cL$, and hence that $t_\cL(\tilde{\Gamma}_i) = t_\cL(\Gamma_i)$.

Now we can write
%
\[ t_\cL(f_i^1) = \E_{x} \left[ \prod_{L_j \in \cL} \tilde{\Gamma}_i(P_1^i(L_j(x)),\ldots,P_C^i(L_j(x)))\right] = \E \left[ \prod_{L_j \in \cL} \tilde{\Gamma}_i(y_j) \right], \]
%
where the $y_j$ are distributed as $(P_1(L_j(x)),\ldots,P_C(L_j(x)))$. The condition (ii) above shows that the distribution of $(\mv{b}_1,\ldots,\mv{b}_m)$, where the $\mv{b_j} \in \T^C$ are chosen uniformly and conditioned on being $\cB$-consistent with $\cL$, is $\epsilon$-close in total variation distance to that of $(P_1(L_j(x)),\ldots,P_C(L_j(x)))_{j\in[m]}$ when $x$ is chosen uniformly at random. This gives
%
\[ \E \left[ \prod_{L_j \in \cL} \tilde{\Gamma}_i(y_j) \right] \leq t_\cL(\tilde{\Gamma}_i) + p^{id_iC}\epsilon \]
%
since each $P_t^i$, $t \in [C]$, has degree at most $d_i$, hence there are at most $p^{id_iC}$ choices for the $y_j$ when $i \geq m$. So we have the desired approximation.

So far we have established that for every system of affine linear forms $\cL$, if $i$ is sufficiently large, then 
\begin{equation}
\label{eq:convGamma_f}
|t_\cL(f_i) - t_\cL(\Gamma_i)| \le 2\epsilon_i.
\end{equation}

Next we construct the limit object $\Gamma$. For every $t \in \N$ denote $\G_d^t = \prod_{j=1}^d \prod_{k=0}^{\lfloor \frac{j-1}{p-1}\rfloor} \U_{k+1}^t$. Note that $\G_d^t$ corresponds to a partition of  $\G_d$.  For every measurable $\Gamma:\G_d \to [0,1]$ and $t \in \N$, define $\mathcal{E}_t(\Gamma) = \E\left[\Gamma  \ | \ \G_d^t \right]$. Note that the set $\{\G_d^1 \to [0,1]\}$ is a compact space, and thus one can find a subsequence of $\{\Gamma_i\}_{i \in \N}$ such that $\mathcal{E}_1(\Gamma_i)$ for $i$ in this subsequence converges to a function $\mu_1: \G_d^1 \to [0,1]$. 
Now we restrict ourselves to this subsequence and consider $\mathcal{E}_2$. Again by compactness we can find a subsequence for which $\mathcal{E}_2(\Gamma_i)$ converges to a function $\mu_2:\G_d^2 \to [0,1]$. Continuing in the same manner we define $\mu_t:\G_d^t \to [0,1]$ for every $t$. Note that since we restricted to a subsequence at every step, we have $\E[\mu_t | \G_d^r ]=\mu_r$ for every $r<t$. Furthermore, by picking the first element from the first subsequence, the second element from the second subsequence, and so on, we obtain a subsequence $\Gamma'_1,\Gamma'_2,\ldots$ of the original sequence that satisfies $\lim \E\left[\Gamma'_i  \ | \ \G_d^t \right] = \mu_t$ for every $t \in \N$. 

The measure $\mu_t$ is a $\sigma$-finite measure over the atoms $\G_d^t$, and thus by Carath\'eodory's extension theorem, there is a unique measure (also $\sigma$-finite) $\mu$ on $\G_d$ such that $\E[\mu | \G_d^t] = \mu_t$ for every $t$. Now let $\nu$ denote the Haar probability measure on $\G_d$ (described after \cref{def:GroupGd}), and note that for any $t$ and any particular $\Gamma_i$ we have $\E[\Gamma_i \ | \G_d^t ] \leq 1$. Since $\mu_t$ is a limit (over a subsequence) of these averages, we have $\mu_t(A) \leq \nu(A)$ for every $A \subseteq \G_d^t$. It follows that $\mu(A) \leq \nu(A)$ for any $\mu$-measurable $A \subseteq \G_d$. In particular, $\mu$ is absolutely continuous with respect to $\nu$. Let  $\Gamma: \G_d \to [0,1]$ be the Radon-Nikodym derivative of $\mu$.

Note that as $\lim \E\left[\Gamma'_i  \ | \ \G_d^t \right] = \mu_t$, the sequence of $\Gamma'_i$ converge to $\Gamma$ in $L_1$, and consequently $\lim t_\cL(\Gamma_i') = t_\cL(\Gamma)$. We showed in (\cref{eq:convGamma_f}) that $\lim t_\cL(f_i) = \lim t_\cL(\Gamma_i)$, and since the former limit exists by assumption, it follows that $t_\cL(\Gamma) = \lim t_\cL(f_i)$. 
\end{proof}

Before we can prove the second part of \cref{thm:limit}, we need a simple lemma which shows the existence of collections of high rank polynomials of arbitrary degree and depth sequence.

\begin{lemma}
\label{lem:ddrank}
Let $\mv{d}=(d_1, \dots, d_C)\in \Z_{>0}^C$ and $\mv{k} = (k_1,\dots,k_C) \in \Z_{\geq 0}^C$ satisfy $0 \le k_i \le \lfloor \frac{d_i-1}{p-1}\rfloor$ for every $i$, and let $r$ be a positive integer. There exists a set of polynomials $P_1,\ldots,P_C$ of rank at least $r$ such that $P_i$ is of degree $d_i$ and depth $k_i$ for every $i \in [C]$.
\end{lemma}
\begin{proof}
Let $r'$ be an integer. For each $i$, let $m_i$ satisfy $d_i = m_i + (p-1)k_i$. Allot variables $x_1^i,\ldots,x_{m_i r'}^i$ for exclusive use by $P_i$, and let $P_i = (1/p^{k_i + 1})(x_1^i\cdots x_{m_i}^i + \cdots + x_{m_i(r'-1) + 1}^i\cdots x_{m_i r'}^i)$. It is clear that $P_1,\ldots,P_C$ has the desired degree and depth sequence. For sufficiently large $n$ we have enough variables to do this, and it is standard that for sufficiently large $r'$, these polynomials will have rank at least $r$.
\end{proof}

With this in hand, we can now complete the proof of \cref{thm:limit}.

\begin{lemma}
\label{lem:secondpart}
Let  $d \in \N \cup \{\infty\}$, and let $\Gamma$ be a $d$-limit object. There exists a  $d$-convergent  sequence of functions  $\{f_i: \F^{n_i} \to \bits\}_{i \in \N}$ whose limit is $\Gamma$.
\end{lemma}
\begin{proof}
For every $t \in \N$, define the function $\Gamma_t:\G_d \rightarrow [0,1]$ to be the function obtained from $\E \left[ \Gamma \mid \G_d^t \right]$ (a map from $\G_d^t$ to $[0,1]$) by extending it to a function on $\G_d$. The $\Gamma_t$ converge to $\Gamma$ in $L_1$ norm, and each $\Gamma_t$ depends on only a finite number of coordinates of $\G_d$. Let $\mv{d}_t = (d_1^t,\ldots,d_C^t)$ and $\mv{k}_t = (k_1^t,\ldots,k_C^t)$ be the degree and depth sequences corresponding to the coordinates of $\G_d$ used by $\Gamma_t$.

For every $r \in \N$, we can apply \cref{lem:ddrank} to get a collection of polynomials $P_1,\ldots,P_C$ of rank $\geq r$ such that $P_i$ has degree $d_i^t$ and depth $k_i^t$ for every $i$. Now define the function $f_t^r: \F^{n_r} \rightarrow [0,1]$ by letting $f_t^r(x) = \Gamma(P_1(x),\ldots,P_C(x))$, where we treat $(P_1(x),\ldots,P_C(x))$ as an element of $\G_d$. It follows from \cref{thm:affequid} by the same argument used in the proof of \cref{lem:firstpart} that we have $t_{\cL}(f_t^r) \rightarrow_r t_\cL(\Gamma_t)$ for every affine $\cL$ of true complexity at most $d$. Taking a suitable diagonal subsequence of the $f_t^r$, we obtain a sequence of functions $f_i:\F^{n_i} \rightarrow [0,1]$ with $t_{\cL}(f_i) \rightarrow_i t_\cL(\Gamma)$ for every affine $\cL$ of true complexity at most $d$.

To complete the proof, consider the random functions $f_i':\F^{n_i} \rightarrow \bits$ where $f_i'(x)$ takes value 1 with probability $f_i(x)$. It is well known that these $d$-converge to $\Gamma$ with probability 1, and hence the existence of a $d$-convergent sequence converging to $\Gamma$ is evinced.
\end{proof}

The use of \cref{thm:limit} at this point is mainly one of convenience. Tools like \cref{thm:decomp} and \cref{thm:affequid} afford one the luxury to make simplifying assumptions about a function while incurring an arbitrarily small error. Working with $d$-limit objects here is effectively allowing us to replace `arbitrarily small' with 0. Similarly, we can substitute `arbitrarily high rank' polynomials for `infinite rank' ones. These kinds of assumptions can eliminate a significant amount of bookkeeping in proofs. Then, with a result regarding $d$-limit objects, the finite results are usually obtained for free (through \cref{thm:limit}). The state of affairs in the graph limit theory is mostly the same, although many conjecture that there should be some non-cosmetic benefit to the limit approach.

\section{Necessary depths}
The following inverse theorem characterizes functions with large $U^{d+1}$ norm by showing that they must correlate highly with a degree $\leq d$ (non-classical) polynomial.

\begin{theorem}[Theorem 1.11 of \cite{TZ12}]\label{thm:inverse}
Suppose $\delta > 0$ and $d \geq 1$ is an integer. There exists an
$\eps = \eps_{\ref{thm:inverse}}(\delta,d)$ such that the following
holds. For every function $f: \F^n \to \mathbb{D}$ with $\|f\|_{U^{d+1}} \geq \delta$, there exists a polynomial $P:
\F^n \to \T$ of degree $\leq d$ that is $\eps$-correlated with $f$,
meaning
$$\left|\E_{x \in \F^n} f(x) \expo{-P(x)} \right| \geq \eps.$$
\end{theorem}

Note that every polynomial that is not classical must have degree at least $p$. It is known that polynomials of degree $d=p$ that are not classical are unnecessary in higher-order Fourier analysis. More precisely in~\cref{thm:inverse}, for the case $d=p$, one can assume that the polynomial $P:\F^n \to \T$ in the statement of the theorem is a \emph{classical} polynomial of degree at most $p$. This can be carried further through \cref{thm:decomp} and then to the definition of the $d$-limit object. We will elaborate on this below, but first let us prove a generalization of this fact, which says that the polynomials of maximum possible depth are unnecessary in higher-order Fourier analysis. 

\begin{lemma}
\label{lem:redundepth}
Every $(1+k(p-1),k)$-polynomial $P:\F^n \to \T$ can be expressed as a function of a $(1+k(p-1),k-1)$-polynomial, a $(1+(k-1)(p-1),k-1)$-polynomial, and a $(k(p-1),k-1)$-polynomial.
\end{lemma}
\begin{proof}
By \cref{struct}, we have $P(x_1,\ldots,x_n) = \frac{\sum c_i |x_i|}{p^{k+1}}+ R(x_1,\ldots,x_n) \mod 1$ for integers $0 \le c_i \le p-1$, where $R$ is a $(1+k(p-1),k-1)$-polynomial. Let $M:= \sum c_i |x_i|$, and let $0 \le a<p^k$ and $b \in [p-1]$ be the unique integers satisfying $M \equiv a+bp^k \mod p^{k+1}$. The value of $P$ is fixed by the three values $a$, $b$ and $R$. The value of $a$ is determined by the value of the  $(1+(k-1)(p-1),k-1)$-polynomial  $\frac{M}{p^{k}} \mod 1$. Furthermore knowing $a$, the value of $b$ is determined by the value of the $\frac{M^p-M}{p^{k+1}} \mod 1$.   Indeed 
\begin{equation}
\label{eq:dist}
bp^k \equiv (a+bp^k)^p-(a+bp^k) - (a^p-a)   \mod p^{k+1}.
\end{equation}
It remains to show that $\frac{M^p-M}{p^{k+1}} \mod 1$ is a $(1+k(p-1),k-1)$-polynomial. Since degree and depth are invariant under affine transformations, it suffices to show that $Q:=\frac{|x_1|^p - |x_1|}{p^{k+1}} \mod 1$ is a $(k(p-1),k-1)$-polynomial. By Fermat's little theorem $p^kQ =0$, and thus $Q$ is of depth $k-1$. Furthermore, the identity $|x_1|(|x_1|-1)\ldots(|x_1|-p+1) = 0$ allows us to replace $|x_1|^p$ with a polynomial of degree $p-1$. This shows that $Q$ is of degree at most $(p-1)+(p-1)(k-1)=k(p-1)$. 
\end{proof}

It follows from~\cref{lem:redundepth} that in \cref{thm:decomp}, $(1+k(p-1),k)$-polynomials can be avoided in the polynomials defining the factor $\cB$. Consequently, every $d$-convergent sequence converges to a $d$-limit object $\phi:\G_d \to [0,1]$ such that $\phi$ does not depend on the coordinates that correspond to $(1+k(p-1),k)$-polynomials. Next we will show that there are no other values of $(d,k)$ that behave similarly. That is, for which every $(d,k)$-polynomial can be expressed as a function of a constant number of polynomials of either degree $d$ and depth $<k$, or degree $<d$. We  need  the following theorem of \cite{TZ12}. 

\begin{theorem}[See Theorem 4.1 of \cite{TZ12}]\label{thm:ppower}
Let $d>p$ be an integer, and $\epsilon>0$. There exists a $\rho=\rho_{\ref{thm:ppower}}(\epsilon,d)$ such that the following holds for sufficiently large $n$. If $P:\F^n \to \T$ is a polynomial of degree $d$ with $\norm{\expo{P}}_{U^{d}} \ge \epsilon$, then $pP:\F^n \to \T$ is a polynomial of degree $\leq d-p+1$ that satisfies 
$$\norm{\expo{pP}}_{U^{d-p+1}} \ge \rho.$$
\end{theorem}
%
The following lemma implies that unless $d$ and $k$ are as in \cref{lem:redundepth}, for every constant $C$, there exists a $(d,k)$-polynomial that cannot be expressed as a function of  $C$  polynomials, each of either degree $d$ and depth $<k$, or of degree $<d$. The polynomial that we use in the proof of the lemma is reminiscent of the so called generalized inner product polynomial that was used by  Babai, Nisan, and Szegedy~\cite{BNS} for similar purposes (see also~\cite{ViolaWigderson}).

\begin{lemma}
Let $2 \leq m \leq p-1$ be an integer, and $\epsilon>0$. Then for every $k \ge 0$, defining $d=m+k(p-1)$, there exists a degree $(d, k)$-polynomial $Q$ such that 
\begin{equation}
\label{eq:lowcorr}
|\langle \expo{Q},\expo{R_1+R_2} \rangle| < \epsilon,
\end{equation}
 for any polynomial $R_1$ of degree at most $d$ and depth less than $k$, and any polynomial $R_2$ of degree at most $d-1$. 
\end{lemma}
\begin{proof}
Let 
%
$$P=\sum_{i=0}^{\lfloor n/m \rfloor-1} |x_{im+1}|\ldots  |x_{im+m}|.$$ 
%
Set $\epsilon_k=\epsilon$, and for every $0 \le i \le k$,  let $\epsilon_i \in (0,\epsilon)$ be constants satisfying 
%
$$\epsilon_{i} < \epsilon_{\ref{thm:inverse}}(\rho_{\ref{thm:ppower}}(\epsilon_{i+1},d), d).$$ 
%
We show by induction on $i$ that if $n$ is sufficiently large, then the $(m+i(p-1),i)$-polynomial $Q=\frac{P}{p^{i+1}} \mod 1$ satisfies the desired property with parameter $\epsilon_i$ in (\ref{eq:lowcorr}) instead of $\epsilon$. 

We first look at the classical case $i=0$.  Notice that in this case by taking $n$ to be sufficiently large, we can guarantee that $\|\expo{Q}\|_{U^d}$ is sufficiently small, and this implies that the correlation of $Q$ with any polynomial of degree lower than $m+i(p-1)=m$ is smaller than $\epsilon_0$. 

Now let us consider the case $i>0$. Assume for the sake of contradiction that  $|\langle \expo{\frac{P}{p^{i+1}}},\expo{R_1+R_2} \rangle| \ge \epsilon_i$ for a polynomial $R_1$ of degree at most $d_i=m+i(p-1)$ and depth $<k$, and a polynomial $R_2$ of degree $\le d_i-1$. This in particular implies $\norm{\expo{\frac{P}{p^{i+1}}-R_1-R_2} }_{U^{d_i}} \ge \epsilon_i$. Note that $\frac{P}{p^{i+1}}-R_1-R_2 \mod 1$ has degree $d_i>p$, and thus we can apply \cref{thm:ppower} to  conclude that 
$$
\norm{ \expo{p(P/p^{i+1} -R_1-R_2)} }_{U^{d_i-p+1}} \ge \rho\left(\norm{\expo{P/p^{i+1} -R_1-R_2}}_{U^{d_i}}\right) \ge \rho_{\ref{thm:ppower}}(\epsilon_i,d_i).
$$
Therefore by \cref{thm:inverse}  there exists a polynomial $R'$ of degree at most $d_i-p$ such that 
%
$$\left|\left\langle \expo{p(P/p^{i+1} -R_1-R_2)}, \expo{R'} \right\rangle\right| > \epsilon_{\ref{thm:inverse}}(\rho_{\ref{thm:ppower}}(\epsilon_i,d_i),d_i-p) \ge \epsilon_{\ref{thm:inverse}}(\rho_{\ref{thm:ppower}}(\epsilon_i,d_i),d_i) \ge \epsilon_{i-1}.$$  
%
It follows that $\left|\left\langle \expo{P/p^i}, \expo{pR_1+pR_2+ R'} \right\rangle\right| > \epsilon_{i-1}$, which contradicts our induction hypothesis. 
\end{proof} 


\paragraph{Concluding remarks.}
We conclude with an open question regarding the uniqueness of the limit object.  Suppose that two limit objects $\Gamma_1,\Gamma_2$ satisfy $t_{\cL}(\Gamma_1)=t_{\cL}(\Gamma_2)$ for every affine system of linear forms $\cL$.  Then how do $\Gamma_1$ and $\Gamma_2$ relate to each other?

% The answer to the analogous question in the context of the limits of dense graphs is known. There, the limit objects are represented as so called graphons, which are symmetric measurable functions $W:[0,1]^2 \to [0,1]$. It is shown in~\cite{BCL10} that two graphons $W_1,W_2$ satify $t_H(W_1) = t_H(W_2)$ for every graph $H$ if and only if there exists a third graphon $W$ and two measure preserving maps $\sigma_1,\sigma_2:[0,1] \to [0,1]$ such that for $i=1,2$, we have $W_i(x,y)=W(\sigma_i(x),\sigma_i(y))$ almost everywhere. 

%\newpage
%\bibliographystyle{alpha}
%\bibliography{linearlimits}

\newcommand{\etalchar}[1]{$^{#1}$}
\begin{thebibliography}{7}

\bibitem[BFH{\etalchar{+}}13]{BFHHL13}
Arnab Bhattacharyya, Eldar Fische, Hamed Hatami, Pooya Hatami, and Shachar
  Lovett.
\newblock Every locally characterized affine-invariant property is testable.
\newblock In {\em Proc.\ 45th Annual ACM Symposium on the Theory of Computing},
  2013.
\newblock Full version at \arxiv{1212.3849}. 

\bibitem[BFL13]{BFL12}
Arnab Bhattacharyya, Eldar Fischer, and Shachar Lovett.
\newblock Testing low complexity affine-invariant properties.
\newblock In {\em Proc.\ 23rd {ACM}-{SIAM} {S}ymposium on {D}iscrete
  {A}lgorithms}, 2013.

\bibitem[BNS92]{BNS}
L{\'a}szl{\'o} Babai, Noam Nisan, and M{\'a}ri{\'o} Szegedy.
\newblock Multiparty protocols, pseudorandom generators for logspace, and
  time-space trade-offs.
\newblock {\em J. Comput. System Sci.}, 45(2):204--232, 1992.
\newblock Twenty-first Symposium on the Theory of Computing (Seattle, WA,
  1989).

\bibitem[GW10]{GW10}
William~T. Gowers and Joseph~A. Wolf.
\newblock The true complexity of a system of linear equations.
\newblock {\em Proc. Lond. Math. Soc. (3)}, 100(1):155--176, 2010.

\bibitem[LS06]{LS06}
L{\'a}szl{\'o} Lov{\'a}sz and Bal{\'a}zs Szegedy.
\newblock Limits of dense graph sequences.
\newblock {\em J. Combin. Theory Ser. B}, 96(6):933--957, 2006.

\bibitem[TZ12]{TZ12}
Terence Tao and Tamar Ziegler.
\newblock The inverse conjecture for the {G}owers norm over finite fields in
  low characteristic.
\newblock {\em Ann. Comb.}, 16(1):121--188, 2012.

\bibitem[VW08]{ViolaWigderson}
Emanuele Viola and Avi Wigderson.
\newblock Norms, {XOR} lemmas, and lower bounds for polynomials and protocols.
\newblock {\em Theory Comput.}, 4:137--168, 2008.

\end{thebibliography}

\end{document}


%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: