
\documentclass[12pt]{article}
\usepackage{e-jc}

\usepackage{amssymb} %Box
\usepackage{amsmath} %Operator
\usepackage{amsthm} %proof

\usepackage[colorlinks=true,citecolor=black,linkcolor=black,urlcolor=blue]{hyperref}

\newcommand{\arxiv}[1]{\href{http://arxiv.org/abs/#1}{\texttt{arXiv:#1}}}

\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{claim}[theorem]{Claim}

\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{open}[theorem]{Open Problem}
\newtheorem{problem}[theorem]{Problem}
\newtheorem{question}[theorem]{Question}

\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{note}[theorem]{Note}


\newcommand{\eps}{\varepsilon}

\newcommand{\weight}{w}
\DeclareMathOperator{\rank}{rk}%rank
\DeclareMathOperator{\nullity}{nl}%nullity
\DeclareMathOperator{\PWE}{PWE}%poly-weighted edge subset
\DeclareMathOperator{\PWV}{PWV}%poly-weighted vertex subset
\DeclareMathOperator{\PHE}{PHE}%poly-weighted hyperedge subset
\DeclareMathOperator{\subscriptsbf}{}%single bond flip chain
\newcommand{\Msbf}{{\mathcal{M}_{\subscriptsbf}}}
\newcommand{\Psbf}{{P_{\subscriptsbf}}}
\newcommand{\Osbf}{{\Omega_{\subscriptsbf}}}
\newcommand{\pisbf}{{\pi_{\subscriptsbf}}}
\DeclareMathOperator{\subscriptssf}{}%single site flip chain
\newcommand{\Mssf}{{{\mathcal{M}}_{\subscriptssf}'}}
\newcommand{\Pssf}{{P_{\subscriptssf}'}}
\newcommand{\Ossf}{{\Omega_{\subscriptssf}'}}
\newcommand{\pissf}{{\pi_{\subscriptssf}'}}
\DeclareMathOperator{\cp}{cp}%canonical paths
\DeclareMathOperator{\lw}{lw}%linear-width
\DeclareMathOperator{\pw}{pw}%path-width
\DeclareMathOperator{\tw}{tw}%tree-width
\DeclareMathOperator{\vs}{vs}%vertex-separation
\DeclareMathOperator{\cw}{cw}%cut-width
\DeclareMathOperator{\bw}{bw}%branch-width
\DeclareMathOperator{\inj}{inj}%injection
\newcommand{\maxedgesize}{\eta}%rank of hypergraph, size of largest edge


\title{\bf Subset Glauber dynamics on \\ graphs, hypergraphs and matroids \\of bounded tree-width\footnote{A preliminary version of this work appeared in the Proceedings of the 38th International Colloquium on Automata, Languages and Programming (ICALP 2011)~\cite{BoKa11}. Amongst other elements, the extensions to hypergraphs and matroids are new. This work was supported by the Engineering and Physical Sciences Research Council [grant number EP/G066604/1].}}

\author{Magnus Bordewich\\
\small School of Engineering and Computing Sciences\\[-0.8ex]
\small Durham University\\[-0.8ex] 
\small Durham, UK\\
\small\tt m.j.r.bordewich@durham.ac.uk\\
\and
Ross J. Kang\footnote{This work was completed while this author was employed at Durham University.}\\
\small IMAPP\\[-0.8ex]
\small Radboud University Nijmegen\\[-0.8ex]
\small Nijmegen, Netherlands\\
\small\tt ross.kang@gmail.com
}

\date{\dateline{Mar 15, 2014}{Oct, 2014}\\
\small Mathematics Subject Classifications: 05C85, 68R10, 60J10, 05C31, 68W20, 68W25}


\begin{document}

\maketitle


\begin{abstract}
Motivated by the `subgraphs world' view of the ferromagnetic Ising model, we analyse the mixing times of Glauber dynamics based on subset expansion expressions for classes of graph, hypergraph and matroid polynomials.  With a canonical paths argument, we demonstrate that the chains defined within this framework mix rapidly upon graphs, hypergraphs and matroids of bounded tree-width. 

This extends known results on rapid mixing for the Tutte polynomial, adjacency-rank ($R_2$-)polynomial and interlace polynomial. In particular Glauber dynamics for the $R_2$-polynomial was known to mix rapidly on trees, which led to hope of rapid mixing on a wider class of graphs. We show that Glauber dynamics for a very wide class of polynomials mixes rapidly on graphs of bounded tree-width, including many cases in which the Glauber dynamics does not mix rapidly for all graphs.  This demonstrates that rapid mixing on trees or bounded tree-width graphs does not offer strong evidence towards rapid mixing on all graphs.

  \bigskip\noindent \textbf{Keywords:} Markov chain Monte Carlo; graph polynomials; tree-width; canonical paths; approximate counting
%Tutte polynomial, interlace polynomial, random cluster model, rapid mixing, hypergraph polynomials, matroid polynomials

\end{abstract}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Introduction}


We analyse a subset-sampling Markov chain on graphs (and later hypergraphs and matroids) that is derived from subset expansion\footnote{The term `subset expansion' was coined by Gordon and Traldi~\cite{GoTr90}, though it is a special type of `states model expansion' which is commonly applied in statistical physics.} graph functions, which include many well-known graph polynomials. We show that this chain mixes rapidly on graphs of constant tree-width. 

An {\em edge subset expansion formula} for a graph function $\mathcal{P}$ is written as follows: for any graph $G = (V,E)$,%hypergraph
\begin{align}
\label{eqn:polyedge}
\mathcal{P}(G)
= \sum_{S\subseteq E}
\weight((V,S))
\end{align}
for some graph function $\weight$, where $(V,S)$ denotes the graph with vertex set $V$ and edge set $S$. %hypergraph
If the function $\weight$ is non-negative, that is, $\weight(G) \ge 0$ for all graphs $G$, we refer to~\eqref{eqn:polyedge} as an {\em edge subset weighting for $\mathcal{P}$} and to $\weight$ as its {\em weight function}.  In fact, we shall need the weight function to be {\em positive} --- from a statistical physics viewpoint, this results in a so-called `soft-core model'. One example of such a function that is prominent in statistical physics, theoretical computer science, and discrete probability is the {\em random cluster model partition function}, which can be defined for any $G = (V,E)$ and parameters $q,\mu$ as
\begin{align}
\label{eqn:RCpoly}
Z_{RC}(G;q,\mu) := \sum_{S\subseteq E}q^{\kappa(S)}\mu^{|S|},
\end{align}
where $\kappa(S)$ is the number of components in $(V,S)$.  For more on the random cluster model, see an extensive treatise by Grimmett~\cite{Gri06}.
Notice that, if $q, \mu \ge 0$, then $\weight((V,S)):=q^{\kappa(S)}\mu^{|S|}$ provides an edge subset weighting for $Z_{RC}(G;q,\mu)$.

A common approach to approximating graph functions expressed by subset expansion formulae is through Markov chain Monte Carlo sampling (MCMC). A Markov chain with state space $\Omega = \{S:S\subseteq E\} (=2^E)$ and stationary distribution $\pi(\cdot) \propto w((V,\cdot))$ can be used to sample subsets of the edges. In this approach, an efficient approximation scheme relies on rapid convergence of the Markov chain to its stationary distribution --- so-called rapid mixing. In our setting, a natural Markov chain is Glauber dynamics~\cite{Gla63}, the single bond flip chain, in which possible transitions are the addition or removal of one edge from the current subset (state), with transition probabilities designed to produce the desired stationary distribution.
In addition to being a principal tool in the design of efficient approximation schemes for counting problems, MCMC has seen widespread use in computational physics, computational biology, machine learning, and statistics.
There have been steady advances in our understanding of such random processes and in showing how quickly they generate good approximations of useful probability distributions in huge, complex data sets.
See the lecture notes of Jerrum~\cite{Jer03} or a survey by Randall~\cite{Ran06} for an overview of the application of these techniques in theoretical computer science.


We postpone the precise statement of our main result, Theorem~\ref{thm:mainedge,tw}, as it requires a host of definitions, but here we give a cursory description.
We show that when the weight function $w$ of some subset expansion formula is strictly positive and {\em $\lambda$-multiplicative} (as defined in Subsection~\ref{subsec:mild}), then Glauber dynamics is rapidly mixing on graphs of bounded tree-width. Roughly speaking, the condition of \mbox{$\lambda$-multiplicativity} is that the weight function is multiplicative with respect to the operation of disjoint graph union as well as ``nearly multiplicative'' with respect to the operation of composition via small vertex cuts. For now, we remark that many important graph polynomials (e.g.~$Z_{RC}(G;q,\mu)$) obey it. The results may be extended to cover hypergraphs and matroids, and polynomials expressed by vertex subset expansion rather than edge subset expansion.  Our general approach is an extension of work by Ge and \v{S}tefankovi\v{c}~\cite{GeSt12} (see also an earlier version~\cite{GeSt09+}), which showed that the Markov chain for the (soft-core) random cluster model --- i.e.~weighted according to~\eqref{eqn:RCpoly} --- mixes rapidly upon graphs of bounded tree-width. 


Our work started with the $R_2$-polynomial; Ge and \v{S}tefankovi\v{c} introduced both this polynomial and its associated Glauber dynamics in an attempt to devise an approximation scheme for \#BIS, the problem of counting the number of independent sets in a bipartite graph. 
Their {\em adjacency-rank polynomial} is defined for any $G = (V,E)$ and parameters $q,\mu$ as
\begin{align}
\label{eqn:R2poly}
R_2(G;q,\mu) := \sum_{S\subseteq E}q^{\rank_2(S)}\mu^{|S|},
\end{align}
where $\rank_2(S)$ is the $\mathbb{F}_2$-rank of the {\em adjacency} matrix for $(V,S)$.
Using a combinatorial interpretation of $\rank_2$ applicable only to bipartite graphs, they showed that the edge subset Glauber dynamics (using the weighting in~\eqref{eqn:R2poly}) mixes rapidly on trees. 
They conjectured that the chain mixes rapidly on all bipartite graphs, cf.~Conjecture~1 in~\cite{GeSt09+}. Our initial objective was to show that the chain mixes rapidly not only on trees, but also on all bipartite graphs of bounded tree-width. We achieved this and more: indeed, we have shown that the $R_2$-polynomial fits in our framework without recourse to the combinatorial interpretation for bipartite graphs, and hence that the Markov chain for the $R_2$-polynomial mixes rapidly upon all graphs of bounded tree-width. However, in extending the approach of Ge and \v{S}tefankovi\v{c} even further to all  {$\lambda$-multiplicative} weight functions, we have highlighted its generality with regard to polynomials and its dependence on bounded tree-width. Since the proof approach works for such a great range of chains that do not mix rapidly in general, it seems that the approach will not extend to larger graph classes without significant alteration. Thus the fact that Glauber dynamics associated with the $R_2$-polynomial mixes rapidly on trees cannot be taken as support for the conjecture that it mixes rapidly on all graphs. 
Indeed, the conjectured rapid mixing of their chain on all bipartite graphs has since been disproved by Goldberg and Jerrum~\cite{GoJe12a}. 

The structure of this paper is as follows. In Subections~\ref{sub:context} and~\ref{sub:extensions}, we present further background to the work, to graph polynomials and their computation, as well as extensions.  In Section~\ref{sec:definitions}, we give the definitions that are necessary for a detailed description of the main theorem.  We give the main theorem in Section~\ref{sec:results} and then indicate some of its consequences.  We present the proofs in Section~\ref{sec:proof}.  In Section~\ref{sec:induced}, we extend our results to Glauber dynamics on vertex subsets, that is, on induced subgraphs.  For edge subset Glauber dynamics, we outline an extension to hypergraphs in Section~\ref{sec:hyper}.  We give an indication of how to extend the approach to matroids in Section~\ref{sec:matroid}.







\subsection{Context}\label{sub:context}


The random cluster model has been widely studied both as given in~(\ref{eqn:RCpoly}), and as the Tutte polynomial of a graph~\cite{Tut54}. Under a suitable transformation, $Z_{RC}(G;q,\mu)$ is equivalent to the Tutte polynomial, defined for any $G = (V,E)$ and parameters $x,y$ as
\begin{align}
\label{eqn:Tuttepoly}
T(G;x,y) := \sum_{S\subseteq E}(x-1)^{r(E)-r(S)}(y-1)^{|S|-r(S)},
\end{align}
where $r(S)$ is the $\mathbb{F}_2$-rank of the incidence matrix for $(V,S)$.
A wealth of combinatorial and structural information can be obtained from evaluations of this function. Indeed, this polynomial has a remarkable universality property, which informally speaking says that it subsumes any graph invariant that can be computed by deletion and contraction of edges~\cite{OxWe79}, cf.~\cite{WeMe00}.  In addition, the Tutte polynomial specialises to several key univariate graph polynomials, including the chromatic polynomial of  Birkhoff~\cite{Bir12}. 
It specialises to the Jones polynomial in knot theory~\cite{Jon85}.
By its connection with the random cluster model, it also generalises the partition functions of the Ising~\cite{Isi25} and Potts~\cite{Pot52} models\footnote{If $x,y\ge1$ or $q,\mu\ge0$, then, respectively, $T(G;x,y)$ or $Z_{RC}(G;q,\mu)$ generalise the partition functions of the {\em ferromagnetic} Ising and Potts models.}. Consult the monograph of Welsh~\cite{Wel93} for more on these crucial connections.
In addition to $Z_{RC}(G;q,\mu)$ and $T(G;x,y)$, we shall highlight a few other specific polynomials from the literature, but for a broad account of the development of graph polynomials, consult the recent surveys by Makowsky~\cite{Mak08} and by Ellis-Monaghan and Merino~\cite{ElMe11a,ElMe11b}.

It was shown in 1990 by Jaeger, Vertigan and Welsh~\cite{JVW90} that, in general, for fixed (rational) values of $x$ and $y$, the evaluation of $T(G;x,y)$ is \#P-hard, except on a few special points and curves in the $(x,y)$-plane.  As a result, there have been substantial efforts since then to pin down the {\em approximation} complexity of computing $T(G;x,y)$. For large swaths of the $(x,y)$-plane, it is now known that the computation of $T(G;x,y)$ either does not admit a fully polynomial-time randomised approximation scheme (FPRAS) unless RP = NP, or is at least as hard as \#BIS (the problem of counting independent sets in bipartite graphs) under approximation-preserving reductions, cf.~Goldberg and Jerrum~\cite{GoJe12b}.
The sole positive approximation result applicable to general graphs is the breakthrough FPRAS, using MCMC, by Jerrum and Sinclair~\cite{JeSi93} for the partition function of the ferromagnetic Ising model --- this corresponds to computation of $T(G;x,y)$ along the portion of the parabola $(x-1)(y-1)=2$ with $y > 1$.
Various approaches have led to efficient approximations in some regions of the Tutte plane for specific classes of graphs --- cf.~e.g.~Alon, Frieze and Welsh~\cite{AFW95}, Karger~\cite{Kar01}, and Bordewich~\cite{Bor04}.





The polynomials and Markov chains that we capture in our framework are defined for all graphs; however, we obtain rapid mixing results only on classes of graphs of bounded (constant) tree-width.
For brevity, we do not define tree-width here, but merely say that it is an essential concept in structural graph theory and parameterised complexity --- see modern surveys on the topic by Bodlaender~\cite{Bod06} and Hlin\v{e}n\'y et al.~\cite{HOSG08}. 
The restriction of tree-width is commonly used in graph algorithms to reduce the complexity of a computationally difficult problem, usually by way of dynamic programming.
For example, it is already known that many of the polynomials covered here can be exactly evaluated efficiently for graphs of bounded tree-width.  Independently, Andrzejak~\cite{And98} and Noble~\cite{Nob98} exhibited polynomial-time algorithms to compute the Tutte polynomial of graphs with bounded tree-width.  Works of Makowsky and Mari\~{n}o~\cite{MaMa03} and Noble~\cite{Nob09} have significantly generalised this, in the former case, to a wide array of polynomials under the framework of monadic second order logic (MSOL), and, in the latter case, to the so-called $U$-polynomial~\cite{NoWe99}, a polynomial that includes not only the Tutte polynomial but also a powerful type of knot invariant as special cases.

Even though many of the polynomials we refer to can be computed exactly in polynomial time for graphs of bounded tree-width, it remains of interest to show that their associated Glauber dynamics are rapidly mixing. There have been significant and concerted endeavours by researchers spanning physics, computer science and probability to determine the mixing characteristics of Glauber dynamics on many related Markov chains.  Spin systems have been of particular interest; indeed, the main thrust of the work of Jerrum and Sinclair~\cite{JeSi93} was to tackle the partition function for the `spins world' of the ferromagnetic Ising model (using a translation to the rapidly mixing `subgraphs world').  For more on the connections among the `spins world', the `subgraphs world' and the `random cluster world', see the recent work of Huber~\cite{Hub11}. 
Much illuminating recent work on the mixing times of Glauber dynamics has been restricted to trees or tree-like graphs, cf.~e.g.~\cite{BKMP05,DeMo10,DLP10,GJK10,MSW07,TVVY10}.


\subsection{Extensions}\label{sub:extensions}

The primary focus in our paper is to establish results for graph polynomials defined according to {\em edge} subset expansion; however, we can also adapt our methodology to polynomials defined according to {\em vertex} subset expansion, which may be viewed as the `induced subgraphs world'. 
To our knowledge, this form of Markov chain has not been greatly examined.  One of our motivations to consider vertex subset expansion is to cover the graph polynomial introduced by Arratia, Bollob\'as and Sorkin~\cite{ABS04b}:
the {\em bivariate interlace polynomial}, defined for any graph $G = (V,E)$ and parameters $x,y$ as
\begin{align}
\label{eqn:interpoly}
q(G;x,y) := \sum_{S\subseteq V}(x-1)^{\rank_2(S)} (y-1)^{|S|-\rank_2(S)},
\end{align}
where $\rank_2(S)$ is the $\mathbb{F}_2$-rank of the adjacency matrix for $G[S]$, the subgraph of $G$ induced by the vertices in $S$.
This polynomial specialises to the independence polynomial and is intimately related to Martin polynomials~\cite{AiHo04}.
Just as for the Tutte polynomial, computation of the bivariate interlace polynomial is \#P-hard in almost the entire plane, as was shown by Bl\"aser and Hoffmann~\cite{BlHo08}.
The multivariate interlace polynomial, a generalisation of the interlace polynomial, can be evaluated efficiently for graphs of bounded tree-width, cf.~Courcelle~\cite{Cou08} and Bl\"aser and Hoffmann~\cite{BlHo11}.  Subject to a condition on the vertex subset weightings, which we have called {\em vertex $\lambda$-multiplicativity}, we can establish rapid mixing for vertex subset Glauber dynamics on graphs of constant tree-width.

We also observe that our framework can be extended in a natural way to hypergraphs.  Though hypergraph polynomials and their corresponding Markov chains have not been studied nearly as widely as their graph counterparts, we point out some important examples.  Goldberg and Jerrum~\cite{GoJe12b} used a multivariate hypergraph Tutte polynomial to prove approximate counting hardness of the ferromagnetic (bivariate graph) Tutte polynomial.  
There is a significant body of research into Glauber dynamics for counting colourings and independent sets in hypergraphs~\cite{BDK06,BDK08,Bub01,DyGr00}.  Hypergraph variants of the multivariate chromatic polynomial and the edge elimination polynomial have been introduced by White~\cite{Whi11}. The study of (counting) complexity for hypergraph parameters continues to grow in importance in relation to constraint satisfaction problems (CSPs).  Much of this study involves a restriction of the input hypergraph's width.  Unfortunately, there is no unique standard hypergraph analogue of tree-width: consult Hlin\v{e}n\'{y} et al.~\cite{HOSG08} for an overview of several competing notions of width for hypergraphs and the relationship with CSPs.  We have made a choice of width parameter that gives the most straightforward extension from our graph framework.

Another possible extension is to matroids, a setting with close connections to the Tutte polynomial, cf.~Welsh~\cite{Wel76}.  As an indication of this possibility, we show that (edge) subset Glauber dynamics associated with the Tutte polynomial for matroids mixes rapidly on matroids of bounded branch-width. The difficulty in dealing with matroids is that the definition of {$\lambda$-multiplicativity} we have used for graphs and hypergraphs involves vertex cuts, which are indefinable in a matroid. So although we have not formed a general condition akin to {$\lambda$-multiplicativity}  for matroids, we show that the matroid rank function has suitable properties in matroids of bounded branch-width.
We note that Hlin\v{e}n\'y~\cite{Hli06} has shown the existence of a polynomial-time algorithm for the computation of the Tutte polynomial for matroids of bounded branch-width representable over a finite field.  











\section{Definitions}\label{sec:definitions}



\subsection{$\lambda$-multiplicative weight functions}\label{subsec:mild}

In this subsection, we describe the condition we require on our graph functions $\mathcal{P}$.
This condition prescribes that the weight function is multiplicative with respect to the operation of disjoint graph union as well as ``nearly multiplicative'' with respect to the operation of composition via small vertex cuts.

We use the notation $\hat\lambda := \max\{\lambda,1/\lambda\}$. For a graph $G = (V,E)$, a vertex cut $K$ is said to separate sets $V_1$ and $V_2$ if $(V_1,K,V_2)$ is a partition of $V$ and there is no edge of $E$ that is incident to both a vertex of $V_1$ and a vertex of $V_2$.
A partition $(E_1,E_2)$ of $E$ is appropriate (for $K$) if $E_1$ has no edge adjacent to a vertex in $V_2$ and $E_2$ has no edge adjacent to a vertex in $V_1$.

For fixed $\lambda > 0$, we say that the weight function $\weight$ is {\em $\lambda$-multiplicative}, if for any $G = (V,E)$, any vertex cut $K$ that separates sets $V_1$ and $V_2$, and any appropriate partition $(E_1,E_2)$, we have
\begin{align}\label{eqn:edgekmult}
\hat\lambda^{-|K|} \le \frac{\weight((V_1\cup K,E_1))\weight((V_2\cup K,E_2))}{\weight(G)} \le \hat\lambda^{|K|}.
\end{align}
As mentioned above, if $\weight$ is $\lambda$-multiplicative, then it follows that $\weight$ is multiplicative with respect to disjoint union (by taking $K = \emptyset$); furthermore, taking $V_2 = \emptyset$ implies that the addition or deletion of a few edges in the graph does not change $\weight$ wildly.


\subsection{Examples of valid polynomials}\label{subsec:kmultex}


In this subsection, we emphasise specific examples of edge subset weightings and justify that their weight functions are $\lambda$-multiplicative.

Let $G = (V,E)$ be any graph, $K$ be any vertex cut that separates vertex subsets $V_1$ and $V_2$, and $(E_1,E_2)$ be any appropriate partition. 
We define $G'$ to be the disjoint union of graphs $(V_1\cup K,E_1)$ and $(V_2\cup K,E_2)$. We could imagine forming $G'$ from $G$ by splitting each vertex in $K$, taking incident edges in $E_1$ with one copy of the vertex and those in $E_2$ with the other.
It is trivial to verify multiplicativity with respect to disjoint union for each of the weight functions considered below.
Therefore, to establish $\lambda$-multiplicativity for these weight functions, it will suffice to verify that 
$\hat\lambda^{-|K|} \le \weight(G')/\weight(G) \le \hat\lambda^{|K|}$.

First, we observe that the partition function of the \emph{random cluster model} for $q,\mu > 0$ satisfies the condition. Recalling~\eqref{eqn:RCpoly},
the relevant weight function is $\weight((V,S)) := q^{\kappa(S)}\mu^{|S|}$.
To handle the $\mu^{|S|}$ factor, note that the graphs $G$ and $G'$ have the same number of edges.
For the $q^{\kappa(S)}$ factor, the number of components in $G'$ can be at most $\kappa(G)+|K|$ since $G'$ can be obtained by splitting $|K|$ vertices of $G$. Thus, $\weight$ is $\lambda$-multiplicative if we take $\lambda := q$. 

This can also be seen in the context of the \emph{Tutte polynomial} when $x,y>1$. Recalling~\eqref{eqn:Tuttepoly},
the relevant weight function is $\weight((V,S)) := (x-1)^{r(E)-r(S)}(y-1)^{|S|-r(S)}$.
As before, it is easy to take care of the $(x-1)^{r(E)}(y-1)^{|S|}$ factor.
For the remaining $((x-1)(y-1))^{-r(S)}$ factor, it is enough to observe that the incidence matrix of $G$ may be obtained from the incidence matrix of $G'$ as follows. The matrix for $G'$ has two rows for each of the vertices in $K$, one from $(V_1\cup K,E_1)$ and one from $(V_2\cup K,E_2)$. If we replace one of these two rows with the sum of the two rows, we do not alter the rank; if we then delete the other of the two rows, we change the rank by at most $1$. Repeating this for each vertex in $K$, we obtain the incidence matrix for $G$, at a total change in the rank $r$ of the incidence matrix of at most $|K|$. Thus, $\weight$ is $\lambda$-multiplicative if we take $\lambda := (x-1)(y-1)$. 


Next, we see that the {\em adjacency-rank polynomial} of Ge and \v{S}tefankovi\v{c}~\cite{GeSt09+} satisfies the condition if $q,\mu>0$. Recalling~\eqref{eqn:R2poly}, the relevant weight function is  $\weight((V,S)) := q^{\rank_2(S)}\mu^{|S|}$.
As before, it is simple to handle the $\mu^{|S|}$ factor.
For the $q^{\rank_2(S)}$ factor, we note that the adjacency matrix of $G$ may be formed from the adjacency matrix of $G'$ by $|K|$ row additions, followed by $|K|$ column additions and finally the deletion of $|K|$ rows and $|K|$ columns. Since we must delete both rows and columns, the rank $\rank_2$ of the adjacency matrix may change by up to $2|K|$. Thus, in this case, $\weight$ is $\lambda$-multiplicative when taking  $\lambda := q^2$.

Now, consider the {\em multivariate Tutte polynomial} as formulated by Sokal~\cite{Sok05}, defined for any graph $G = (V,E)$ and parameters $q,\vec{v}=\{v_e\}_{e\in E}$ by
\begin{align}
\label{eqn:multiTuttepoly}
Z_{Tutte}(G;q,\vec{v}) := \sum_{S\subseteq E}q^{\kappa(S)}\prod_{e\in S}v_e.
\end{align}
Under this expansion, $\weight := q^{\kappa(S)}\prod_{e\in S}v_e$ is an edge subset weight function if $q > 0$ and $v_e>0$ for any $e \in E$ are fixed. We can handle the $q^{\kappa(S)}$ factor as we did for the random cluster model partition function. For the $\prod_{e\in S}v_e$ factor, observe that $G$ and $G'$ have the same set of edges. Thus, $\weight$ is $\lambda$-multiplicative when taking $\lambda := q$.

Last, we discuss the {\em $U$-polynomial} of Noble and Welsh~\cite{NoWe99}, defined for any graph $G = (V,E)$ and parameters $y,\vec{x}=\{x_i\}_{i=1}^{|V|}$ by
\begin{align}
\label{eqn:Upoly}
U(G;\vec{x},y) := \sum_{S\subseteq E}(y-1)^{|S|-r(S)}\prod_{i=1}^{|V|}{x_i}^{\kappa(i,S)},
\end{align}
where $\kappa(i,S)$ denotes the number of components of order $i$ in $(V,S)$.
If $y>1$ and $x_i>0$ for all $i$, then $\weight((V,S)) := (y-1)^{|S|-r(S)}\prod_{i=1}^{|V|}{x_i}^{\kappa(i,S)}$ gives an edge subset weighting.
The $(y-1)^{|S|-r(S)}$ factor can be handled as above.
For the $\prod_{i=1}^{|V|}{x_i}^{\kappa(i,S)}$ factor, observe that $\sum_i |\kappa(i,G)-\kappa(i,G')|$ is at most $3|K|$, since, if we obtain $G'$ by splitting the vertices in $K$, each time we split a vertex we either change the size of a single component or split a single component into two smaller components. Thus, taking $x' := \max_i \max\{x_i,x_i^{-1}\}$ and $y' := \max\{y-1,(y-1)^{-1}\}$, we see that $\weight$ is $\lambda$-multiplicative when taking $\lambda := y'{x'}^3$.

\subsection{Glauber dynamics for edge subsets}\label{subsec:chains}

\noindent
In this subsection, we define the Markov chain associated with the edge subset expansion formula for $\mathcal{P}$. From the formulation in~\eqref{eqn:polyedge}, the {\em single bond flip chain} $\Msbf$ on a given graph $G = (V,E)$ is defined as follows.  We start with an arbitrary subset $X_0 \subseteq E$ and repeatedly generate $X_{t+1}$ from $X_t$ by running the following experiment.%hypergraph
\begin{enumerate}
\item Pick an edge $e\in E$ uniformly at random and let $S = X_t \oplus \{e\}$.
\item Set $X_{t+1} = S$ with probability $\frac12\min\left\{1,\weight((V,S))/\weight((V,X_t))\right\}$ and with the remaining probability set $X_{t+1} = X_t$.
\end{enumerate}
By convention, we denote the state space of $\Msbf$ by $\Osbf$ (i.e.~$\Osbf = 2^E$) and its transition probability matrix by $\Psbf$.
With standard arguments, it can be shown that $\Msbf$ is a reversible Markov chain 
that has a unique stationary distribution $\pisbf$ satisfying $\pisbf(S) \propto \weight((V,S))$. 
Hence, we may use $\Msbf$ as a Markov chain in MCMC sampling for the following problem.

\medskip
\noindent
$\PWE(\mathcal{P})$: \textsc{$\mathcal{P}$-weighted Edge Subsets}\\
\indent Input: a graph $G = (V, E)$.\\%hypergraph
\indent Output: a subset $S\subseteq E$ with probability $\weight((V,S))/\mathcal{P}(G)$.
\medskip

The term \emph{rapidly mixing} applies to a Markov chain that quickly converges to its stationary distribution. We make this precise here. 
The {\em total variation distance} $\Vert\nu - \nu'\Vert_{TV}$ between two probability distributions $\nu$ and $\nu'$ is defined by
$\Vert\nu - \nu'\Vert_{TV} = \frac12\sum_{H\in\Omega}|\nu(H)-\nu'(H)|$.
For $\eps>0$, the {\em mixing time} of a Markov chain $\mathcal{M}$ (with state space $\Omega$, transition matrix $P$ and stationary distribution $\pi$) is defined as
\begin{align*}
\tau(\eps) := \max_{H\in\Omega}\{\min\{t \ | \ \Vert P^t(H,\cdot)-\pi(\cdot) \Vert_{TV} \le \eps\}\}.
\end{align*}
In this paper, we shall say that a chain $\mathcal{M}$ {\em mixes rapidly} if, for any fixed $\eps$, $\tau(\eps)$ is (upper) bounded by a polynomial in the number of vertices of the input graph. This definition for rapid mixing is the one more commonly used in theoretical computer science, whereas often in statistical physics or discrete probability a stricter $O(n \log n)$ bound is mandated.


\section{Results}\label{sec:results}

We are now prepared to state the main theorem.

\begin{theorem}\label{thm:mainedge,tw}
Let $G=(V,E)$ where $|V| = n$.  If $\weight$ is $\lambda$-multiplicative for some $\lambda > 0$, then the mixing time of $\Msbf$ on $G$ satisfies
\begin{align*}
\tau(\eps) = O\left(n^{4+4(\tw(G)+1)|\log\lambda|}
%hypergraph maxedgesize
\left(
\max_{S\subseteq E}\left\{\log\frac{\mathcal{P}(G)}{\weight((V,S))}\right\}
+
\log\frac1{\eps}
\right)
\right)
\end{align*}
(where $\tw(G)$ denotes the tree-width of $G$).
\end{theorem}

In later sections, we describe extensions of this theorem to induced subgraphs (Theorem~\ref{thm:mainvertex,tw}), to hypergraphs (Theorem~\ref{thm:mainedge,tw,hyper}), and to matroids (Theorem~\ref{thm:mainedge,bw,matroid}).


In Subsection~\ref{subsec:kmultex}, we noted some examples of polynomials that have $\lambda$-multiplicative weight functions.  Thus, Theorem~\ref{thm:mainedge,tw} implies that each of their associated Glauber dynamics on edge subsets is rapidly mixing upon graphs of bounded tree-width.

\begin{corollary}
\label{cor:consequences}
Let $G = (V,E)$ where $|V| = n$.
In the following list, we state conditions on the parameters which guarantee rapid mixing of the single bond flip chain on $G$ associated with the stated polynomial and weighting.  We also state the mixing time bound.
\begin{enumerate}
\item For fixed $q,\mu>0$ and the weighting $\weight$ of $Z_{RC}(G;q,\mu)$ given by~\eqref{eqn:RCpoly}, the mixing time satisfies
\begin{align*}
\tau(\eps) = O\left(n^{4+4(\tw(G)+1)|\log q|}
\left(
n^2
+
\log\frac1{\eps}
\right)
\right).
\end{align*}
Equivalently, for fixed $x,y>1$ and the weighting $\weight$ of $T(G;x,y)$ given by~\eqref{eqn:Tuttepoly}, the mixing time satisfies
\begin{align*}
\tau(\eps) = O\left(n^{4+4(\tw(G)+1)|\log ((x-1)(y-1))|}
\left(
n^2
+
\log\frac1{\eps}
\right)
\right).
\end{align*}
\item For fixed $q,\mu>0$ and the weighting $\weight$ of $R_2(G;q,\mu)$ given by~\eqref{eqn:R2poly}, the mixing time satisfies
\begin{align*}
\tau(\eps) = O\left(n^{4+8(\tw(G)+1)|\log q|}
\left(
n^2
+
\log\frac1{\eps}
\right)
\right).
\end{align*}
\item For fixed $q>0$ and $v_e>0$ for all $e$ and the weighting $\weight$ of $Z(G;q,\vec{v})$ given by~\eqref{eqn:multiTuttepoly}, the mixing time satisfies
\begin{align*}
\tau(\eps) = O\left(n^{4+4(\tw(G)+1)|\log q|}
\left(
n^2
+
\log\frac1{\eps}
\right)
\right).
\end{align*}
\item For fixed $y>1$ and $x_i>0$ for all $i$ and the weighting $\weight$ of $U(G;\vec{x},\mu)$ given by~\eqref{eqn:Upoly}, the mixing time satisfies
\begin{align*}
\tau(\eps) = O\left(n^{4+4(\tw(G)+1)\left|\log \left(y'{x'}^3\right)\right|}
\left(
n^2
+
\log\frac1{\eps}
\right)
\right),
\end{align*}
where $x' = \max_i \max\{x_i,x_i^{-1}\}$ and $y' = \max\{y-1,(y-1)^{-1}\}$.
\end{enumerate}
\end{corollary}

Here, we point out that Ge and \v{S}tefankovi\v{c} obtained part~1 above and showed part~2 above in the special case of trees.  Parts~2--4 directly extend these findings, and our main theorem considerably broadens the scope of mixing time bounds for subset Glauber dynamics on graphs of bounded tree-width.




\section{Proofs}\label{sec:proof}

Let us first give an outline of our arguments.

Although our main result is stated in terms of tree-width, we do not treat tree-width directly but instead use linear-width, a more restrictive width parameter introduced by Thomas~\cite{Tho96}. This strategy was also employed by Ge and \v{S}tefankovi\v{c} in the two specific cases mentioned above. For any graph $G=(V,E)$, an ordering  $(e_1,\dots,e_m)$ of $E$ has linear-width at most $\ell$, if, for each $i \in \{2,\dots,m\}$, there are at most $\ell$ vertices that are incident to both an edge in $\{e_1,\dots,e_{i-1}\}$ and an edge in $\{e_i,\dots,e_m\}$. 
The {\em linear-width $\lw(G)$} of $G = (V,E)$ is the smallest integer $\ell$ such that there is an ordering of $E$ with linear-width at most $\ell$. 
The motivation for using linear-width is that it implies an ordering of the edges which we can then use to define canonical paths between pairs of edge subsets. Then we show that $\lambda$-multiplicativity is the general condition under which we can  bound the congestion of these canonical paths.  The use of canonical paths is a standard technique for obtaining a bound on the mixing time of MCMC methods --- see the lecture notes of Jerrum~\cite{Jer03} for an expository account of this approach.



The key property we require that relates the linear-width of $G$ to the more well-studied parameters path-width $\pw(G)$ and tree-width $\tw(G)$ of $G$ is the following set of inequalities, details of which can be found in Bodlaender~\cite{Bod88}, Chung and Seymour~\cite{ChSe89}, Fomin and Thilikos~\cite{FoTh06}, Ge and \v{S}tefankovi\v{c}~\cite{GeSt12}, and Korach and Solel~\cite{KoSo93}.
For any graph $G$ on $n$ vertices,%hypergraph
\begin{align}
\label{eqn:width}
\pw(G) \le \lw(G) \le \pw(G) + 1 \le (\tw(G)+1)(\log_2 n+1)+1.
\end{align}


We follow a canonical paths strategy to bound the mixing time of $\Msbf$.
Given $G = (V,E)$, let $\sigma = (e_1,\dots,e_m)$ be an ordering of $E$.  Given $I,F \in \Osbf$, let $I\oplus F$ denote the symmetric difference of $I$ and $F$, let $\sigma[I\oplus F] := (e_{i_1},\dots,e_{i_k})$ denote the restriction of $\sigma$ to $I\oplus F$ (that is, $\{e_{i_1},\dots,e_{i_k}\} = I\oplus F$ and $i_1<\cdots<i_k$), and let $\gamma_{\sigma,I\to F}$ denote the {\em canonical path} from $I$ to $F$, defined as
\begin{align*}
\gamma_{\sigma,I\to F} := (H_0,\dots,H_k),
\end{align*}
where $H_0 = I$, $H_j=H_{j-1}\oplus \{e_{i_j}\}$ for all $j\in\{1,\dots,k\}$ (and hence $H_k =F$).  Let $\Gamma_\sigma = \{\gamma_{\sigma,I\to F} \ | \ I, F \in \Omega\}$.


To bound the mixing time of $\Msbf$, we will, for some appropriately chosen $\sigma$, bound the {\em congestion} $\varrho(\Gamma_\sigma)$ of the canonical paths, which is defined by
\begin{align}
\label{eqn:congestion}
\varrho(\Gamma_\sigma) :=
\max_{\substack{(H,H'):\\P(H,H')>0}}
\left\{
\frac{1}{\pi(H)P(H,H')}
\sum_{\substack{I,F:\\(H,H')\in\gamma_{\sigma,I\to F}}}
\pi(I)\pi(F)|\gamma_{\sigma,I\to F}|
\right\},
\end{align}
where $|\gamma_{\sigma,I\to F}|$ denotes the length of the path $\gamma_{\sigma,I\to F}$.  
The mixing time can then be bounded using the following inequality of Sinclair~\cite{Sin92}, see also Diaconis and Stroock~\cite{DiSt91}:
for any set $\Gamma$ of canonical paths,
\begin{align}
\label{eqn:mixingcongestion}
\tau(\eps) \le \max_{H\in\Omega}\left\{\varrho(\Gamma)\cdot\left(\log\frac1{\pi(H)}+\log \frac1\eps\right) \right\}.
\end{align}




The remainder of the section is devoted to showing the following.


\begin{theorem}\label{thm:mainedge,lw}
Suppose $G = (V,E)$ has linear-width $\ell$ and let $\sigma = (e_1,\dots,e_m)$ be an ordering of $E$ with linear-width at most $\ell$.  If $\weight$ is $\lambda$-multiplicative for some $\lambda > 0$, then $\varrho(\Gamma_\sigma) \le 2m^2\hat\lambda^{4\ell}$.
\end{theorem}


\noindent
Theorem~\ref{thm:mainedge,lw} immediately implies a good mixing time bound for the Markov chain $\Msbf$ and hence Theorem~\ref{thm:mainedge,tw} follows.

\begin{corollary}\label{cor:lw}
Let $G=(V,E)$ where $|E| = m$.  If $\weight$ is $\lambda$-multiplicative for some $\lambda > 0$, then the mixing time of $\Msbf$ on $G$ satisfies
\begin{align*}
\tau(\eps) = O\left(m^2 \hat\lambda^{4\lw(G)}
%hypergraph maxedgesize
\left(
\max_{S\subseteq E}\left\{\log\frac{\mathcal{P}(G)}{\weight((V,S))}\right\}
+
\log\frac1{\eps}
\right)
\right).
\end{align*}
\end{corollary}

\begin{proof}
Substitute the congestion bound of Theorem~\ref{thm:mainedge,lw} into inequality~\eqref{eqn:mixingcongestion}.
\end{proof}

\begin{proof}[Proof of Theorem~\ref{thm:mainedge,tw}]
Substitute the upper bound on $\lw(G)$ in~\eqref{eqn:width} into Corollary~\ref{cor:lw}.
\end{proof}



In the proof of Theorem~\ref{thm:mainedge,lw}, we will need the following lemma.

\begin{lemma}\label{lem:fdiffedge}
Suppose $G = (V,E)$ has linear-width $\ell$ and let $\sigma = (e_1,\dots,e_m)$ be an ordering of $E$ with linear-width at most $\ell$.  Suppose $I,F\in\Osbf$ and $H$ is on $\gamma_{\sigma,I\to F}$.
If $\weight$ is $\lambda$-multiplicative for some $\lambda > 0$, then
\begin{align*}
\frac{\weight((V,I))\weight((V,F))}{\weight((V,H))\weight((V,C))} \le \hat\lambda^{4\ell},
\end{align*}
where $C = I \oplus F \oplus H$.
\end{lemma}

\begin{proof}
Since $H$ is on $\gamma_{\sigma,I\to F}$, we may assume that $H = H_j$ for some $j\in\{0,\dots,k\}$.  Let $Q = \{e_1,e_2,\dots,e_{i_j-1},e_{i_j}\}$ and $\overline Q = E \setminus Q$. Then
\begin{align}
H = (F\cap Q)\cup(I\cap \overline Q) \quad \text{and} \quad C = (I\cap Q)\cup(F\cap \overline Q). \label{eqn:HCedge}
\end{align}
We can partition $V$ into three sets as follows. Let $V_1$ denote the set of vertices that are incident only to edges in $Q$; let $V_2$ denote the set of vertices that are incident only to edges in $\overline Q$; let $K$ denote the set of remaining vertices, that is, the set of vertices incident to edges in $Q$ and $\overline Q$. Note that $|K|$ is at most the linear-width $\ell$.

No vertex $v_1$ of $V_1$ is adjacent to a vertex $v_2$ of $V_2$, as otherwise the edge between them would simultaneously be in $Q$ and $\overline Q$. 
This implies that $K$ is a vertex cut separating $V_1$ and $V_2$ with respect to $G$, and also with respect to the graphs $(V,I)$, $(V,F)$, $(V,H)$, $(V,C)$.
Furthermore, $(I\cap Q,I\cap\overline Q)$, $(F\cap Q,F\cap\overline Q)$, $(H\cap Q,H\cap\overline Q)$, $(C\cap Q,C\cap\overline Q)$ are edge partitions that are appropriate for $K$. 
Therefore, by the fact that $\weight$ is $\lambda$-muliplicative and $|K| \le \ell$,
\begin{align*}
\hat\lambda^{-\ell} \le \frac{\weight((V_1\cup K,J\cap Q))\weight((V_2\cup K,J\cap\overline Q))}{\weight((V,J))} \le \hat\lambda^\ell \quad \text{for } J\in \{I,F,H,C\}. 
\end{align*}
By~\eqref{eqn:HCedge}, it follows that
\begin{align*}
(V_1\cup K,H\cap Q) &= (V_1\cup K,F\cap Q), \\
(V_2\cup K,H\cap\overline Q) &= (V_2\cup K,I\cap\overline Q), \\
(V_1\cup K,C\cap Q) &= (V_1\cup K,I\cap Q)\quad\text{ and} \\
(V_2\cup K,C\cap\overline Q) &= (V_2\cup K,F\cap\overline Q).
\end{align*}
Now, letting $r$ be
\begin{align*}
\weight((V_1\cup K,I\cap Q))\weight&((V_2\cup K,I\cap\overline Q))
\weight((V_1\cup K,F\cap Q))\weight((V_2\cup K,F\cap\overline Q)),
\end{align*}
we obtain
\begin{align*}
\frac{\weight((V,I))\weight((V,F))}{r} \le \hat\lambda^{2\ell} 
\quad \text{and} \quad
\frac{r}{\weight((V,H))\weight((V,C))} \le \hat\lambda^{2\ell}, 
\end{align*}
whereby the lemma easily follows.
\end{proof}






\begin{proof}[Proof of Theorem~\ref{thm:mainedge,lw}]
Let $(H,H')\in\Osbf\times\Osbf$ such that $\Psbf(H,H')>0$.  We will bound the expression within the $\max$ of the definition for $\varrho(\Gamma_\sigma)$.
We let $\hat{H}=H$ if $\pisbf(H)\le\pisbf(H')$ and $\hat{H}=H'$ otherwise.  
Denote by $\cp(H,H')$ the set of pairs $(I,F)$ such that $(H,H')\in\gamma_{\sigma,I\to F}$.  We define the function $\inj: \cp(H,H')\to\Osbf$ by $(I,F) \to I\oplus F\oplus \hat{H}$.  Observe that $\inj$ is an injection, for, given $J\in \Osbf$, we can determine the unique $(I,F)$ such that $\inj(I,F) = J$ by first computing $J\oplus\hat{H}=I\oplus F$ and then using the ordering $\sigma$ to recover $I$ and $F$.  Since $\weight$ is $\lambda$-multiplicative, we have by Lemma~\ref{lem:fdiffedge} that
\begin{align}
\frac{\weight((V,I))\weight((V,F))}{\weight((V,\hat{H}))\weight((V,\inj(I,F)))} \le \hat\lambda^{4\ell}.
\label{eqn:fdiffedge}
\end{align}
Regardless of whether $\pisbf(H)\le\pisbf(H')$ or $\pisbf(H)>\pisbf(H')$, a brief calculation yields that
$\pisbf(H)\Psbf(H,H')
 = \pisbf(\hat{H})/(2m)$. Thus,
\begin{align}
\frac{1}{\pisbf(H)\Psbf(H,H')}&\sum_{(I,F)\in\cp(H,H')}\pisbf(I)\pisbf(F)|\gamma_{\sigma,I\to F}|\nonumber\\
& = \frac{2m}{\pisbf(\hat{H})}\sum_{(I,F)\in\cp(H,H')}\pisbf(I)\pisbf(F)|\gamma_{\sigma,I\to F}|\nonumber\\
& \le \frac{2m^2}{\mathcal{P}(G)}\sum_{(I,F)\in\cp(H,H')}\frac{\weight((V,I))\weight((V,F))}{\weight((V,\hat{H}))} \label{line1}\\
& \le \frac{2m^2}{\mathcal{P}(G)}\sum_{(I,F)\in\cp(H,H')}\weight((V,\inj(I,F)))\hat\lambda^{4\ell} \label{line2}\\
& \le 2m^2\hat\lambda^{4\ell}, \label{line3}
\end{align}
where~\eqref{line1} follows from the facts $|\gamma_{\sigma,I\to F}|\le m$ and $\pisbf(S) = \weight((V,S))/\mathcal{P}(G)$,
\eqref{line2} follows from~\eqref{eqn:fdiffedge},
and~\eqref{line3} follows from the fact that $\inj$ is an injection.  Then, substituting the bound~\eqref{line3} into~\eqref{eqn:congestion}, we obtain $\varrho(\Gamma_\sigma) \le 2m^2\hat\lambda^{4\ell}$, as claimed.
\end{proof}


\section{Glauber dynamics for vertex subsets}\label{sec:induced}

Until now, we have considered edge subsets (subgraphs) and Glauber transitions which change one edge at a time.  In this section, we modify our methods to treat vertex subsets (induced subgraphs) and transitions that involve one vertex at a time --- each such transition can affect many edges, up to the maximum degree of $G$.  We sketch how to obtain rapid mixing for this process upon graphs of bounded tree-width still with only a modest condition on the base graph polynomials.

A {\em vertex subset expansion formula for $\mathcal{P}$} is written as follows: for any simple graph $G = (V,E)$, %hypergraph
\begin{align}
\label{eqn:polyvertex}
\mathcal{P}(G)
= \sum_{S\subseteq V}
\weight(G[S])
\end{align}
for some graph function $\weight$, where $G[S]$ denotes the subgraph of $G$ induced by $S$. %hypergraph
If the function $\weight$ is non-negative, we refer to~\eqref{eqn:polyvertex} as a {\em vertex subset weighting for $\mathcal{P}$} and to $\weight$ as its {\em weight function}.  Again, for our results to hold, aside from some other constraints, we need the weight function to be positive on all induced subgraphs.
 

From the formulation in~\eqref{eqn:polyvertex}, we define the {\em single site flip chain} $\Mssf$ on a given graph $G = (V,E)$ as follows.  We start with an arbitrary subset $X_0 \subseteq V$ and repeatedly generate $X_{t+1}$ from $X_t$ by running the following experiment.%hypergraph
\begin{enumerate}
\item Pick a vertex $v\in V$ uniformly at random and let $S = X_t \oplus \{v\}$.
\item Set $X_{t+1} = S$ with probability $\frac12\min\left\{1,\weight(G[S])/\weight(G[X_t])\right\}$ and with the remaining probability set $X_{t+1} = X_t$.
\end{enumerate}
We denote the state space of $\Mssf$ by $\Ossf$ (i.e.~$\Ossf = 2^V$) and its transition probability matrix by $\Pssf$.
It can be shown that $\Mssf$ is a reversible Markov chain that has a unique stationary distribution $\pissf$ satisfying
$\pissf(S) \propto \weight(G[S])$.
Hence, we may use $\Mssf$ as a Markov chain in MCMC sampling for the following problem.
\medskip

\noindent
$\PWV(\mathcal{P})$: \textsc{$\mathcal{P}$-weighted Vertex Subsets}\\
\indent Input: a graph $G = (V, E)$.\\%hypergraph
\indent Output: a subset $S\subseteq V$ with probability $\weight(G[S])/\mathcal{P}(G)$.
\medskip

We now describe the condition required of the weight function $\weight$ in~\eqref{eqn:polyvertex}.
For fixed $\lambda > 0$, we say that the weight function $\weight$ is {\em vertex $\lambda$-multiplicative}, if for any $G = (V,E)$ and $K$ a vertex cut that separates sets $ V_1$ and $V_2$ with respect to $G$, we have
\begin{align}\label{eqn:vertexkmult}
\hat\lambda^{-|K|} \le \frac{\weight(G[V_1])\weight(G[V_2\cup K])}{\weight(G)} \le \hat\lambda^{|K|}.
\end{align}
Note that, if $\weight$ is vertex $\lambda$-multiplicative, then it follows that $\weight$ is multiplicative with respect to disjoint union by taking $K = \emptyset$; furthermore, taking $V_2 = \emptyset$ gives that the addition of a few vertices does not change $\weight$ wildly.


The main result of this section is the following.

\begin{theorem}\label{thm:mainvertex,tw}
Let $G=(V,E)$ where $|V| = n$.  If $\weight$ is vertex $\lambda$-multiplicative for some $\lambda > 0$, then the mixing time of $\Mssf$ on $G$ satisfies
\begin{align*}
\tau(\eps) = O\left(n^{2+4(\tw(G)+1)|\log\lambda|}
\left(
\max_{S\subseteq V}\left\{\log\frac{\mathcal{P}(G)}{\weight(G[S])}\right\}
+
\log\frac1{\eps}
\right)
\right).
\end{align*}
\end{theorem}

\subsection{A sketch of the proof}

As before, we do not treat tree-width directly, but instead work with a different width parameter.
For any graph $G=(V,E)$, an ordering  $(v_1,\dots,v_n)$ of $V$ has vertex-separation at most $\ell$, if, for each $i \in \{2,\dots,n\}$, there are at most $\ell$ vertices in $\{v_1,\dots,v_{i-1}\}$ that are adjacent to a vertex in $\{v_i,\dots,v_n\}$. 
The {\em vertex-separation $\vs(G)$} of $G = (V,E)$ is the smallest integer $\ell$ such that there is an ordering of $V$ with vertex-separation at most $\ell$. 
It was shown by Kinnersley~\cite{Kin92} that the vertex-separation of $G$ satisfies $\vs(G) = \pw(G)$, and so the inequalities in~\eqref{eqn:width} remain relevant.

To bound the mixing time of $\Mssf$, we again follow a canonical paths argument.
Given $G = (V,E)$, let $\sigma = (v_1,\ldots,v_n)$ be an ordering of $V$.  Given $I,F \in \Ossf$, let $I\oplus F$ denote the symmetric difference of $I$ and $F$, let $\sigma[I\oplus F] := (v_{i_1},\dots,v_{i_k})$ denote the restriction of $\sigma$ to $I\oplus F$ (that is, $\{v_{i_1},\dots,v_{i_k}\} = I\oplus F$ and $i_1<\cdots<i_k$), and let $\gamma_{\sigma,I\to F}$ denote the {\em canonical path} from $I$ to $F$, defined as
\begin{align*}
\gamma_{\sigma,I\to F} := (H_0,\dots,H_k),
\end{align*}
where $H_0 = I$, $H_j=H_{j-1}\oplus \{v_{i_j}\}$ for all $j\in\{1,\dots,k\}$ (and hence $H_k =F$).  Let $\Gamma_\sigma = \{\gamma_{\sigma,I\to F} \ | \ I, F \in \Omega\}$.
Using inequality~\eqref{eqn:mixingcongestion}, our bound on the mixing time again follows from a bound on the {\em congestion $\varrho(\Gamma_\sigma)$}, which is defined analogously to~\eqref{eqn:congestion}.


\begin{theorem}\label{thm:mainvertex,vs}
Suppose $G = (V,E)$ has vertex-separation $\ell$. Let $\sigma = (v_1,\dots,v_n)$ be an ordering of $V$ with vertex-separation at most $\ell$.  If, for some $\lambda > 0$, $\weight$ is vertex $\lambda$-multiplicative, then $\varrho(\Gamma_\sigma) \le 2n^2\hat\lambda^{4\ell}$.
\end{theorem}

\noindent
Theorem~\ref{thm:mainvertex,vs} immediately implies a good mixing time bound for the Markov chain $\Mssf$ and hence Theorem~\ref{thm:mainvertex,tw} also.

\begin{corollary}\label{cor:vs}
Let $G=(V,E)$ where $|V| = n$.  If $\weight$ is vertex $\lambda$-multiplicative for some $\lambda > 0$, then the mixing time of $\Mssf$ on $G$ satisfies
\begin{align*}
\tau(\eps) = O\left(n^2 \hat\lambda^{4\vs(G)}
\left(
\max_{S\subseteq V}\left\{\log\frac{\mathcal{P}(G)}{\weight(G[S])}\right\}
+
\log\frac1{\eps}
\right)
\right).
\end{align*}
\end{corollary}

\begin{proof}
Substitute the congestion bound of~\eqref{eqn:mixingcongestion} into Theorem~\ref{thm:mainvertex,vs}.
\end{proof}

\begin{proof}[Proof of Theorem~\ref{thm:mainvertex,tw}]
Substitute the upper bound on $\vs(G)=\pw(G)$ of~\eqref{eqn:width} into Corollary~\ref{cor:vs}.
\end{proof}

We omit the proof of Theorem~\ref{thm:mainvertex,vs} as it is similar to that of Theorem~\ref{thm:mainedge,lw}, but give the details for the analogue of Lemma~\ref{lem:fdiffedge}.


\begin{lemma}\label{lem:fdiffvertex}
Suppose $G = (V,E)$ has vertex-separation $\ell$ and let $\sigma = (v_1,\dots,v_n)$ be an ordering of $V$ with vertex-separation at most $\ell$.  Suppose $I,F\in\Ossf$ and $H$ is on $\gamma_{\sigma,I\to F}$.
If $\weight$ is vertex $\lambda$-multiplicative for some $\lambda > 0$, then
\begin{align*}
\frac{\weight(G[I])\weight(G[F])}{\weight(G[H])\weight(G[C])} \le \hat\lambda^{4\ell},
\end{align*}
where $C = I \oplus F \oplus H$.
\end{lemma}

\begin{proof}
Since $H$ is on $\gamma_{\sigma,I\to F}$, we may assume that $H = H_j$ for some $j\in\{0,\dots,k\}$.  Let $Q = \{v_1,\dots,v_{i_j}\}$, and $\overline{Q} = V\setminus Q$. Then
\begin{align}
H = (F\cap Q)\cup(I\cap\overline{Q}) \quad \text{and} \quad C = (I\cap Q)\cup(F\cap\overline{Q}). \label{eqn:HCvertex}
\end{align}
We can partition $V$ into three sets as follows. Let $V_1$ denote the set of vertices $\overline{Q}$; let $V_2$ denote the subset of $Q$ containing vertices adjacent only to other vertices of $Q$; and let $K$ denote the set of remaining vertices, that is, the set of vertices of $Q$ incident to vertices of $V_1$.  Note that $|K|$ is at most the vertex-separation $\ell$.

Clearly, $K$ is a vertex cut separating $V_1$ and $V_2$ with respect to $G$ and also with respect to the graphs $G[I]$, $G[F]$, $G[H]$, $G[C]$. 
Therefore, by the fact that $\weight$ is vertex $\lambda$-multiplicative, and noting that $V_2\cup K = Q$,
\begin{align*}
\hat\lambda^{-\ell} \le \frac{\weight(G[Q\cap J])\weight(G[\overline{Q}\cap J])}{\weight(G[J])} \le \hat\lambda^\ell \quad \text{for } J\in \{I,F,H,C\}. 
\end{align*}
By~\eqref{eqn:HCvertex}, it follows that $H\cap Q = F\cap Q$, $H\cap\overline{Q} = I\cap\overline{Q}$, $C\cap Q = I\cap Q$ and $C\cap\overline{Q} = F\cap\overline{Q}$.
Now, letting $r = \weight(G[Q\cap I])\weight(G[\overline{Q}\cap I])\weight(G[Q\cap F])\weight(G[\overline{Q}\cap F])$,
we obtain that
\begin{align*}
\frac{\weight(G[I])\weight(G[F])}{r} \le \hat\lambda^{2\ell} 
\quad \text{and} \quad
\frac{r}{\weight(G[H])\weight(G[C])} \le \hat\lambda^{2\ell},
\end{align*}
whereby the lemma easily follows.
\end{proof}

\subsection{An example of a vertex subset chain}\label{subsec:exvertex}

Recalling the bivariate interlace polynomial $q(G;x,y)$ in~\eqref{eqn:interpoly}, which, for fixed $x,y > 1$, is given by the vertex subset weighting $$\weight(G[S]) := (x-1)^{\rank_2(S)} (y-1)^{|S|-\rank_2(S)}.$$
With arguments very similar to those given in Subsection~\ref{subsec:kmultex}, it is not difficult to verify that this weight function is vertex $\lambda$-multiplicative, as follows.
For any graph $G = (V,E)$ and any vertex cut $K$ that separates sets $V_1$ and $V_2$, consider the graph $G'$ consisting of the disjoint union of $G[V_1]$ and $G[V_2\cup K]$.  We note that the adjacency matrix of $G$ may be formed from the adjacency matrix of $G'$ by altering at most $|K|$ rows and $|K|$ columns, changing the rank $\rank_2$ of the adjacency matrix by up to $2|K|$. Thus, $\weight$ is vertex $\lambda$-multiplicative by taking $\lambda := (x-1)^2/(y-1)^2$.
So, by Theorem~\ref{thm:mainvertex,tw}, it follows that a natural Markov chain derived from the bivariate interlace polynomial --- a chain which has not been studied extensively, as far as we are aware --- mixes rapidly on tree-width-bounded graphs.

\begin{corollary}\label{cor:consequences,interpoly}
Let $G=(V,E)$ where $|V| = n$.  If $x,y>1$ are fixed, then for the single site flip chain on $G$ associated with the weighting $\weight$ of $q(G;x,y)$ given by~\eqref{eqn:interpoly},
the mixing time satisfies
\begin{align*}
\tau(\eps) = O\left(n^{2+8(\tw(G)+1)|\log ((x-1)/(y-1))|}
\left(
%\max_{S\subseteq V}\left\{\log\frac{q(G;x,y)}{\weight(G[S])}\right\}
n
+
\log\frac1{\eps}
\right)
\right).
\end{align*}
\end{corollary}


We believe that it would be of wider interest to study further properties of this single site flip chain on general graphs, in particular to compare it with known results on the random cluster, Potts and Ising models.



\section{Subset Glauber dynamics for hypergraphs}\label{sec:hyper}

In this section, we outline an extension of our results to hypergraphs.
Recall that a {\em hypergraph} $H$ is a pair $(V,E)$ where $V$ is the vertex set and $E$ is a hyperedge set that consists of (arbitrary) vertex subsets.
We let $\maxedgesize(H)$ denote the maximum size of a hyperedge in $H$.
To each hypergraph $H = (V,E)$, we associate a graph $G_p(H)$ called the {\em primal graph} or {\em Gaifman graph} defined by $V(G_p(H)) = V$ and $E(G_p(H)) = \{\{u,v\} \subseteq V \ | \ \{u,v\}\subseteq e \text{ for some } e\in E\}$.

We remark at the outset of this section that the generalisation to hypergraphs we give here could be seen as routine.  We do not include any details of the proof.  Nonetheless, we include the necessary elements for the statement of the hypergraph version of Theorem~\ref{thm:mainedge,tw}, even if these are straightforward generalisations of the graph versions.  We also provide some justification for the hypergraph analogue of~\eqref{eqn:width}.  It is important to observe that the running time of the hypergraph Markov chain includes an adjustment to take into account the maximum hyperedge size $\maxedgesize(H)$.

For a hypergraph polynomial $\mathcal{P}$, a {\em hyperedge subset expansion formula} is written as follows: for any hypergraph $H = (V,E)$,
\begin{align}
\label{eqn:polyedge,hyper}
\mathcal{P}(H)
= \sum_{S\subseteq E}
\weight((V,S))
\end{align}
for some graph function $\weight$, where $(V,S)$ denotes the hypergraph with vertex set $V$ and hyperedge set $S$. %hypergraph
Notice that if every hyperedge has order two, then this formula is the same as in~\eqref{eqn:polyedge}.
If the function $\weight$ is non-negative, that is, $\weight(H) \ge 0$ for all hypergraphs $H$, we refer to~\eqref{eqn:polyedge,hyper} as a {\em hyperedge subset weighting for $\mathcal{P}$} and to $\weight$ as its {\em weight function}.  For our purposes, we require the weight function to be positive.

For any hypergraph $H=(V,E)$, an ordering  $(e_1,\dots,e_m)$ of $E$ has linear-width at most $\ell$, if, for each $i \in \{2,\dots,m\}$, there are at most $\ell$ vertices that are contained in both a hyperedge in $\{e_1,\dots,e_{i-1}\}$ and a hyperedge in $\{e_i,\dots,e_m\}$. 
The {\em linear-width $\lw(H)$} of $H = (V,E)$ is the smallest integer $\ell$ such that there is an ordering of $E$ with linear-width at most $\ell$. 

The tree-width $\tw(H)$ and path-width $\pw(H)$ of a given a hypergraph $H$ are defined similarly to the tree- and path-width of graphs, cf.~Hlin\v{e}n\'y et al.~\cite{HOSG08}. Since all the vertices in a clique appear in the same bag of any tree decomposition, it follows that $\tw(H)$ is equal to the tree-width $\tw(G_p(H))$ of the primal graph associated with $H$ and, similarly, $\pw(H)= \pw(G_p(H))$. Noting that, just as for graphs, $\lw(H) \le \pw(H)$, we obtain
\begin{align}\label{eqn:width,hyper}
\lw(H)\leq \pw(H) \leq (\tw(H)+1)(\lfloor \log_2 n \rfloor+1)+1.
\end{align}


For a hypergraph $H = (V,E)$, a vertex cut $K$ is said to separate sets $V_1$ and $V_2$ if $(V_1,K,V_2)$ is a partition of $V$ and there is no hyperedge of $E$ that contains both a vertex of $V_1$ and a vertex of $V_2$.
A partition $(E_1,E_2)$ of $E$ is appropriate (for $K$) if $E_1$ has no hyperedge containing a vertex in $V_2$ and $E_2$ has no hyperedge containing a vertex in $V_1$.

For fixed $\lambda > 0$, we say that the weight function $\weight$ is {\em $\lambda$-multiplicative}, if for any $H = (V,E)$, any vertex cut $K$ that separates sets $V_1$ and $V_2$, and any appropriate partition $(E_1,E_2)$, we have
\begin{align*}
\hat\lambda^{-|K|} \le \frac{\weight((V_1\cup K,E_1))\weight((V_2\cup K,E_2))}{\weight(H)} \le \hat\lambda^{|K|}.
\end{align*}



From the formulation in~\eqref{eqn:polyedge,hyper}, the {\em single bond flip chain} $\Msbf$ on a given hypergraph $H = (V,E)$ is defined as follows.  We start with an arbitrary subset $X_0 \subseteq E$ and repeatedly generate $X_{t+1}$ from $X_t$ by running the following experiment.
\begin{enumerate}
\item Pick a hyperedge $e\in E$ uniformly at random and let $S = X_t \oplus \{e\}$.
\item Set $X_{t+1} = S$ with probability $\frac12\min\left\{1,\weight((V,S))/\weight((V,X_t))\right\}$ and with the remaining probability set $X_{t+1} = X_t$.
\end{enumerate}
We denote the state space of $\Msbf$ by $\Osbf$ (i.e.~$\Osbf = 2^E$) and its transition probability matrix by $\Psbf$.
It can be shown that $\Msbf$ is a reversible Markov chain that has a unique stationary distribution $\pisbf$ satisfying $\pisbf(S) \propto \weight((V,S))$.

We have obtained the following hypergraph generalisation of Theorem~\ref{thm:mainedge,tw}.


\begin{theorem}\label{thm:mainedge,tw,hyper}
Let $H=(V,E)$ be a hypergraph with $|V| = n$.  If $\weight$ is $\lambda$-multiplicative for some $\lambda > 0$, then the mixing time of $\Msbf$ on $H$ satisfies
\begin{align*}
\tau(\eps) = O\left(n^{2\maxedgesize(H)+4(\tw(H)+1)|\log\lambda|}
%hypergraph maxedgesize
\left(
\max_{S\subseteq E}\left\{\log\frac{\mathcal{P}(H)}{\weight(S)}\right\}
+
\log\frac1{\eps}
\right)
\right).
\end{align*}
\end{theorem}


The proof of this theorem closely follows the pattern set in the case of edge subset Glauber dynamics for graphs.  We follow a canonical paths argument that uses an edge ordering by linear-width, to obtain a mixing time bound in terms of linear-width, immediately combined with~\eqref{eqn:width,hyper} and the fact that $|E| = O(n^{\maxedgesize(H)})$ to obtain Theorem~\ref{thm:mainedge,tw,hyper}.  For brevity, we omit the full details; however a rereading of Section~\ref{sec:proof} with the hypergraph definitions in mind will confirm that the same proof is valid.



\section{Subset Glauber dynamics for matroids}
\label{sec:matroid}

In this section, we outline how to apply our methodology to the Tutte polynomial for matroids.
We do not attempt to give the fullest framework for matroids, though it could be distilled from the proof sketches below.  A significant difference in treating matroids compared to graphs or hypergraphs is that they are not defined with vertices.  Since there are then no direct analogues of a vertex cut, this affects the choice of width parameter and the potential definition for matroidal $\lambda$-multiplicativity.
Fortunately, there is an existing, well-studied notion of connectivity in matroids, which leads to a notion of matroidal path-width, which we have found useful here.

Recall that a matroid $M$ is a pair $(E,\mathcal{I})$, where $E$ is a ground set of edges and $\mathcal{I}$ is a collection of subsets of $E$ (referred to as the independent sets of $M$) satisfying the following properties:
\begin{itemize}
\item $\emptyset \in \mathcal{I}$;
\item for any $A' \subseteq A \subseteq E$, we have $A \in \mathcal{I} \implies A' \in \mathcal{I}$; and
\item for any $A \subseteq E$, all maximal members of $\mathcal{I}$ contained in $A$ have the same cardinality.
\end{itemize}
For any $A\subseteq E$, the {\em rank} $r(A)$ of $A$ is the cardinality of a maximal member of $\mathcal{I}$ contained in $A$,
and the {\em connectivity} $\lambda(A)$ of $A$ is equal to $r(A) + r(E \setminus A) - r(E)$.
Both $r$ and $\lambda$ are non-negative, submodular functions.
For further reading on matroid theory, consult a standard reference, by e.g.~Oxley~\cite{Oxl92} or Welsh~\cite{Wel76}. 

The Tutte polynomial naturally extends from graphs to matroids as follows:  for any matroid $M = (E,\mathcal{I})$ and parameters $x,y$, it is defined by
\begin{align}
\label{eqn:Tuttepoly,matroid}
T(M;x,y) := \sum_{S\subseteq E}(x-1)^{r(E)-r(S)}(y-1)^{|S|-r(S)}.
\end{align}
For our purposes, we shall assume $x,y>1$.
Note that if $M$ is a graphic matroid and hence $M$ is the cycle matroid $M(G)$ for some graph $G$, then the expressions in~\eqref{eqn:Tuttepoly} and~\eqref{eqn:Tuttepoly,matroid} are equal.


For any matroid $M=(E,\mathcal{I})$, an ordering  $(e_1,\dots,e_m)$ of $E$ has path-width at most $\ell$, if, for each $i \in \{2,\dots,m\}$, 
$\lambda(\{e_1,\dots,e_{i-1}\}) \le \ell$. 
The {\em path-width $\pw(M)$} of $M = (E,\mathcal{I})$ is the smallest integer $\ell$ such that there is an ordering of $E$ with path-width at most $\ell$.  For more on this width parameter, consult Kashyap~\cite{Kas08}.

The path-width of a matroid provides an upper bound on a more closely-studied matroidal width parameter, branch-width.
For any matroid $M=(E,\mathcal{I})$, a {\em branch-decomposition} of $M$ is a cubic tree $T$ with exactly $|E|$ leaves, labelled in one-to-one correspondence with the elements of $E$; such a branch-decomposition has width at most $\ell$, if, for any $e \in T$, letting $X_e$ denote a subset of $E$ corresponding to the set of labels contained in one of the two components of $T\setminus e$, then $\lambda(X_e) \le \ell$.
The {\em branch-width $\bw(M)$} of $M = (E,\mathcal{I})$ is the smallest integer $\ell$ such that there is a branch-decomposition of $M$ with width at most $\ell$.
The branch-width of $M$ can be used to upper bound the path-width of $M$ in the following way.

\begin{proposition}\label{prop:pwbw}
For any matroid $M=(E,\mathcal{I})$, $\pw(M) \le \bw(M) \log_2 |E|$.
\end{proposition}

\begin{proof}
Let $M=(E,\mathcal{I})$ be a matroid. We would like to exhibit an ordering of $E$ with path-width at most $\bw(M)\log_2 |E|$.
Let $T=(V_T,E_T)$ be a branch-decomposition of $M$ with width at most $\bw(M)$.
For any vertex $v\in V_T$, let $T_v$ be the subtree rooted at $v$ and let $E_v$ denote the subset of $E$ corresponding to the set of labels contained in $T_v$.  (Note that $T_v$ has $(|T_v|+1)/2$ leaves and hence the same quantity of labels.)
To obtain an edge ordering, we perform a stack-based depth-first search (DFS) of $T$, such that smaller subtrees are explored first. Let $\sigma$ be the ordering of $E$ induced by the order in which their corresponding leaves in $T$ are explored by the DFS\footnote{In other words, the DFS algorithm proceeds as follows. Put the root of $T$ onto the stack. While the stack is non-empty, pop an element $v$ off of the stack and, if $v$ is a leaf of $T$, add the edge of $E$ corresponding to $v$ at the end of $\sigma$; otherwise, first push the child of $v$ with the bigger subtree  onto the stack, then push the other child of $v$ onto the stack.}.
We now show that $\sigma := (e_1,\dots, e_m)$ has path-width at most $\bw(M)\log_2 m$.  Let $i\in\{1,\dots,m\}$ and suppose that $(v_1,\dots,v_k)$ are the vertices of $T$ (ordered from bottom to top) on the stack just after the leaf corresponding to $e_i$ was explored.  Note that the region of $T$ unexplored by the DFS up to this point is precisely $\bigcup_{j=1}^k T_{v_j}$.
The exploration order used by DFS (prioritising smaller subtrees) implies for $j\in \{1,\dots,k-1\}$ that $|T_{v_{j+1}}| < |T_{v_j}|/2$ and hence $|E_{v_{j+1}}| < |E_{v_j}|/2$.  Since there are $m$ labels in $T$, we see that $k \le \log_2 m$.
It then follows, using submodularity of $\lambda$ and the fact that the width of the branch-decomposition $T$ is at most $\bw(M)$, that
\begin{align*}
\lambda(\{e_1,\dots, e_i\})
 = \lambda\left(\bigcup_{j=1}^k E_{v_j}\right)
 \le \sum_{j=1}^k \lambda(E_{v_j})
 \le k \bw(M) \le \bw(M) \log_2 m,
\end{align*}
as required.
\end{proof}

It is worth noting here that matroidal path-width corresponds more closely to graphic linear-width than graphic path-width.  Although tree-width can be defined for matroids, it is not a straightforward extension; furthermore, the parameter has not gained as much traction in matroid theory as has branch-width, cf. Hlin\v{e}n\'y and Whittle~\cite{HlWh06}.  On the other hand, Hlin\v{e}n\'y and Whittle showed that a matroid's tree-width is bounded if and only if its branch-width is bounded.


From the formulation in~\eqref{eqn:Tuttepoly,matroid}, the {\em single bond flip chain $\Msbf_{Tutte}$ for the Tutte polynomial} on a given matroid $M = (E,\mathcal{I})$ is defined as follows.  For any $A\subseteq E$, let $\weight_{Tutte}(A) := (x-1)^{r(E)-r(A)}(y-1)^{|A|-r(A)}$; in other words, $\weight_{Tutte}$ is the weight function associated with $T(M;x,y)$.  We start with an arbitrary subset $X_0 \subseteq E$ and repeatedly generate $X_{t+1}$ from $X_t$ by running the following experiment.
\begin{enumerate}
\item Pick an element $e\in E$ uniformly at random and let $S = X_t \oplus \{e\}$.
\item Set $X_{t+1} = S$ with probability $\frac12\min\left\{1,\weight_{Tutte}(S)/\weight_{Tutte}(X_t)\right\}$ and with the remaining probability set $X_{t+1} = X_t$.
\end{enumerate}
We denote the state space of $\Msbf_{Tutte}$ by $\Osbf_{Tutte}$ (i.e.~$\Osbf_{Tutte} = 2^E$) and its transition probability matrix by $\Psbf_{Tutte}$.
It can be shown that $\Msbf_{Tutte}$ is a reversible Markov chain that has a unique stationary distribution $\pisbf_{Tutte}$ satisfying $\pisbf_{Tutte}(S) \propto \weight_{Tutte}(S)$.

For any matroid $M=(E,\mathcal{I})$, note that $\pw(M) \le \ell$ gives an ordering  $(e_1,\dots,e_m)$ of $E$  such that, for each $i \in \{2,\dots,m\}$, 
\begin{align*}
0 \le r(\{e_1,\dots,e_{i-1}\}) + r(\{e_i,\dots,e_m\}) - r(E) \le \ell,
\end{align*}
and so, letting $c(x,y) = \max\{(x-1)(y-1),((x-1)(y-1))^{-1}\}$,
\begin{align*}
c(x,y)^{-\ell} \le
\frac{\weight_{Tutte}(\{e_1,\dots,e_{i-1}\})\weight_{Tutte}(\{e_i,\dots,e_m\})}{\weight_{Tutte}(E)}
\le c(x,y)^\ell.
\end{align*}
Combining the above inequalities with an adaptation of the canonical paths proof given in Section~\ref{sec:proof} --- using appropriate substitutions, such as $\pw(M)$ in the place of $\lw(G)$ and  Proposition~\ref{prop:pwbw} in the place of~\eqref{eqn:width}, together with the well-known fact that the connectivity function of a matroid is closed under the minor relation --- we obtain the following matroidal analogue of Theorem~\ref{thm:mainedge,tw}. The full details of the proof are left to the reader.


\begin{theorem}\label{thm:mainedge,bw,matroid}
Let $M=(E,\mathcal{I})$ where $|E| = m$.  The mixing time of $\Msbf_{Tutte}$ on $M$ satisfies
\begin{align*}
\tau(\eps) = O\left(m^{2+4\bw(G)\log c(x,y)}
\left(
\max_{S\subseteq E}\left\{\log\frac{T(M;x,y)}{\weight_{Tutte}(S)}\right\}
+
\log\frac1{\eps}
\right)
\right).
\end{align*}
\end{theorem}


\section{Conclusion}

In this work, we have studied a general framework of graph polynomials and Markov chains defined via subset expansion formulae for these polynomials. We have demonstrated that the system mixes rapidly for graphs of bounded tree-width.  On a graph $G$ with $n$ vertices, we have shown a mixing time of order $n^{O(1)}e^{O(\pw(G))} = n^{O(\tw(G))}$.  Our results apply to many of the most prominent and well-known polynomials in the field.
The mixing times of our processes have, respectively, exponential and super-exponential dependencies upon path-width and tree-width.  It would be interesting to investigate if this could be improved, in particular, to achieve something akin to fixed-parameter tractability in terms of tree-width.  We consider it unlikely that this could be accomplished using only the approaches from this paper.

For all of our results, we need that the weight function is strictly positive for all (induced) subgraphs. Many of the classical enumeration polynomials such as the matching, independence, clique and chromatic polynomials are captured by the general polynomials that we mention as examples throughout this work. However, these are `hard-core models' --- in which some (induced) subgraphs have a zero weighting --- and hence are not included in our approach. Many of these are evaluations that fall at the boundary of the regions that we can handle. For example, the Tutte polynomial evaluated at the point $(2,1)$ counts the number of forests of the graph. We have shown rapid mixing at all fixed points $(2,1+\delta$), for $\delta>0$, with a mixing time that depends on $\delta$. It would be interesting to consider whether the chains associated with these boundary points mix rapidly for graphs of bounded tree-width. 


After a presentation of these results in Z\"urich, Thore Husfeldt suggested that our approximation schemes may be of practical use for computing partition functions. The aforementioned dynamic programming algorithms for exact computation all require an explicit tree-width decomposition.  Although a decomposition can theoretically be computed in linear time~\cite{Bod96}, the existing decomposition-generation algorithms have running times hindered by large constants and thus are impractical.  On the other hand, an explicit decomposition is not a prerequisite for rapid mixing in our framework, so an efficient randomised approximation may turn out to be more practical than an exact deterministic algorithm.


Lastly, we have demonstrated, with appropriate modifications, several extensions of our results to other settings.  We have adapted our framework to include induced subgraphs, hypergraphs and matroids, all using an appropriate choice of width parameter and ``$\lambda$-multiplicativity''.  These results are indicative; this aspect of our work has not been fully explored and we hope that further generalisations of Theorem~\ref{thm:mainedge,tw} are available.


\subsection*{Acknowledgement}

We are very grateful for the helpful referee comments.

\begin{thebibliography}{10}

\bibitem{AiHo04}
Martin Aigner and Hein van~der Holst.
\newblock Interlace polynomials.
\newblock {\em Linear Algebra Appl.}, 377:11--30, 2004.

\bibitem{AFW95}
Noga Alon, Alan~M. Frieze, and Dominic J.~A. Welsh.
\newblock Polynomial time randomized approximation schemes for
  {T}utte-{G}r\"othendieck invariants: the dense case.
\newblock {\em Random Structures Algorithms}, 6(4):459--478, 1995.

\bibitem{And98}
Artur Andrzejak.
\newblock An algorithm for the {T}utte polynomials of graphs of bounded
  treewidth.
\newblock {\em Discrete Math.}, 190(1-3):39--54, 1998.

\bibitem{ABS04b}
Richard Arratia, B{\'e}la Bollob{\'a}s, and Gregory~B. Sorkin.
\newblock A two-variable interlace polynomial.
\newblock {\em Combinatorica}, 24(4):567--584, 2004.

\bibitem{BKMP05}
Noam Berger, Claire Kenyon, Elchanan Mossel, and Yuval Peres.
\newblock Glauber dynamics on trees and hyperbolic graphs.
\newblock {\em Probab. Theory Related Fields}, 131(3):311--340, 2005.

\bibitem{Bir12}
George~D. Birkhoff.
\newblock A determinant formula for the number of ways of coloring a map.
\newblock {\em Ann.~of Math.~(2)}, 14(1-4):42--46, 1912/13.

\bibitem{BlHo08}
Markus Bl{\"a}ser and Christian Hoffmann.
\newblock On the complexity of the interlace polynomial.
\newblock In Susanne Albers and Pascal Weil, editors, {\em 25th International
  Symposium on Theoretical Aspects of Computer Science (STACS 2008)}, volume~1
  of {\em Leibniz International Proceedings in Informatics (LIPIcs)}, pages
  97--108, Dagstuhl, Germany, 2008. Schloss Dagstuhl--Leibniz-Zentrum fuer
  Informatik.

\bibitem{BlHo11}
Markus Bl{\"a}ser and Christian Hoffmann.
\newblock Fast evaluation of interlace polynomials on graphs of bounded
  treewidth.
\newblock {\em Algorithmica}, 61(1):3--35, 2011.

\bibitem{Bod88}
Hans~L. Bodlaender.
\newblock Some classes of graphs with bounded treewidth.
\newblock {\em Bulletin of the EATCS}, 36:116--125, 1988.

\bibitem{Bod96}
Hans~L. Bodlaender.
\newblock A linear-time algorithm for finding tree-decompositions of small
  treewidth.
\newblock {\em SIAM J. Comput.}, 25(6):1305--1317, 1996.

\bibitem{Bod06}
Hans~L. Bodlaender.
\newblock Treewidth: Characterizations, applications, and computations.
\newblock In Fedor~V. Fomin, editor, {\em WG}, volume 4271 of {\em Lecture
  Notes in Computer Science}, pages 1--14. Springer, 2006.

\bibitem{Bor04}
Magnus Bordewich.
\newblock Approximating the number of acyclic orientations for a class of
  sparse graphs.
\newblock {\em Combin.~Probab.~Comput.}, 13(1):1--16, 2004.

\bibitem{BDK06}
Magnus Bordewich, Martin~E. Dyer, and Marek Karpinski.
\newblock Stopping times, metrics and approximate counting.
\newblock In Michele Bugliesi, Bart Preneel, Vladimiro Sassone, and Ingo
  Wegener, editors, {\em ICALP (1)}, volume 4051 of {\em Lecture Notes in
  Computer Science}, pages 108--119. Springer, 2006.

\bibitem{BDK08}
Magnus Bordewich, Martin~E. Dyer, and Marek Karpinski.
\newblock Path coupling using stopping times and counting independent sets and
  colorings in hypergraphs.
\newblock {\em Random Structures Algorithms}, 32(3):375--399, 2008.

\bibitem{BoKa11}
Magnus Bordewich and Ross~J. Kang.
\newblock Rapid mixing of subset glauber dynamics on graphs of bounded
  tree-width.
\newblock In Luca Aceto, Monika Henzinger, and Jiri Sgall, editors, {\em ICALP
  (1)}, volume 6755 of {\em Lecture Notes in Computer Science}, pages 533--544.
  Springer, 2011.

\bibitem{Bub01}
Russ Bubley.
\newblock {\em Randomized algorithms: approximation, generation, and counting}.
\newblock CPHC/BCS Distinguished Dissertations. Springer-Verlag London Ltd.,
  London, 2001.

\bibitem{ChSe89}
Fan R.~K. Chung and Paul~D. Seymour.
\newblock Graphs with small bandwidth and cutwidth.
\newblock {\em Discrete Math.}, 75(1-3):113--119, 1989.
\newblock Graph theory and combinatorics (Cambridge, 1988).

\bibitem{Cou08}
Bruno Courcelle.
\newblock A multivariate interlace polynomial and its computation for graphs of
  bounded clique-width.
\newblock {\em Electron.~J. Combin.}, 15(1):\#69, 36 pp., 2008.

\bibitem{DeMo10}
Amir Dembo and Andrea Montanari.
\newblock Ising models on locally tree-like graphs.
\newblock {\em Ann. Appl.~Probab.}, 20(2):565--592, 2010.

\bibitem{DiSt91}
Persi Diaconis and Daniel Stroock.
\newblock Geometric bounds for eigenvalues of {M}arkov chains.
\newblock {\em Ann.~Appl.~Probab.}, 1(1):36--61, 1991.

\bibitem{DLP10}
Jian Ding, Eyal Lubetzky, and Yuval Peres.
\newblock Mixing time of critical {I}sing model on trees is polynomial in the
  height.
\newblock {\em Comm. Math. Phys.}, 295(1):161--207, 2010.

\bibitem{DyGr00}
Martin Dyer and Catherine Greenhill.
\newblock On {M}arkov chains for independent sets.
\newblock {\em J. Algorithms}, 35(1):17--49, 2000.

\bibitem{ElMe11a}
Joanna~A. Ellis-Monaghan and Criel Merino.
\newblock Graph polynomials and their applications {I}: The {T}utte polynomial.
\newblock In Matthias Dehmer, editor, {\em Structural Analysis of Complex
  Networks}, pages 219--255. Birkh\"{a}user Boston, 2011.

\bibitem{ElMe11b}
Joanna~A. Ellis-Monaghan and Criel Merino.
\newblock Graph polynomials and their applications {II}: Interrelations and
  interpretations.
\newblock In Matthias Dehmer, editor, {\em Structural Analysis of Complex
  Networks}, pages 257--292. Birkh\"{a}user Boston, 2011.

\bibitem{FoTh06}
Fedor~V. Fomin and Dimitrios~M. Thilikos.
\newblock A 3-approximation for the pathwidth of {H}alin graphs.
\newblock {\em J. Discrete Algorithms}, 4(4):499--510, 2006.

\bibitem{GeSt12}
Qi~Ge and Daniel {\v{S}}tefankovi{\v{c}}.
\newblock A graph polynomial for independent sets of bipartite graphs.
\newblock {\em Combin.~Probab.~Comput.}, 21(5):695--714, 2012.

\bibitem{GeSt09+}
Qi~Ge and Daniel \v{S}tefankovi\v{c}.
\newblock A graph polynomial for independent sets of bipartite graphs.
\newblock {\em CoRR}, \arxiv{0911.4732}, 2009.

\bibitem{Gla63}
Roy~J. Glauber.
\newblock Time-dependent statistics of the {I}sing model.
\newblock {\em J. Mathematical Phys.}, 4:294--307, 1963.

\bibitem{GoJe12b}
Leslie~Ann Goldberg and Mark Jerrum.
\newblock Approximating the partition function of the ferromagnetic {P}otts
  model.
\newblock {\em J. ACM}, 59(5):\#25, 31 pp., 2012.

\bibitem{GoJe12a}
Leslie~Ann Goldberg and Mark Jerrum.
\newblock A counterexample to rapid mixing of the {G}e-\v {S}tefankovi\v c
  process.
\newblock {\em Electron.~Commun.~Probab.}, 17:\#5, 6 pp., 2012.

\bibitem{GJK10}
Leslie~Ann Goldberg, Mark Jerrum, and Marek Karpinski.
\newblock The mixing time of {G}lauber dynamics for coloring regular trees.
\newblock {\em Random Structures Algorithms}, 36(4):464--476, 2010.

\bibitem{GoTr90}
Gary Gordon and Lorenzo Traldi.
\newblock Generalized activities and the {T}utte polynomial.
\newblock {\em Discrete Math.}, 85(2):167--176, 1990.

\bibitem{Gri06}
Geoffrey Grimmett.
\newblock {\em The random-cluster model}, volume 333 of {\em Grundlehren der
  Mathematischen Wissenschaften}.
\newblock Springer-Verlag, Berlin, 2006.

\bibitem{Hli06}
Petr Hlin{\v{e}}n{\'y}.
\newblock The {T}utte polynomial for matroids of bounded branch-width.
\newblock {\em Combin.~Probab.~Comput.}, 15(3):397--409, 2006.

\bibitem{HOSG08}
Petr Hlin\v{e}n\'y, Sang-il Oum, Detlef Seese, and Georg Gottlob.
\newblock {Width Parameters Beyond Tree-width and their Applications}.
\newblock {\em The Computer Journal}, 51(3):326--362, 2008.

\bibitem{HlWh06}
Petr Hlin\v{e}n\'y and Geoff Whittle.
\newblock Matroid tree-width.
\newblock {\em European J. Combin.}, 27(7):1117--1128, 2006.

\bibitem{Hub11}
Mark Huber.
\newblock Simulation reductions for the {I}sing model.
\newblock {\em J. Stat. Theory Pract.}, 5(3):413--424, 2011.

\bibitem{Isi25}
Ernst Ising.
\newblock Beitrag zur theorie des ferromagnetismus.
\newblock {\em Zeitschrift f\"ur Physik A Hadrons and Nuclei}, 31:253--258,
  1925.

\bibitem{JVW90}
Fran{\c{c}}ois Jaeger, Dirk~L. Vertigan, and Dominic J.~A. Welsh.
\newblock On the computational complexity of the {J}ones and {T}utte
  polynomials.
\newblock {\em Math.~Proc.~Cambridge Philos.~Soc.}, 108(1):35--53, 1990.

\bibitem{Jer03}
Mark Jerrum.
\newblock {\em Counting, sampling and integrating: algorithms and complexity}.
\newblock Lectures in Mathematics ETH Z\"urich. Birkh\"auser Verlag, Basel,
  2003.

\bibitem{JeSi93}
Mark Jerrum and Alistair Sinclair.
\newblock Polynomial-time approximation algorithms for the {I}sing model.
\newblock {\em SIAM J. Comput.}, 22(5):1087--1116, 1993.

\bibitem{Jon85}
Vaughan F.~R. Jones.
\newblock A polynomial invariant for knots via von {N}eumann algebras.
\newblock {\em Bull. Amer. Math. Soc. (N.S.)}, 12(1):103--111, 1985.

\bibitem{Kar01}
David~R. Karger.
\newblock A randomized fully polynomial time approximation scheme for the
  all-terminal network reliability problem.
\newblock {\em SIAM Rev.}, 43(3):499--522, 2001.

\bibitem{Kas08}
Navin Kashyap.
\newblock Matroid pathwidth and code trellis complexity.
\newblock {\em SIAM J. Discrete Math.}, 22(1):256--272, 2008.

\bibitem{Kin92}
Nancy~G. Kinnersley.
\newblock The vertex separation number of a graph equals its path-width.
\newblock {\em Inform.~Process.~Lett.}, 42(6):345--350, 1992.

\bibitem{KoSo93}
Ephraim Korach and Nir Solel.
\newblock Tree-width, path-width, and cutwidth.
\newblock {\em Discrete Appl.~Math.}, 43(1):97--101, 1993.

\bibitem{Mak08}
Johann~A. Makowsky.
\newblock From a zoo to a zoology: towards a general theory of graph
  polynomials.
\newblock {\em Theory Comput. Syst.}, 43(3-4):542--562, 2008.

\bibitem{MaMa03}
Johann~A. Makowsky and Julian~P. Mari{\~n}o.
\newblock Farrell polynomials on graphs of bounded tree width.
\newblock {\em Adv.~in Appl.~Math.}, 30(1-2):160--176, 2003.
\newblock Formal power series and algebraic combinatorics (Scottsdale, AZ,
  2001).

\bibitem{MSW07}
Fabio Martinelli, Alistair Sinclair, and Dror Weitz.
\newblock Fast mixing for independent sets, colorings, and other models on
  trees.
\newblock {\em Random Structures Algorithms}, 31(2):134--172, 2007.

\bibitem{Nob98}
Steven~D. Noble.
\newblock Evaluating the {T}utte polynomial for graphs of bounded tree-width.
\newblock {\em Combin.~Probab.~Comput.}, 7(3):307--321, 1998.

\bibitem{Nob09}
Steven~D. Noble.
\newblock Evaluating a weighted graph polynomial for graphs of bounded
  tree-width.
\newblock {\em Electron.~J. Combin.}, 16(1):\#64, 14 pp., 2009.

\bibitem{NoWe99}
Steven~D. Noble and Dominic J.~A. Welsh.
\newblock A weighted graph polynomial from chromatic invariants of knots.
\newblock {\em Ann.~Inst.~Fourier (Grenoble)}, 49(3):1057--1087, 1999.
\newblock Symposium {\`a} la M{\'e}moire de Fran{\c{c}}ois Jaeger (Grenoble,
  1998).

\bibitem{Oxl92}
James~G. Oxley.
\newblock {\em Matroid theory}.
\newblock Oxford Science Publications. The Clarendon Press Oxford University
  Press, New York, 1992.

\bibitem{OxWe79}
James~G. Oxley and Dominic J.~A. Welsh.
\newblock The {T}utte polynomial and percolation.
\newblock In {\em Graph theory and related topics ({P}roc. {C}onf., {U}niv.
  {W}aterloo, {W}aterloo, {O}nt., 1977)}, pages 329--339. Academic Press, New
  York, 1979.

\bibitem{Pot52}
Renfrey~B. Potts.
\newblock Some generalized order-disorder transformations.
\newblock {\em Proc.~Cambridge Philos.~Soc.}, 48:106--109, 1952.

\bibitem{Ran06}
Dana Randall.
\newblock Rapidly mixing {M}arkov chains with applications in computer science
  and physics.
\newblock {\em Computing in Science Engineering}, 8(2):30--41, March-April
  2006.

\bibitem{Sin92}
Alistair Sinclair.
\newblock Improved bounds for mixing rates of {M}arkov chains and
  multicommodity flow.
\newblock {\em Combin.~Probab.~Comput.}, 1(4):351--370, 1992.

\bibitem{Sok05}
Alan~D. Sokal.
\newblock The multivariate {T}utte polynomial (alias {P}otts model) for graphs
  and matroids.
\newblock In {\em Surveys in Combinatorics 2005}, volume 327 of {\em London
  Math. Soc. Lecture Note Ser.}, pages 173--226. Cambridge Univ.~Press,
  Cambridge, 2005.

\bibitem{TVVY10}
Prasad Tetali, Juan~Carlos Vera, Eric Vigoda, and Linji Yang.
\newblock Phase transition for the mixing time of the {G}lauber dynamics for
  coloring regular trees.
\newblock In Moses Charikar, editor, {\em SODA}, pages 1646--1656. SIAM, 2010.

\bibitem{Tho96}
Robin Thomas.
\newblock Tree-decompositions of graphs, 1996.
\newblock Lecture notes; available at
  \url{http://people.math.gatech.edu/~thomas/PAP/treenotes.pdf}. Accessed
  10/2014.

\bibitem{Tut54}
William~T. Tutte.
\newblock A contribution to the theory of chromatic polynomials.
\newblock {\em Canadian J. Math.}, 6:80--91, 1954.

\bibitem{Wel76}
Dominic J.~A. Welsh.
\newblock {\em Matroid theory}.
\newblock Academic Press, London, 1976.
\newblock L. M. S. Monographs, No. 8.

\bibitem{Wel93}
Dominic J.~A. Welsh.
\newblock {\em Complexity: knots, colourings and counting}, volume 186 of {\em
  London Mathematical Society Lecture Note Series}.
\newblock Cambridge University Press, Cambridge, 1993.

\bibitem{WeMe00}
Dominic J.~A. Welsh and Criel Merino.
\newblock The {P}otts model and the {T}utte polynomial.
\newblock {\em J. Math.~Phys.}, 41(3):1127--1152, 2000.
\newblock Probabilistic techniques in equilibrium and nonequilibrium
  statistical physics.

\bibitem{Whi11}
Jacob~A. White.
\newblock On multivariate chromatic polynomials of hypergraphs and hyperedge
  elimination.
\newblock {\em Electron.~J. Combin.}, 18(1):\#160, 17 pp., 2011.

\end{thebibliography}





%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}