% Journal article. `pages` holds the article identifier (e-locator) that also
% appears as the DOI suffix, not a page range. Diacritics in the author list are
% written as LaTeX escapes so the entry works with both classic BibTeX and Biber.
@article{6900,
  abstract     = {Across diverse biological systems—ranging from neural networks to intracellular signaling and genetic regulatory networks—the information about changes in the environment is frequently encoded in the full temporal dynamics of the network nodes. A pressing data-analysis challenge has thus been to efficiently estimate the amount of information that these dynamics convey from experimental data. Here we develop and evaluate decoding-based estimation methods to lower bound the mutual information about a finite set of inputs, encoded in single-cell high-dimensional time series data. For biological reaction networks governed by the chemical Master equation, we derive model-based information approximations and analytical upper bounds, against which we benchmark our proposed model-free decoding estimators. In contrast to the frequently-used k-nearest-neighbor estimator, decoding-based estimators robustly extract a large fraction of the available information from high-dimensional trajectories with a realistic number of data samples. We apply these estimators to previously published data on Erk and Ca2+ signaling in mammalian cells and to yeast stress-response, and find that substantial amount of information about environmental state can be encoded by non-trivial response statistics even in stationary signals. We argue that these single-cell, decoding-based information estimates, rather than the commonly-used tests for significant differences between selected population response statistics, provide a proper and unbiased measure for the performance of biological signaling networks.},
  author       = {Cepeda Humerez, Sarah A and Ruess, Jakob and Tka{\v{c}}ik, Ga{\v{s}}per},
  issn         = {1553-7358},
  journal      = {PLOS Computational Biology},
  number       = {9},
  pages        = {e1007290},
  publisher    = {Public Library of Science},
  title        = {Estimating information in time-varying signals},
  doi          = {10.1371/journal.pcbi.1007290},
  volume       = {15},
  year         = {2019},
}

% PhD thesis. `school` is the field classic BibTeX requires for this entry type;
% `publisher` is kept alongside it for tools that read the original export.
% NOTE(review): `pages = 135` looks like a total page count rather than a range;
% confirm before mapping it to a biblatex `pagetotal` field.
@phdthesis{6473,
  abstract     = {Single cells are constantly interacting with their environment and each other, more importantly, the accurate perception of environmental cues is crucial for growth, survival, and reproduction. This communication between cells and their environment can be formalized in mathematical terms and be quantified as the information flow between them, as prescribed by information theory. 
The recent availability of real–time dynamical patterns of signaling molecules in single cells has allowed us to identify encoding about the identity of the environment in the time–series. However, efficient estimation of the information transmitted by these signals has been a data–analysis challenge due to the high dimensionality of the trajectories and the limited number of samples. In the first part of this thesis, we develop and evaluate decoding–based estimation methods to lower bound the mutual information and derive model–based precise information estimates for biological reaction networks governed by the chemical master equation. This is followed by applying the decoding-based methods to study the intracellular representation of extracellular changes in budding yeast, by observing the transient dynamics of nuclear translocation of 10 transcription factors in response to 3 stress conditions. Additionally, we apply these estimators to previously published data on ERK and Ca2+ signaling and yeast stress response. We argue that this single cell decoding-based measure of information provides an unbiased, quantitative and interpretable measure for the fidelity of biological signaling processes. 
Finally, in the last section, we deal with gene regulation which is primarily controlled by transcription factors (TFs) that bind to the DNA to activate gene expression. The possibility that non-cognate TFs activate transcription diminishes the accuracy of regulation with potentially disastrous effects for the cell. This ’crosstalk’ acts as a previously unexplored source of noise in biochemical networks and puts a strong constraint on their performance. To mitigate erroneous initiation we propose an out of equilibrium scheme that implements kinetic proofreading. We show that such architectures are favored  over their equilibrium counterparts for complex organisms despite introducing noise in gene expression. },
  author       = {Cepeda Humerez, Sarah A},
  issn         = {2663-337X},
  keywords     = {Information estimation, Time-series, data analysis},
  pages        = {135},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {Estimating information flow in single cells},
  doi          = {10.15479/AT:ISTA:6473},
  year         = {2019},
}

% Journal article. The journal is stored under its full name so the style (or a
% string macro) decides on abbreviation; the page range uses a bare `--`.
% NOTE(review): the fourth author's given name was changed from "Isebail" to
% "Iseabail" to match the published author list; confirm against the DOI record.
@article{281,
  abstract     = {Although cells respond specifically to environments, how environmental identity is encoded intracellularly is not understood. Here, we study this organization of information in budding yeast by estimating the mutual information between environmental transitions and the dynamics of nuclear translocation for 10 transcription factors. Our method of estimation is general, scalable, and based on decoding from single cells. The dynamics of the transcription factors are necessary to encode the highest amounts of extracellular information, and we show that information is transduced through two channels: Generalists (Msn2/4, Tod6 and Dot6, Maf1, and Sfp1) can encode the nature of multiple stresses, but only if stress is high; specialists (Hog1, Yap1, and Mig1/2) encode one particular stress, but do so more quickly and for a wider range of magnitudes. In particular, Dot6 encodes almost as much information as Msn2, the master regulator of the environmental stress response. Each transcription factor reports differently, and it is only their collective behavior that distinguishes between multiple environmental states. Changes in the dynamics of the localization of transcription factors thus constitute a precise, distributed internal representation of extracellular change. We predict that such multidimensional representations are common in cellular decision-making.},
  author       = {Granados, Alejandro and Pietsch, Julian and Cepeda Humerez, Sarah A and Farquhar, Iseabail and Tka{\v{c}}ik, Ga{\v{s}}per and Swain, Peter},
  journal      = {Proceedings of the National Academy of Sciences of the United States of America},
  number       = {23},
  pages        = {6088--6093},
  publisher    = {National Academy of Sciences},
  title        = {Distributed and dynamic intracellular organization of extracellular information},
  doi          = {10.1073/pnas.1716659115},
  volume       = {115},
  year         = {2018},
}

% Journal article. Only the proper noun {Ising} is brace-protected in the title,
% so sentence-casing styles keep its capital while still controlling the rest.
@article{2016,
  abstract     = {The Ising model is one of the simplest and most famous models of interacting systems. It was originally proposed to model ferromagnetic interactions in statistical physics and is now widely used to model spatial processes in many areas such as ecology, sociology, and genetics, usually without testing its goodness-of-fit. Here, we propose an exact goodness-of-fit test for the finite-lattice Ising model. The theory of Markov bases has been developed in algebraic statistics for exact goodness-of-fit testing using a Monte Carlo approach. However, this beautiful theory has fallen short of its promise for applications, because finding a Markov basis is usually computationally intractable. We develop a Monte Carlo method for exact goodness-of-fit testing for the Ising model which avoids computing a Markov basis and also leads to a better connectivity of the Markov chain and hence to a faster convergence. We show how this method can be applied to analyze the spatial organization of receptors on the cell membrane.},
  author       = {Martin Del Campo Sanchez, Abraham and Cepeda Humerez, Sarah A and Uhler, Caroline},
  issn         = {0303-6898},
  journal      = {Scandinavian Journal of Statistics},
  number       = {2},
  pages        = {285--306},
  publisher    = {Wiley-Blackwell},
  title        = {Exact goodness-of-fit testing for the {Ising} model},
  doi          = {10.1111/sjos.12251},
  volume       = {44},
  year         = {2017},
}

% Journal article. `pages` carries the article number that appears as the DOI
% suffix (this entry has no page range). Diacritics in the author list are
% written as LaTeX escapes so the entry works with both classic BibTeX and Biber.
@article{1576,
  abstract     = {Gene expression is controlled primarily by interactions between transcription factor proteins (TFs) and the regulatory DNA sequence, a process that can be captured well by thermodynamic models of regulation. These models, however, neglect regulatory crosstalk: the possibility that noncognate TFs could initiate transcription, with potentially disastrous effects for the cell. Here, we estimate the importance of crosstalk, suggest that its avoidance strongly constrains equilibrium models of TF binding, and propose an alternative nonequilibrium scheme that implements kinetic proofreading to suppress erroneous initiation. This proposal is consistent with the observed covalent modifications of the transcriptional apparatus and predicts increased noise in gene expression as a trade-off for improved specificity. Using information theory, we quantify this trade-off to find when optimal proofreading architectures are favored over their equilibrium counterparts. Such architectures exhibit significant super-Poisson noise at low expression in steady state.},
  author       = {Cepeda Humerez, Sarah A and Rieckh, Georg and Tka{\v{c}}ik, Ga{\v{s}}per},
  journal      = {Physical Review Letters},
  number       = {24},
  pages        = {248101},
  publisher    = {American Physical Society},
  title        = {Stochastic proofreading mechanism alleviates crosstalk in transcriptional regulation},
  doi          = {10.1103/PhysRevLett.115.248101},
  volume       = {115},
  year         = {2015},
}

