@phdthesis{6473,
  abstract     = {Single cells are constantly interacting with their environment and each other, more importantly, the accurate perception of environmental cues is crucial for growth, survival, and reproduction. This communication between cells and their environment can be formalized in mathematical terms and be quantified as the information flow between them, as prescribed by information theory.
The recent availability of real-time dynamical patterns of signaling molecules in single cells has allowed us to identify encoding about the identity of the environment in the time-series. However, efficient estimation of the information transmitted by these signals has been a data-analysis challenge due to the high dimensionality of the trajectories and the limited number of samples. In the first part of this thesis, we develop and evaluate decoding-based estimation methods to lower bound the mutual information and derive model-based precise information estimates for biological reaction networks governed by the chemical master equation. This is followed by applying the decoding-based methods to study the intracellular representation of extracellular changes in budding yeast, by observing the transient dynamics of nuclear translocation of 10 transcription factors in response to 3 stress conditions. Additionally, we apply these estimators to previously published data on ERK and Ca2+ signaling and yeast stress response. We argue that this single cell decoding-based measure of information provides an unbiased, quantitative and interpretable measure for the fidelity of biological signaling processes.
Finally, in the last section, we deal with gene regulation which is primarily controlled by transcription factors (TFs) that bind to the DNA to activate gene expression. The possibility that non-cognate TFs activate transcription diminishes the accuracy of regulation with potentially disastrous effects for the cell. This ‘crosstalk’ acts as a previously unexplored source of noise in biochemical networks and puts a strong constraint on their performance. To mitigate erroneous initiation we propose an out of equilibrium scheme that implements kinetic proofreading. We show that such architectures are favored over their equilibrium counterparts for complex organisms despite introducing noise in gene expression.},
  author       = {Cepeda Humerez, Sarah A},
  issn         = {2663-337X},
  keywords     = {Information estimation, Time-series, data analysis},
  pages        = {135},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Estimating information flow in single cells}},
  doi          = {10.15479/AT:ISTA:6473},
  year         = {2019},
}

@article{6784,
  abstract     = {Mathematical models have been used successfully at diverse scales of biological organization, ranging from ecology and population dynamics to stochastic reaction events occurring between individual molecules in single cells. Generally, many biological processes unfold across multiple scales, with mutations being the best studied example of how stochasticity at the molecular scale can influence outcomes at the population scale. In many other contexts, however, an analogous link between micro- and macro-scale remains elusive, primarily due to the challenges involved in setting up and analyzing multi-scale models. Here, we employ such a model to investigate how stochasticity propagates from individual biochemical reaction events in the bacterial innate immune system to the ecology of bacteria and bacterial viruses. We show analytically how the dynamics of bacterial populations are shaped by the activities of immunity-conferring enzymes in single cells and how the ecological consequences imply optimal bacterial defense strategies against viruses. Our results suggest that bacterial populations in the presence of viruses can either optimize their initial growth rate or their population size, with the first strategy favoring simple immunity featuring a single restriction modification system and the second strategy favoring complex bacterial innate immunity featuring several simultaneously active restriction modification systems.},
  author       = {Ruess, Jakob and Pleska, Maros and Guet, Calin C and Tkačik, Gašper},
  issn         = {1553-7358},
  journal      = {PLoS Computational Biology},
  number       = {7},
  pages        = {e1007168},
  publisher    = {Public Library of Science},
  title        = {{Molecular noise of innate immunity shapes bacteria-phage ecologies}},
  doi          = {10.1371/journal.pcbi.1007168},
  volume       = {15},
  year         = {2019},
}

@article{6900,
  abstract     = {Across diverse biological systems—ranging from neural networks to intracellular signaling and genetic regulatory networks—the information about changes in the environment is frequently encoded in the full temporal dynamics of the network nodes. A pressing data-analysis challenge has thus been to efficiently estimate the amount of information that these dynamics convey from experimental data. Here we develop and evaluate decoding-based estimation methods to lower bound the mutual information about a finite set of inputs, encoded in single-cell high-dimensional time series data. For biological reaction networks governed by the chemical Master equation, we derive model-based information approximations and analytical upper bounds, against which we benchmark our proposed model-free decoding estimators. In contrast to the frequently-used k-nearest-neighbor estimator, decoding-based estimators robustly extract a large fraction of the available information from high-dimensional trajectories with a realistic number of data samples. We apply these estimators to previously published data on Erk and Ca2+ signaling in mammalian cells and to yeast stress-response, and find that substantial amount of information about environmental state can be encoded by non-trivial response statistics even in stationary signals. We argue that these single-cell, decoding-based information estimates, rather than the commonly-used tests for significant differences between selected population response statistics, provide a proper and unbiased measure for the performance of biological signaling networks.},
  author       = {Cepeda Humerez, Sarah A and Ruess, Jakob and Tkačik, Gašper},
  issn         = {1553-7358},
  journal      = {PLoS Computational Biology},
  number       = {9},
  pages        = {e1007290},
  publisher    = {Public Library of Science},
  title        = {{Estimating information in time-varying signals}},
  doi          = {10.1371/journal.pcbi.1007290},
  volume       = {15},
  year         = {2019},
}

@misc{9786,
  author    = {Ruess, Jakob and Pleska, Maros and Guet, Calin C and Tkačik, Gašper},
  title     = {{Supporting text and results}},
  publisher = {Public Library of Science},
  year      = {2019},
  doi       = {10.1371/journal.pcbi.1007168.s001},
}

@article{161,
  abstract     = {Which properties of metabolic networks can be derived solely from stoichiometry? Predictive results have been obtained by flux balance analysis (FBA), by postulating that cells set metabolic fluxes to maximize growth rate. Here we consider a generalization of FBA to single-cell level using maximum entropy modeling, which we extend and test experimentally. Specifically, we define for Escherichia coli metabolism a flux distribution that yields the experimental growth rate: the model, containing FBA as a limit, provides a better match to measured fluxes and it makes a wide range of predictions: on flux variability, regulation, and correlations; on the relative importance of stoichiometry vs. optimization; on scaling relations for growth rate distributions. We validate the latter here with single-cell data at different sub-inhibitory antibiotic concentrations. The model quantifies growth optimization as emerging from the interplay of competitive dynamics in the population and regulation of metabolism at the level of single cells.},
  author       = {De Martino, Daniele and Andersson, Anna M C and Bergmiller, Tobias and Guet, Calin C and Tkačik, Gašper},
  journal      = {Nature Communications},
  number       = {1},
  publisher    = {Springer Nature},
  title        = {{Statistical mechanics for metabolic networks during steady state growth}},
  doi          = {10.1038/s41467-018-05417-9},
  volume       = {9},
  year         = {2018},
}

@article{19,
  abstract     = {Bacteria regulate genes to survive antibiotic stress, but regulation can be far from perfect. When regulation is not optimal, mutations that change gene expression can contribute to antibiotic resistance. It is not systematically understood to what extent natural gene regulation is or is not optimal for distinct antibiotics, and how changes in expression of specific genes quantitatively affect antibiotic resistance. Here we discover a simple quantitative relation between fitness, gene expression, and antibiotic potency, which rationalizes our observation that a multitude of genes and even innate antibiotic defense mechanisms have expression that is critically nonoptimal under antibiotic treatment. First, we developed a pooled-strain drug-diffusion assay and screened Escherichia coli overexpression and knockout libraries, finding that resistance to a range of 31 antibiotics could result from changing expression of a large and functionally diverse set of genes, in a primarily but not exclusively drug-specific manner. Second, by synthetically controlling the expression of single-drug and multidrug resistance genes, we observed that their fitness-expression functions changed dramatically under antibiotic treatment in accordance with a log-sensitivity relation. Thus, because many genes are nonoptimally expressed under antibiotic treatment, many regulatory mutations can contribute to resistance by altering expression and by activating latent defenses.},
  author       = {Palmer, Adam and Chait, Remy P and Kishony, Roy},
  issn         = {0737-4038},
  journal      = {Molecular Biology and Evolution},
  number       = {11},
  pages        = {2669--2684},
  publisher    = {Oxford University Press},
  title        = {{Nonoptimal gene expression creates latent potential for antibiotic resistance}},
  doi          = {10.1093/molbev/msy163},
  volume       = {35},
  year         = {2018},
}

@article{543,
  abstract     = {A central goal in theoretical neuroscience is to predict the response properties of sensory neurons from first principles. To this end, “efficient coding” posits that sensory neurons encode maximal information about their inputs given internal constraints. There exist, however, many variants of efficient coding (e.g., redundancy reduction, different formulations of predictive coding, robust coding, sparse coding, etc.), differing in their regimes of applicability, in the relevance of signals to be encoded, and in the choice of constraints. It is unclear how these types of efficient coding relate or what is expected when different coding objectives are combined. Here we present a unified framework that encompasses previously proposed efficient coding models and extends to unique regimes. We show that optimizing neural responses to encode predictive information can lead them to either correlate or decorrelate their inputs, depending on the stimulus statistics; in contrast, at low noise, efficiently encoding the past always predicts decorrelation. Later, we investigate coding of naturalistic movies and show that qualitatively different types of visual motion tuning and levels of response sparsity are predicted, depending on whether the objective is to recover the past or predict the future. Our approach promises a way to explain the observed diversity of sensory neural responses, as due to multiple functional goals and constraints fulfilled by different cell types and/or circuits.},
  author       = {Chalk, Matthew J and Marre, Olivier and Tkačik, Gašper},
  journal      = {PNAS},
  number       = {1},
  pages        = {186--191},
  publisher    = {National Academy of Sciences},
  title        = {{Toward a unified theory of efficient, predictive, and sparse coding}},
  doi          = {10.1073/pnas.1711114115},
  volume       = {115},
  year         = {2018},
}

@misc{5584,
  abstract     = {This package contains data for the publication "Nonlinear decoding of a complex movie from the mammalian retina" by Deny S. et al, PLOS Comput Biol (2018).

The data consists of
(i) 91 spike sorted, isolated rat retinal ganglion cells that pass stability and quality criteria, recorded on the multi-electrode array, in response to the presentation of the complex movie with many randomly moving dark discs. The responses are represented as 648000 x 91 binary matrix, where the first index indicates the timebin of duration 12.5 ms, and the second index the neural identity. The matrix entry is 0/1 if the neuron didn't/did spike in the particular time bin.
(ii) README file and a graphical illustration of the structure of the experiment, specifying how the 648000 timebins are split into epochs where 1, 2, 4, or 10 discs  were displayed, and which stimulus segments are exact repeats or unique ball trajectories.
(iii) a 648000 x 400 matrix of luminance traces for each of the 20 x 20 positions ("sites") in the movie frame, with time that is locked to the recorded raster. The luminance traces are produced as described in the manuscript by filtering the raw disc movie with a small Gaussian spatial kernel.},
  author       = {Deny, Stephane and Marre, Olivier and Botella-Soler, Vicente and Martius, Georg S and Tkačik, Gašper},
  keywords     = {retina, decoding, regression, neural networks, complex stimulus},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Nonlinear decoding of a complex movie from the mammalian retina}},
  doi          = {10.15479/AT:ISTA:98},
  year         = {2018},
}

@misc{5585,
  abstract     = {Mean repression values and standard error of the mean are given for all operator mutant libraries.},
  author       = {Igler, Claudia and Lagator, Mato and Tkačik, Gašper and Bollback, Jonathan P and Guet, Calin C},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Data for the paper Evolutionary potential of transcription factors for gene regulatory rewiring}},
  doi          = {10.15479/AT:ISTA:108},
  year         = {2018},
}

@misc{5587,
  abstract     = {Supporting material to the article
STATISTICAL MECHANICS FOR METABOLIC NETWORKS IN STEADY-STATE GROWTH

boundscoli.dat
Flux Bounds of the E. coli catabolic core model iAF1260 in a glucose limited minimal medium.

polcoli.dat
Matrix encoding the polytope of the E. coli catabolic core model iAF1260 in a glucose limited minimal medium,
obtained from the stoichiometric matrix by standard linear algebra  (reduced row echelon form).

ellis.dat
Approximate Lowner-John ellipsoid rounding the polytope of the E. coli catabolic core model iAF1260 in a glucose limited minimal medium
obtained with the Lovasz method.

point0.dat
Center of the approximate Lowner-John ellipsoid rounding the polytope of the E. coli catabolic core model iAF1260 in a glucose limited minimal medium
obtained with the Lovasz method.

lovasz.cpp
This C++ code file receives in input the polytope of the feasible steady states of a metabolic network,
(matrix and bounds), and it gives in output an approximate Lowner-John ellipsoid rounding the polytope
with the Lovasz method
NB inputs are referred by defaults to the catabolic core of the E. coli network iAF1260.
For further details we refer to  PLoS ONE 10.4 e0122670 (2015).

sampleHRnew.cpp
This C++ code file receives in input the polytope of the feasible steady states of a metabolic network,
(matrix and bounds), the ellipsoid rounding the polytope, a point inside and
it gives in output a max entropy sampling at fixed average growth rate
of the steady states by performing a Hit-and-Run Monte Carlo Markov chain.
NB inputs are referred by defaults to the catabolic core of the E. coli network iAF1260.
For further details we refer to  PLoS ONE 10.4 e0122670 (2015).},
  author       = {De Martino, Daniele and Tkačik, Gašper},
  keywords     = {metabolic networks, e.coli core, maximum entropy, monte carlo markov chain sampling, ellipsoidal rounding},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Supporting materials ``STATISTICAL MECHANICS FOR METABOLIC NETWORKS IN STEADY-STATE GROWTH''}},
  doi          = {10.15479/AT:ISTA:62},
  year         = {2018},
}

@article{607,
  abstract     = {We study the Fokker-Planck equation derived in the large system limit of the Markovian process describing the dynamics of quantitative traits. The Fokker-Planck equation is posed on a bounded domain and its transport and diffusion coefficients vanish on the domain's boundary. We first argue that, despite this degeneracy, the standard no-flux boundary condition is valid. We derive the weak formulation of the problem and prove the existence and uniqueness of its solutions by constructing the corresponding contraction semigroup on a suitable function space. Then, we prove that for the parameter regime with high enough mutation rate the problem exhibits a positive spectral gap, which implies exponential convergence to equilibrium. Next, we provide a simple derivation of the so-called Dynamic Maximum Entropy (DynMaxEnt) method for approximation of observables (moments) of the Fokker-Planck solution, which can be interpreted as a nonlinear Galerkin approximation. The limited applicability of the DynMaxEnt method inspires us to introduce its modified version that is valid for the whole range of admissible parameters. Finally, we present several numerical experiments to demonstrate the performance of both the original and modified DynMaxEnt methods. We observe that in the parameter regimes where both methods are valid, the modified one exhibits slightly better approximation properties compared to the original one.},
  author       = {Boďová, Katarína and Haskovec, Jan and Markowich, Peter},
  journal      = {Physica D: Nonlinear Phenomena},
  pages        = {108--120},
  publisher    = {Elsevier},
  title        = {{Well posedness and maximum entropy approximation for the dynamics of quantitative traits}},
  doi          = {10.1016/j.physd.2017.10.015},
  volume       = {376-377},
  year         = {2018},
}

@article{67,
  abstract     = {Gene regulatory networks evolve through rewiring of individual components—that is, through changes in regulatory connections. However, the mechanistic basis of regulatory rewiring is poorly understood. Using a canonical gene regulatory system, we quantify the properties of transcription factors that determine the evolutionary potential for rewiring of regulatory connections: robustness, tunability and evolvability. In vivo repression measurements of two repressors at mutated operator sites reveal their contrasting evolutionary potential: while robustness and evolvability were positively correlated, both were in trade-off with tunability. Epistatic interactions between adjacent operators alleviated this trade-off. A thermodynamic model explains how the differences in robustness, tunability and evolvability arise from biophysical characteristics of repressor–DNA binding. The model also uncovers that the energy matrix, which describes how mutations affect repressor–DNA binding, encodes crucial information about the evolutionary potential of a repressor. The biophysical determinants of evolutionary potential for regulatory rewiring constitute a mechanistic framework for understanding network evolution.},
  author       = {Igler, Claudia and Lagator, Mato and Tkačik, Gašper and Bollback, Jonathan P and Guet, Calin C},
  journal      = {Nature Ecology and Evolution},
  number       = {10},
  pages        = {1633--1643},
  publisher    = {Springer Nature},
  title        = {{Evolutionary potential of transcription factors for gene regulatory rewiring}},
  doi          = {10.1038/s41559-018-0651-y},
  volume       = {2},
  year         = {2018},
}

@misc{9813,
  abstract     = {File S1 contains figures that clarify the following features: (i) effect of population size on the average number/frequency of SI classes, (ii) changes in the minimal completeness deficit in time for a single class, and (iii) diversification diagrams for all studied pathways, including the summary figure for k = 8. File S2 contains the code required for a stochastic simulation of the SLF system with an example. This file also includes the output in the form of figures and tables.},
  author       = {Boďová, Katarína and Priklopil, Tadeas and Field, David and Barton, Nicholas H and Pickup, Melinda},
  publisher    = {Genetics Society of America},
  title        = {{Supplemental material for Bodova et al., 2018}},
  doi          = {10.25386/genetics.6148304.v1},
  year         = {2018},
}

@misc{9831,
  abstract     = {Implementation of the inference method in Matlab, including three applications of the method: The first one for the model of ant motion, the second one for bacterial chemotaxis, and the third one for the motion of fish.},
  author       = {Boďová, Katarína and Mitchell, Gabriel and Harpaz, Roy and Schneidman, Elad and Tkačik, Gašper},
  publisher    = {Public Library of Science},
  title        = {{Implementation of the inference method in Matlab}},
  doi          = {10.1371/journal.pone.0193049.s001},
  year         = {2018},
}

@article{406,
  abstract     = {Recent developments in automated tracking allow uninterrupted, high-resolution recording of animal trajectories, sometimes coupled with the identification of stereotyped changes of body pose or other behaviors of interest. Analysis and interpretation of such data represents a challenge: the timing of animal behaviors may be stochastic and modulated by kinematic variables, by the interaction with the environment or with the conspecifics within the animal group, and dependent on internal cognitive or behavioral state of the individual. Existing models for collective motion typically fail to incorporate the discrete, stochastic, and internal-state-dependent aspects of behavior, while models focusing on individual animal behavior typically ignore the spatial aspects of the problem. Here we propose a probabilistic modeling framework to address this gap. Each animal can switch stochastically between different behavioral states, with each state resulting in a possibly different law of motion through space. Switching rates for behavioral transitions can depend in a very general way, which we seek to identify from data, on the effects of the environment as well as the interaction between the animals. We represent the switching dynamics as a Generalized Linear Model and show that: (i) forward simulation of multiple interacting animals is possible using a variant of the Gillespie’s Stochastic Simulation Algorithm; (ii) formulated properly, the maximum likelihood inference of switching rate functions is tractably solvable by gradient descent; (iii) model selection can be used to identify factors that modulate behavioral state switching and to appropriately adjust model complexity to data. To illustrate our framework, we apply it to two synthetic models of animal motion and to real zebrafish tracking data.},
  author       = {Boďová, Katarína and Mitchell, Gabriel and Harpaz, Roy and Schneidman, Elad and Tkačik, Gašper},
  journal      = {PLoS ONE},
  number       = {3},
  pages        = {e0193049},
  publisher    = {Public Library of Science},
  title        = {{Probabilistic models of individual and collective animal behavior}},
  doi          = {10.1371/journal.pone.0193049},
  volume       = {13},
  year         = {2018},
}

@article{457,
  abstract     = {Temperate bacteriophages integrate in bacterial genomes as prophages and represent an important source of genetic variation for bacterial evolution, frequently transmitting fitness-augmenting genes such as toxins responsible for virulence of major pathogens. However, only a fraction of bacteriophage infections are lysogenic and lead to prophage acquisition, whereas the majority are lytic and kill the infected bacteria. Unless able to discriminate lytic from lysogenic infections, mechanisms of immunity to bacteriophages are expected to act as a double-edged sword and increase the odds of survival at the cost of depriving bacteria of potentially beneficial prophages. We show that although restriction-modification systems as mechanisms of innate immunity prevent both lytic and lysogenic infections indiscriminately in individual bacteria, they increase the number of prophage-acquiring individuals at the population level. We find that this counterintuitive result is a consequence of phage-host population dynamics, in which restriction-modification systems delay infection onset until bacteria reach densities at which the probability of lysogeny increases. These results underscore the importance of population-level dynamics as a key factor modulating costs and benefits of immunity to temperate bacteriophages.},
  author       = {Pleska, Maros and Lang, Moritz and Refardt, Dominik and Levin, Bruce and Guet, Calin C},
  journal      = {Nature Ecology and Evolution},
  number       = {2},
  pages        = {359--366},
  publisher    = {Springer Nature},
  title        = {{Phage-host population dynamics promotes prophage acquisition in bacteria with innate immunity}},
  doi          = {10.1038/s41559-017-0424-z},
  volume       = {2},
  year         = {2018},
}

@article{281,
  abstract     = {Although cells respond specifically to environments, how environmental identity is encoded intracellularly is not understood. Here, we study this organization of information in budding yeast by estimating the mutual information between environmental transitions and the dynamics of nuclear translocation for 10 transcription factors. Our method of estimation is general, scalable, and based on decoding from single cells. The dynamics of the transcription factors are necessary to encode the highest amounts of extracellular information, and we show that information is transduced through two channels: Generalists (Msn2/4, Tod6 and Dot6, Maf1, and Sfp1) can encode the nature of multiple stresses, but only if stress is high; specialists (Hog1, Yap1, and Mig1/2) encode one particular stress, but do so more quickly and for a wider range of magnitudes. In particular, Dot6 encodes almost as much information as Msn2, the master regulator of the environmental stress response. Each transcription factor reports differently, and it is only their collective behavior that distinguishes between multiple environmental states. Changes in the dynamics of the localization of transcription factors thus constitute a precise, distributed internal representation of extracellular change. We predict that such multidimensional representations are common in cellular decision-making.},
  author       = {Granados, Alejandro and Pietsch, Julian and Cepeda Humerez, Sarah A and Farquhar, Isebail and Tkačik, Gašper and Swain, Peter},
  journal      = {PNAS},
  number       = {23},
  pages        = {6088--6093},
  publisher    = {National Academy of Sciences},
  title        = {{Distributed and dynamic intracellular organization of extracellular information}},
  doi          = {10.1073/pnas.1716659115},
  volume       = {115},
  year         = {2018},
}

@article{292,
  abstract     = {Retina is a paradigmatic system for studying sensory encoding: the transformation of light into spiking activity of ganglion cells. The inverse problem, where stimulus is reconstructed from spikes, has received less attention, especially for complex stimuli that should be reconstructed “pixel-by-pixel”. We recorded around a hundred neurons from a dense patch in a rat retina and decoded movies of multiple small randomly-moving discs. We constructed nonlinear (kernelized and neural network) decoders that improved significantly over linear results. An important contribution to this was the ability of nonlinear decoders to reliably separate between neural responses driven by locally fluctuating light signals, and responses at locally constant light driven by spontaneous-like activity. This improvement crucially depended on the precise, non-Poisson temporal structure of individual spike trains, which originated in the spike-history dependence of neural responses. We propose a general principle by which downstream circuitry could discriminate between spontaneous and stimulus-driven activity based solely on higher-order statistical structure in the incoming spike trains.},
  author       = {Botella-Soler, Vicente and Deny, Stephane and Martius, Georg S and Marre, Olivier and Tkačik, Gašper},
  journal      = {PLoS Computational Biology},
  number       = {5},
  pages        = {e1006057},
  publisher    = {Public Library of Science},
  title        = {{Nonlinear decoding of a complex movie from the mammalian retina}},
  doi          = {10.1371/journal.pcbi.1006057},
  volume       = {14},
  year         = {2018},
}

@article{305,
  abstract     = {The hanging-drop network (HDN) is a technology platform based on a completely open microfluidic network at the bottom of an inverted, surface-patterned substrate. The platform is predominantly used for the formation, culturing, and interaction of self-assembled spherical microtissues (spheroids) under precisely controlled flow conditions. Here, we describe design, fabrication, and operation of microfluidic hanging-drop networks.},
  author       = {Misun, Patrick and Birchler, Axel and Lang, Moritz and Hierlemann, Andreas and Frey, Olivier},
  journal      = {Methods in Molecular Biology},
  pages        = {183--202},
  publisher    = {Springer},
  title        = {{Fabrication and operation of microfluidic hanging drop networks}},
  doi          = {10.1007/978-1-4939-7792-5_15},
  volume       = {1771},
  year         = {2018},
}

@article{306,
  abstract     = {A cornerstone of statistical inference, the maximum entropy framework is being increasingly applied to construct descriptive and predictive models of biological systems, especially complex biological networks, from large experimental data sets. Both its broad applicability and the success it obtained in different contexts hinge upon its conceptual simplicity and mathematical soundness. Here we try to concisely review the basic elements of the maximum entropy principle, starting from the notion of ‘entropy’, and describe its usefulness for the analysis of biological systems. As examples, we focus specifically on the problem of reconstructing gene interaction networks from expression data and on recent work attempting to expand our system-level understanding of bacterial metabolism. Finally, we highlight some extensions and potential limitations of the maximum entropy approach, and point to more recent developments that are likely to play a key role in the upcoming challenges of extracting structures and information from increasingly rich, high-throughput biological data.},
  author       = {De Martino, Andrea and De Martino, Daniele},
  journal      = {Heliyon},
  number       = {4},
  pages        = {e00596},
  publisher    = {Elsevier},
  title        = {{An introduction to the maximum entropy approach and its application to inference problems in biology}},
  doi          = {10.1016/j.heliyon.2018.e00596},
  volume       = {4},
  year         = {2018},
}

