@article{12511,
  abstract     = {We consider the problem of formally verifying almost-sure (a.s.) asymptotic stability in discrete-time nonlinear stochastic control systems. While verifying stability in deterministic control systems is extensively studied in the literature, verifying stability in stochastic control systems is an open problem. The few existing works on this topic either consider only specialized forms of stochasticity or make restrictive assumptions on the system, rendering them inapplicable to learning algorithms with neural network policies.
 In this work, we present an approach for general nonlinear stochastic control problems with two novel aspects: (a) instead of classical stochastic extensions of Lyapunov functions, we use ranking supermartingales (RSMs) to certify a.s. asymptotic stability, and (b) we present a method for learning neural network RSMs.
 We prove that our approach guarantees a.s. asymptotic stability of the system and
 provides the first method to obtain bounds on the stabilization time, which stochastic Lyapunov functions do not.
 Finally, we validate our approach experimentally on a set of nonlinear stochastic reinforcement learning environments with neural network policies.},
  author       = {Lechner, Mathias and Zikelic, Dorde and Chatterjee, Krishnendu and Henzinger, Thomas A},
  isbn         = {9781577358350},
  issn         = {2374-3468},
  journal      = {Proceedings of the AAAI Conference on Artificial Intelligence},
  number       = {7},
  pages        = {7326--7336},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Stability verification in stochastic control systems via neural network supermartingales}},
  doi          = {10.1609/aaai.v36i7.20695},
  volume       = {36},
  year         = {2022},
}

@inproceedings{12516,
  abstract     = {The homogeneous continuous LWE (hCLWE) problem is to distinguish samples of a specific high-dimensional Gaussian mixture from standard normal samples. It was shown to be at least as hard as Learning with Errors, but no reduction in the other direction is currently known.
We present four new public-key encryption schemes based on the hardness of hCLWE, with varying tradeoffs between decryption and security errors, and different discretization techniques. Our schemes yield a polynomial-time algorithm for solving hCLWE using a Statistical Zero-Knowledge oracle.},
  author       = {Bogdanov, Andrej and Cueto Noval, Miguel and Hoffmann, Charlotte and Rosen, Alon},
  booktitle    = {Theory of Cryptography},
  isbn         = {9783031223648},
  issn         = {1611-3349},
  location     = {Chicago, IL, United States},
  pages        = {565--592},
  publisher    = {Springer Nature},
  series       = {Lecture Notes in Computer Science},
  title        = {{Public-Key Encryption from Homogeneous CLWE}},
  doi          = {10.1007/978-3-031-22365-5_20},
  volume       = {13748},
  year         = {2022},
}

@misc{12522,
  abstract     = {This .zip file contains the transport data, the codes for the data analysis, the microscopy analysis and the codes for the theoretical simulations for "Majorana-like Coulomb spectroscopy in the absence of zero bias peaks" by M. Valentini, et al. The transport data are saved with hdf5 file format. The files can be opened with the log browser of Labber.},
  author       = {Valentini, Marco and San-Jose, Pablo and Arbiol, Jordi and Marti-Sanchez, Sara and Botifoll, Marc},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Data for "Majorana-like Coulomb spectroscopy in the absence of zero bias peaks"}},
  doi          = {10.15479/AT:ISTA:12102},
  year         = {2022},
}

@unpublished{12536,
  abstract     = {We consider the problem of estimating a rank-1 signal corrupted by structured rotationally invariant noise, and address the following question: how well do inference algorithms perform when the noise statistics is unknown and hence Gaussian noise is assumed? While the matched Bayes-optimal setting with unstructured noise is well understood, the analysis of this mismatched problem is only at its premises. In this paper, we make a step towards understanding the effect of the strong source of mismatch which is the noise statistics. Our main technical contribution is the rigorous analysis of a Bayes estimator and of an approximate message passing (AMP) algorithm, both of which incorrectly assume a Gaussian setup. The first result exploits the theory of spherical integrals and of low-rank matrix perturbations; the idea behind the second one is to design and analyze an artificial AMP which, by taking advantage of the flexibility in the denoisers, is able to "correct" the mismatch. Armed with these sharp asymptotic characterizations, we unveil a rich and often unexpected phenomenology. For example, despite AMP is in principle designed to efficiently compute the Bayes estimator, the former is outperformed by the latter in terms of mean-square error. We show that this performance gap is due to an incorrect estimation of the signal norm. In fact, when the SNR is large enough, the overlaps of the AMP and the Bayes estimator coincide, and they even match those of optimal estimators taking into account the structure of the noise.},
  author       = {Barbier, Jean and Hou, TianQi and Mondelli, Marco and Saenz, Manuel},
  booktitle    = {arXiv},
  eprint       = {2205.10009},
  eprinttype   = {arXiv},
  title        = {{The price of ignorance: How much does it cost to forget noise structure in low-rank matrix estimation?}},
  doi          = {10.48550/arXiv.2205.10009},
  year         = {2022},
}

@inproceedings{12537,
  abstract     = {The Neural Tangent Kernel (NTK) has emerged as a powerful tool to provide memorization, optimization and generalization guarantees in deep neural networks. A line of work has studied the NTK spectrum for two-layer and deep networks with at least a layer with Ω(N) neurons, N being the number of training samples. Furthermore, there is increasing evidence suggesting that deep networks with sub-linear layer widths are powerful memorizers and optimizers, as long as the number of parameters exceeds the number of samples. Thus, a natural open question is whether the NTK is well conditioned in such a challenging sub-linear setup. In this paper, we answer this question in the affirmative. Our key technical contribution is a lower bound on the smallest NTK eigenvalue for deep networks with the minimum possible over-parameterization: the number of parameters is roughly Ω(N) and, hence, the number of neurons is as little as Ω(√N). To showcase the applicability of our NTK bounds, we provide two results concerning memorization capacity and optimization guarantees for gradient descent training.},
  author       = {Bombari, Simone and Amani, Mohammad Hossein and Mondelli, Marco},
  booktitle    = {36th Conference on Neural Information Processing Systems},
  isbn         = {9781713871088},
  pages        = {7628--7640},
  publisher    = {Curran Associates},
  title        = {{Memorization and optimization in deep neural networks with minimum over-parameterization}},
  volume       = {35},
  year         = {2022},
}

@inproceedings{12538,
  abstract     = {In this paper, we study the compression of a target two-layer neural network with N nodes into a compressed network with M<N nodes. More precisely, we consider the setting in which the weights of the target network are i.i.d. sub-Gaussian, and we minimize the population L_2 loss between the outputs of the target and of the compressed network, under the assumption of Gaussian inputs. By using tools from high-dimensional probability, we show that this non-convex problem can be simplified when the target network is sufficiently over-parameterized, and provide the error rate of this approximation as a function of the input dimension and N. In this mean-field limit, the simplified objective, as well as the optimal weights of the compressed network, does not depend on the realization of the target network, but only on expected scaling factors. Furthermore, for networks with ReLU activation, we conjecture that the optimum of the simplified optimization problem is achieved by taking weights on the Equiangular Tight Frame (ETF), while the scaling of the weights and the orientation of the ETF depend on the parameters of the target network. Numerical evidence is provided to support this conjecture.},
  author       = {Amani, Mohammad Hossein and Bombari, Simone and Mondelli, Marco and Pukdee, Rattana and Rini, Stefano},
  booktitle    = {IEEE Information Theory Workshop},
  isbn         = {9781665483414},
  location     = {Mumbai, India},
  pages        = {588--593},
  publisher    = {IEEE},
  title        = {{Sharp asymptotics on the compression of two-layer neural networks}},
  doi          = {10.1109/ITW54588.2022.9965870},
  year         = {2022},
}

@inproceedings{12540,
  abstract     = {We consider the problem of signal estimation in generalized linear models defined via rotationally invariant design matrices. Since these matrices can have an arbitrary spectral distribution, this model is well suited for capturing complex correlation structures which often arise in applications. We propose a novel family of approximate message passing (AMP) algorithms for signal estimation, and rigorously characterize their performance in the high-dimensional limit via a state evolution recursion. Our rotationally invariant AMP has complexity of the same order as the existing AMP derived under the restrictive assumption of a Gaussian design; our algorithm also recovers this existing AMP as a special case. Numerical results showcase a performance close to Vector AMP (which is conjectured to be Bayes-optimal in some settings), but obtained with a much lower complexity, as the proposed algorithm does not require a computationally expensive singular value decomposition.},
  author       = {Venkataramanan, Ramji and Kögler, Kevin and Mondelli, Marco},
  booktitle    = {Proceedings of the 39th International Conference on Machine Learning},
  location     = {Baltimore, MD, United States},
  publisher    = {ML Research Press},
  series       = {Proceedings of Machine Learning Research},
  title        = {{Estimation in rotationally invariant generalized linear models via approximate message passing}},
  volume       = {162},
  year         = {2022},
}

@inproceedings{12568,
  abstract     = {We treat the problem of risk-aware control for stochastic shortest path (SSP) on Markov decision processes (MDP). Typically, expectation is considered for SSP, which however is oblivious to the incurred risk. We present an alternative view, instead optimizing conditional value-at-risk (CVaR), an established risk measure. We treat both Markov chains as well as MDP and introduce, through novel insights, two algorithms, based on linear programming and value iteration, respectively. Both algorithms offer precise and provably correct solutions. Evaluation of our prototype implementation shows that risk-aware control is feasible on several moderately sized models.},
  author       = {Meggendorfer, Tobias},
  booktitle    = {Proceedings of the 36th AAAI Conference on Artificial Intelligence, AAAI 2022},
  isbn         = {9781577358763},
  issn         = {2374-3468},
  location     = {Virtual},
  number       = {9},
  pages        = {9858--9867},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Risk-aware stochastic shortest path}},
  doi          = {10.1609/aaai.v36i9.21222},
  volume       = {36},
  year         = {2022},
}

@unpublished{12660,
  abstract     = {We present Cross-Client Label Propagation(XCLP), a new method for transductive federated learning. XCLP estimates a data graph jointly from the data of multiple clients and computes labels for the unlabeled data by propagating label information across the graph. To avoid clients having to share their data with anyone, XCLP employs two cryptographically secure protocols: secure Hamming distance computation and secure summation. We demonstrate two distinct applications of XCLP within federated learning. In the first, we use it in a one-shot way to predict labels for unseen test points. In the second, we use it to repeatedly pseudo-label unlabeled training data in a federated semi-supervised setting. Experiments on both real federated and standard benchmark datasets show that in both applications XCLP achieves higher classification accuracy than alternative approaches.},
  author       = {Scott, Jonathan A and Yeo, Michelle X and Lampert, Christoph},
  booktitle    = {arXiv},
  eprint       = {2210.06434},
  eprinttype   = {arXiv},
  title        = {{Cross-client Label Propagation for transductive federated learning}},
  doi          = {10.48550/arXiv.2210.06434},
  year         = {2022},
}

@unpublished{12662,
  abstract     = {Modern machine learning tasks often require considering not just one but multiple objectives. For example, besides the prediction quality, this could be the efficiency, robustness or fairness of the learned models, or any of their combinations. Multi-objective learning offers a natural framework for handling such problems without having to commit to early trade-offs. Surprisingly, statistical learning theory so far offers almost no insight into the generalization properties of multi-objective learning. In this work, we make first steps to fill this gap: we establish foundational generalization bounds for the multi-objective setting as well as generalization and excess bounds for learning with scalarizations. We also provide the first theoretical analysis of the relation between the Pareto-optimal sets of the true objectives and the Pareto-optimal sets of their empirical approximations from training data. In particular, we show a surprising asymmetry: all Pareto-optimal solutions can be approximated by empirically Pareto-optimal ones, but not vice versa.},
  author       = {Súkeník, Peter and Lampert, Christoph},
  booktitle    = {arXiv},
  eprint       = {2208.13499},
  eprinttype   = {arXiv},
  title        = {{Generalization in Multi-objective machine learning}},
  doi          = {10.48550/arXiv.2208.13499},
  year         = {2022},
}

@article{12670,
  abstract     = {DNA methylation plays essential homeostatic functions in eukaryotic genomes. In animals, DNA methylation is also developmentally regulated and, in turn, regulates development. In the past two decades, huge research effort has endorsed the understanding that DNA methylation plays a similar role in plant development, especially during sexual reproduction. The power of whole-genome sequencing and cell isolation techniques, as well as bioinformatics tools, have enabled recent studies to reveal dynamic changes in DNA methylation during germline development. Furthermore, the combination of these technological advances with genetics, developmental biology and cell biology tools has revealed functional methylation reprogramming events that control gene and transposon activities in flowering plant germlines. In this review, we discuss the major advances in our knowledge of DNA methylation dynamics during male and female germline development in flowering plants.},
  author       = {He, Shengbo and Feng, Xiaoqi},
  issn         = {1744-7909},
  journal      = {Journal of Integrative Plant Biology},
  number       = {12},
  pages        = {2240--2251},
  publisher    = {Wiley},
  title        = {{DNA methylation dynamics during germline development}},
  doi          = {10.1111/jipb.13422},
  volume       = {64},
  year         = {2022},
}

@article{12671,
  abstract     = {Sperm chromatin is typically transformed by protamines into a compact and transcriptionally inactive state [1,2]. Sperm cells of flowering plants lack protamines, yet they have small, transcriptionally active nuclei with chromatin condensed through an unknown mechanism [3,4]. Here we show that a histone variant, H2B.8, mediates sperm chromatin and nuclear condensation in Arabidopsis thaliana. Loss of H2B.8 causes enlarged sperm nuclei with dispersed chromatin, whereas ectopic expression in somatic cells produces smaller nuclei with aggregated chromatin. This result demonstrates that H2B.8 is sufficient for chromatin condensation. H2B.8 aggregates transcriptionally inactive AT-rich chromatin into phase-separated condensates, which facilitates nuclear compaction without reducing transcription. Reciprocal crosses show that mutation of h2b.8 reduces male transmission, which suggests that H2B.8-mediated sperm compaction is important for fertility. Altogether, our results reveal a new mechanism of nuclear compaction through global aggregation of unexpressed chromatin. We propose that H2B.8 is an evolutionary innovation of flowering plants that achieves nuclear condensation compatible with active transcription.},
  author       = {Buttress, Toby and He, Shengbo and Wang, Liang and Zhou, Shaoli and Saalbach, Gerhard and Vickers, Martin and Li, Guohong and Li, Pilong and Feng, Xiaoqi},
  issn         = {1476-4687},
  journal      = {Nature},
  number       = {7936},
  pages        = {614--622},
  publisher    = {Springer Nature},
  title        = {{Histone H2B.8 compacts flowering plant sperm through chromatin phase separation}},
  doi          = {10.1038/s41586-022-05386-6},
  volume       = {611},
  year         = {2022},
}

@unpublished{12677,
  abstract     = {In modern sample-driven Prophet Inequality, an adversary chooses a sequence of $n$ items with values $v_1, v_2, \ldots, v_n$ to be presented to a decision maker (DM). The process follows in two phases. In the first phase (sampling phase), some items, possibly selected at random, are revealed to the DM, but she can never accept them. In the second phase, the DM is presented with the other items in a random order and online fashion. For each item, she must make an irrevocable decision to either accept the item and stop the process or reject the item forever and proceed to the next item. The goal of the DM is to maximize the expected value as compared to a Prophet (or offline algorithm) that has access to all information. In this setting, the sampling phase has no cost and is not part of the optimization process. However, in many scenarios, the samples are obtained as part of the decision-making process.
We model this aspect as a two-phase Prophet Inequality where an adversary chooses a sequence of $2n$ items with values $v_1, v_2, \ldots, v_{2n}$ and the items are randomly ordered. Finally, there are two phases of the Prophet Inequality problem with the first $n$ items and the rest of the items, respectively. We show that some basic algorithms achieve a ratio of at most 0.450. We present an algorithm that achieves a ratio of at least 0.495. Finally, we show that for every algorithm the ratio it can achieve is at most 0.502. Hence our algorithm is near-optimal.},
  author       = {Chatterjee, Krishnendu and Mohammadi, Mona and Saona Urmeneta, Raimundo J},
  booktitle    = {arXiv},
  eprint       = {2209.14368},
  eprinttype   = {arXiv},
  title        = {{Repeated prophet inequality with near-optimal bounds}},
  doi          = {10.48550/arXiv.2209.14368},
  year         = {2022},
}

@article{12684,
  abstract     = {Given a place $\omega$ of a global function field $K$ over a finite field, with associated affine function ring $R_\omega$ and completion $K_\omega$, the aim of this paper is to give an effective joint equidistribution result for renormalized primitive lattice points $(a,b) \in R_\omega^2$ in the plane $K_\omega^2$, and for renormalized solutions to the gcd equation $ax + by = 1$. The main tools are techniques of Gorodnik and Nevo for counting lattice points in well-rounded families of subsets. This gives a sharper analog in positive characteristic of a result of Nevo and the first author for the equidistribution of the primitive lattice points in $\mathbb{Z}^2$.},
  author       = {Horesh, Tal and Paulin, Frédéric},
  issn         = {2118-8572},
  journal      = {Journal de Théorie des Nombres de Bordeaux},
  number       = {3},
  pages        = {679--703},
  publisher    = {Centre Mersenne},
  title        = {{Effective equidistribution of lattice points in positive characteristic}},
  doi          = {10.5802/JTNB.1222},
  volume       = {34},
  year         = {2022},
}

@unpublished{12750,
  abstract     = {Quantum kinetically constrained models have recently attracted significant attention due to their anomalous dynamics and thermalization. In this work, we introduce a hitherto unexplored family of kinetically constrained models featuring a conserved particle number and strong inversion-symmetry breaking due to facilitated hopping. We demonstrate that these models provide a generic example of so-called quantum Hilbert space fragmentation, that is manifested in disconnected sectors in the Hilbert space that are not apparent in the computational basis. Quantum Hilbert space fragmentation leads to an exponential in system size number of eigenstates with exactly zero entanglement entropy across several bipartite cuts. These eigenstates can be probed dynamically using quenches from simple initial product states. In addition, we study the particle spreading under unitary dynamics launched from the domain wall state, and find faster than diffusive dynamics at high particle densities, that crosses over into logarithmically slow relaxation at smaller densities. Using a classically simulable cellular automaton, we reproduce the logarithmic dynamics observed in the quantum case. Our work suggests that particle conserving constrained models with inversion symmetry breaking realize so far unexplored universality classes of dynamics and invite their further theoretical and experimental studies.},
  author       = {Brighi, Pietro and Ljubotina, Marko and Serbyn, Maksym},
  booktitle    = {arXiv},
  eprint       = {2210.15607},
  eprinttype   = {arXiv},
  title        = {{Hilbert space fragmentation and slow dynamics in particle-conserving quantum East models}},
  doi          = {10.48550/arXiv.2210.15607},
  year         = {2022},
}

@inproceedings{12775,
  abstract     = {We consider the problem of approximating the reachability probabilities in Markov decision processes (MDP) with uncountable (continuous) state and action spaces. While there are algorithms that, for special classes of such MDP, provide a sequence of approximations converging to the true value in the limit, our aim is to obtain an algorithm with guarantees on the precision of the approximation.
As this problem is undecidable in general, assumptions on the MDP are necessary. Our main contribution is to identify sufficient assumptions that are as weak as possible, thus approaching the "boundary" of which systems can be correctly and reliably analyzed. To this end, we also argue why each of our assumptions is necessary for algorithms based on processing finitely many observations.
We present two solution variants. The first one provides converging lower bounds under weaker assumptions than typical ones from previous works concerned with guarantees. The second one then utilizes stronger assumptions to additionally provide converging upper bounds. Altogether, we obtain an anytime algorithm, i.e. yielding a sequence of approximants with known and iteratively improving precision, converging to the true value in the limit. Besides, due to the generality of our assumptions, our algorithms are very general templates, readily allowing for various heuristics from literature in contrast to, e.g., a specific discretization algorithm. Our theoretical contribution thus paves the way for future practical improvements without sacrificing correctness guarantees.},
  author       = {Grover, Kush and Kretinsky, Jan and Meggendorfer, Tobias and Weininger, Maximilian},
  booktitle    = {33rd International Conference on Concurrency Theory},
  issn         = {1868-8969},
  location     = {Warsaw, Poland},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title        = {{Anytime guarantees for reachability in uncountable Markov decision processes}},
  doi          = {10.4230/LIPIcs.CONCUR.2022.11},
  volume       = {243},
  year         = {2022},
}

@article{12776,
  abstract     = {An improved asymptotic formula is established for the number of rational points of bounded height on the split smooth del Pezzo surface of degree 5. The proof uses the five conic bundle structures on the surface.},
  author       = {Browning, Timothy D},
  issn         = {1076-9803},
  journal      = {New York Journal of Mathematics},
  pages        = {1193--1229},
  publisher    = {State University of New York},
  title        = {{Revisiting the Manin–Peyre conjecture for the split del Pezzo surface of degree 5}},
  volume       = {28},
  year         = {2022},
}

@inproceedings{12780,
  abstract     = {The ability to scale out training workloads has been one of the key performance enablers of deep learning. The main scaling approach is data-parallel GPU-based training, which has been boosted by hardware and software support for highly efficient point-to-point communication, and in particular via hardware bandwidth over-provisioning. Overprovisioning comes at a cost: there is an order of magnitude price difference between "cloud-grade" servers with such support, relative to their popular "consumer-grade" counterparts, although single server-grade and consumer-grade GPUs can have similar computational envelopes.

In this paper, we show that the costly hardware overprovisioning approach can be supplanted via algorithmic and system design, and propose a framework called CGX, which provides efficient software support for compressed communication in ML applications, for both multi-GPU single-node training, as well as larger-scale multi-node training. CGX is based on two technical advances: At the system level, it relies on a re-developed communication stack for ML frameworks, which provides flexible, highly-efficient support for compressed communication. At the application level, it provides seamless, parameter-free integration with popular frameworks, so that end-users do not have to modify training recipes, nor significant training code. This is complemented by a layer-wise adaptive compression technique which dynamically balances compression gains with accuracy preservation. CGX integrates with popular ML frameworks, providing up to 3X speedups for multi-GPU nodes based on commodity hardware, and order-of-magnitude improvements in the multi-node setting, with negligible impact on accuracy.},
  author       = {Markov, Ilia and Ramezanikebrya, Hamidreza and Alistarh, Dan-Adrian},
  booktitle    = {Proceedings of the 23rd ACM/IFIP International Middleware Conference},
  internal-note = {NOTE(review): surname "Ramezanikebrya" looks unhyphenated -- possibly "Ramezani-Kebrya"; verify against the published paper's author list},
  isbn         = {9781450393409},
  location     = {Quebec, QC, Canada},
  pages        = {241--254},
  publisher    = {Association for Computing Machinery},
  title        = {{CGX: Adaptive system support for communication-efficient deep learning}},
  doi          = {10.1145/3528535.3565248},
  year         = {2022},
}

@article{12793,
  abstract     = {Let $F$ be a global function field with constant field $\mathbb{F}_q$. Let $G$ be a reductive group over $\mathbb{F}_q$. We establish a variant of Arthur's truncated kernel for $G$ and for its Lie algebra which generalizes Arthur's original construction. We establish a coarse geometric expansion for our variant truncation.
As applications, we consider some existence and uniqueness problems of some cuspidal automorphic representations for the function field of the projective line $\mathbb{P}^1_{\mathbb{F}_q}$ with two points of ramification.},
  author       = {Yu, Hongjie},
  issn         = {1945-5844},
  journal      = {Pacific Journal of Mathematics},
  keywords     = {Arthur–Selberg trace formula, cuspidal automorphic representations, global function fields},
  number       = {1},
  pages        = {193--237},
  publisher    = {Mathematical Sciences Publishers},
  title        = {{A coarse geometric expansion of a variant of Arthur's truncated traces and some applications}},
  doi          = {10.2140/pjm.2022.321.193},
  volume       = {321},
  year         = {2022},
}

@unpublished{12860,
  abstract     = {Memorization of the relation between entities in a dataset can lead to privacy issues when using a trained model for question answering. We introduce Relational Memorization (RM) to understand, quantify and control this phenomenon. While bounding general memorization can have detrimental effects on the performance of a trained model, bounding RM does not prevent effective learning. The difference is most pronounced when the data distribution is long-tailed, with many queries having only few training examples: Impeding general memorization prevents effective learning, while impeding only relational memorization still allows learning general properties of the underlying concepts. We formalize the notion of Relational Privacy (RP) and, inspired by Differential Privacy (DP), we provide a possible definition of Differential Relational Privacy (DrP). These notions can be used to describe and compute bounds on the amount of RM in a trained model. We illustrate Relational Privacy concepts in experiments with large-scale models for Question Answering.},
  author       = {Bombari, Simone and Achille, Alessandro and Wang, Zijian and Wang, Yu-Xiang and Xie, Yusheng and Singh, Kunwar Yashraj and Appalaraju, Srikar and Mahadevan, Vijay and Soatto, Stefano},
  booktitle    = {arXiv},
  eprint       = {2203.16701},
  eprinttype   = {arXiv},
  title        = {{Towards differential relational privacy and its use in question answering}},
  doi          = {10.48550/arXiv.2203.16701},
  year         = {2022},
}

