% Anderson et al. (1997), "Continuous Profiling: Where Have All the Cycles Gone?",
% ACM SIGOPS Operating Systems Review 31(5). Key kept as exported so existing citations resolve.
@article{11849,
  abstract     = {This paper describes the DIGITAL Continuous Profiling Infrastructure, a sampling-based profiling system designed to run continuously on production systems. The system supports multiprocessors, works on unmodified executables, and collects profiles for entire systems, including user programs, shared libraries, and the operating system kernel. Samples are collected at a high rate (over 5200 samples/sec per 333-MHz processor), yet with low overhead (1--3\% slowdown for most workloads). Analysis tools supplied with the profiling system use the sample data to produce an accurate accounting, down to the level of pipeline stalls incurred by individual instructions, of where time is being spent. When instructions incur stalls, the tools identify possible reasons, such as cache misses, branch mispredictions, and functional unit contention. The fine-grained instruction-level analysis guides users and automated optimizers to the causes of performance problems and provides important insights for fixing them.},
  author       = {Anderson, Jennifer M. and Berc, Lance M. and Dean, Jeffrey and Ghemawat, Sanjay and Henzinger, Monika H. and Leung, Shun-Tak A. and Sites, Richard L. and Vandevoorde, Mark T. and Waldspurger, Carl A. and Weihl, William E.},
  issn         = {0163-5980},
  journal      = {ACM SIGOPS Operating Systems Review},
  number       = {5},
  pages        = {1--14},
  publisher    = {Association for Computing Machinery},
  title        = {Continuous Profiling: Where Have All the Cycles Gone?},
  doi          = {10.1145/269005.266637},
  volume       = {31},
  year         = {1997},
}

