Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                

BibTeX record conf/usenix/GaoHSKJDYYZ24

download as .bib file

@comment{USENIX ATC 2024 conference paper record, as exported from DBLP.
  The camel-case name CachedAttention is brace-protected in the title so
  that sentence-casing bibliography styles do not lowercase it. Author and
  editor names use the unambiguous Last, First form.}
@inproceedings{DBLP:conf/usenix/GaoHSKJDYYZ24,
  author       = {Gao, Bin and
                  He, Zhuomin and
                  Sharma, Puru and
                  Kang, Qingxuan and
                  Jevdjic, Djordje and
                  Deng, Junbo and
                  Yang, Xingkun and
                  Yu, Zhou and
                  Zuo, Pengfei},
  editor       = {Bagchi, Saurabh and
                  Zhang, Yiying},
  title        = {Cost-Efficient Large Language Model Serving for Multi-turn Conversations
                  with {CachedAttention}},
  booktitle    = {Proceedings of the 2024 {USENIX} Annual Technical Conference, {USENIX}
                  {ATC} 2024, Santa Clara, CA, USA, July 10-12, 2024},
  pages        = {111--126},
  publisher    = {{USENIX} Association},
  year         = {2024},
  url          = {https://www.usenix.org/conference/atc24/presentation/gao-bin-cost},
  timestamp    = {Tue, 16 Jul 2024 22:11:07 +0200},
  biburl       = {https://dblp.org/rec/conf/usenix/GaoHSKJDYYZ24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}