


default search action
BibTeX record conf/usenix/GaoHSKJDYYZ24
@comment{
  Conference paper in the USENIX ATC 2024 proceedings; record exported from
  dblp (see the biburl and bibsource fields). The system name CachedAttention
  is brace-protected in the title so that sentence-casing bibliography styles
  keep its camelCase spelling. Names are stored in the unambiguous
  Last, First form. The timestamp, biburl and bibsource fields are dblp
  bookkeeping that standard styles ignore.
}
@inproceedings{DBLP:conf/usenix/GaoHSKJDYYZ24,
  author    = {Gao, Bin and
               He, Zhuomin and
               Sharma, Puru and
               Kang, Qingxuan and
               Jevdjic, Djordje and
               Deng, Junbo and
               Yang, Xingkun and
               Yu, Zhou and
               Zuo, Pengfei},
  editor    = {Bagchi, Saurabh and
               Zhang, Yiying},
  title     = {Cost-Efficient Large Language Model Serving for Multi-turn
               Conversations with {CachedAttention}},
  booktitle = {Proceedings of the 2024 {USENIX} Annual Technical Conference,
               {USENIX} {ATC} 2024, Santa Clara, CA, USA, July 10-12, 2024},
  pages     = {111--126},
  publisher = {{USENIX} Association},
  year      = {2024},
  url       = {https://www.usenix.org/conference/atc24/presentation/gao-bin-cost},
  timestamp = {Tue, 16 Jul 2024 22:11:07 +0200},
  biburl    = {https://dblp.org/rec/conf/usenix/GaoHSKJDYYZ24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies in your web browser. For more information, see our F.A.Q.