% Conference paper in the NeurIPS 2025 proceedings (volume 38).
% No page range is recorded: the "to appear" status lives in `note`, because
% styles print `pages` with a "pp."/"pages" prefix. Once the proceedings are
% published, add a `pages` field and drop the note.
% The citation key is kept as-is so existing \cite commands keep resolving.
@inproceedings{NeurIPS'25:OnlineRLHF,
    author    = {Li, Long-Fei and Qian, Yu-Yang and Zhao, Peng and Zhou, Zhi-Hua},
    title     = {Provably Efficient Online {RLHF} with One-Pass Reward Modeling},
    booktitle = {Advances in Neural Information Processing Systems 38 ({NeurIPS})},
    year      = {2025},
    note      = {To appear},
}