Essec\Faculty\Model\Contribution {#2233 ▼
#_index: "academ_contributions"
#_id: "14738"
#_source: array:26 [
"id" => "14738"
"slug" => "14738-pac-bayesian-offline-contextual-bandits-with-guarantees"
"yearMonth" => "2024-07"
"year" => "2024"
"title" => "PAC-Bayesian Offline Contextual Bandits With Guarantees"
"description" => "SAKHI, O., ALQUIER, P. et CHOPIN, N. (2024). PAC-Bayesian Offline Contextual Bandits With Guarantees. Dans: Closing Workshop of the ISBA Programme on Interpretable Inference via Principled BNP Approaches in Biomedical Research and Beyond. Singapore.
SAKHI, O., ALQUIER, P. et CHOPIN, N. (2024). PAC-Bayesian Offline Contextual Bandits With Guarantees
"
"authors" => array:3 [
0 => array:3 [
"name" => "ALQUIER Pierre"
"bid" => "B00809923"
"slug" => "alquier-pierre"
]
1 => array:1 [
"name" => "SAKHI Otmane"
]
2 => array:1 [
"name" => "CHOPIN Nicolas"
]
]
"ouvrage" => "Closing Workshop of the ISBA Programme on Interpretable Inference via Principled BNP Approaches in Biomedical Research and Beyond
Closing Workshop of the ISBA Programme on Interpretable Inference via Principled BNP Approaches in B
"
"keywords" => []
"updatedAt" => "2024-07-31 12:31:43"
"publicationUrl" => null
"publicationInfo" => array:3 [
"pages" => ""
"volume" => ""
"number" => ""
]
"type" => array:2 [
"fr" => "Invité dans une conférence académique (Keynote speaker)"
"en" => "Invited speaker at an academic conference"
]
"support_type" => array:2 [
"fr" => null
"en" => null
]
"countries" => array:2 [
"fr" => null
"en" => null
]
"abstract" => array:2 [
"fr" => ""
"en" => "This work introduces a new principled approach for off-policy learning in contextual bandits. Unlike previous work, our approach does not derive learning principles from intractable or loose bounds. We analyse the problem through the PAC-Bayesian lens, interpreting policies as mixtures of decision rules. This allows us to propose novel generalization bounds and provide tractable algorithms to optimize them. We prove that the derived bounds are tighter than their competitors, and can be optimized directly to confidently improve upon the logging policy offline. Our approach learns policies with guarantees, uses all available data and does not require tuning additional hyperparameters on held-out sets. We demonstrate through extensive experiments the effectiveness of our approach in providing performance guarantees in practical scenarios.
This work introduces a new principled approach for off-policy learning in contextual bandits. Unlike
"
]
"authors_fields" => array:2 [
"fr" => "Systèmes d'Information, Data Analytics et Opérations"
"en" => "Information Systems, Data Analytics and Operations"
]
"indexedAt" => "2025-04-02T12:21:45.000Z"
"docTitle" => "PAC-Bayesian Offline Contextual Bandits With Guarantees"
"docSurtitle" => "Invité dans une conférence académique (Keynote speaker)"
"authorNames" => "<a href="/cv/alquier-pierre">ALQUIER Pierre</a>, SAKHI Otmane, CHOPIN Nicolas"
"docDescription" => "<span class="document-property-authors">ALQUIER Pierre, SAKHI Otmane, CHOPIN Nicolas</span><br><span class="document-property-authors_fields">Systèmes d'Information, Data Analytics et Opérations</span> | <span class="document-property-year">2024</span>
<span class="document-property-authors">ALQUIER Pierre, SAKHI Otmane, CHOPIN Nicolas</span><br><span
"
"keywordList" => ""
"docPreview" => "<b>PAC-Bayesian Offline Contextual Bandits With Guarantees</b><br><span>2024-07 | Invité dans une conférence académique (Keynote speaker) </span>
<b>PAC-Bayesian Offline Contextual Bandits With Guarantees</b><br><span>2024-07 | Invité dans une co
"
"docType" => "research"
"publicationLink" => "<a href="#" target="_blank">PAC-Bayesian Offline Contextual Bandits With Guarantees</a>"
]
+lang: "fr"
+"_type": "_doc"
+"_score": 8.71197
+"parent": null
}