% NOTE(review): techreport entries require an `institution` field, which the
% exported record does not provide. The citation key suggests the IPSJ
% repository, but confirm the issuing body before adding it.
% The abstract (Japanese, then English) is stored in `abstract` rather than
% `note` so that styles do not print it in the reference list.
@techreport{oai:ipsj.ixsq.nii.ac.jp:02003108,
  author   = {三宅, 悠介 and 栗林, 健太郎},
  title    = {不確実性下における目的と手段の統合的探索に向けた連続腕バンディットの応用},
  number   = {5},
  year     = {2025},
  month    = jul,
  abstract = {不確実性の高い課題領域においては，その領域の目的に対する手段の有用性は，実際の行動を通じてのみ明らかになる．こうした状況で多くの候補から有効な手段を効率的に見極めるために，多腕バンディット問題として定式化する手法が用いられてきた．従来の多腕バンディットでは，探索の効率を高める実用的な簡略化として，固定された目的を前提とした定式化が行われてきた．しかし実際の課題では，目的自体も流動的であり，検討の過程で見直されることも少なくない．目的と手段は相互に依存する関係にあり，検討すべき組み合わせは多岐にわたるうえ，その対応関係も単純には捉えられない．このような制約のもとでも効率的に目的と手段の有用性を見極めるためには，行動による評価結果を他の目的にも横断的に活かす知識の伝達に加え，許容できないリスクを伴う組み合わせを適切に回避する仕組みが求められる．本報告では，目的と手段の双方を探索対象とした，不確実性下における意思決定の枠組みを提案する．具体的には，両者の特徴量空間を統合した空間上で，ガウス過程モデルに基づく連続腕バンディットにより有用な組み合わせを効率的に探索し，推定の不確実性に基づくリスク制御を組み込むことで，実行可能性を高める．評価では，高次元空間における最適化問題を対象とし，既存手法との比較を通じて，探索精度と計算効率の両立を確認した．その結果，提案手法が高次元設定にも適用可能であることが示唆された．
    Only actions reveal how effective a means is for achieving a goal in uncertain domains. Prior work has modeled such problems using multi-armed bandits, often assuming a fixed goal to simplify exploration. In practice, goals may shift, and their relation to means is complex and interdependent. Effective decision-making requires models that transfer knowledge across goals and avoid risky combinations. This paper proposes a framework that jointly explores goals and means under uncertainty. It embeds both into a shared feature space and applies a continuous-armed bandit with a Gaussian process to identify promising pairs. The model incorporates risk control based on predictive uncertainty. Experiments on high-dimensional optimization tasks compare the proposed method with standard approaches. Results suggest that it balances accuracy and efficiency and scales to high-dimensional settings.},
}