{"created":"2025-01-19T01:43:31.826217+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00239726","sets":["934:989:11507:11752"]},"path":["11752"],"owner":"44499","recid":"239726","title":["リスク関数を用いたプライバシ保護安全強化学習"],"pubdate":{"attribute_name":"公開日","attribute_value":"2024-09-25"},"_buckets":{"deposit":"3ee9f14a-9959-4124-8d88-5ea1a8f9c0bb"},"_deposit":{"id":"239726","pid":{"type":"depid","value":"239726","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"リスク関数を用いたプライバシ保護安全強化学習","author_link":["657220","657218","657219","657221"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"リスク関数を用いたプライバシ保護安全強化学習"},{"subitem_title":"Privacy-preserving Safe Reinforcement Learning Using Risk Function","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[オリジナル論文] プライバシ保護,強化学習,安全強化学習,リスク関数,Q学習,準同型暗号","subitem_subject_scheme":"Other"}]},"item_type_id":"3","publish_date":"2024-09-25","item_3_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東京都立産業技術高等専門学校"},{"subitem_text_value":"広島工業大学"}]},"item_3_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Tokyo Metropolitan College of Industrial Technology","subitem_text_language":"en"},{"subitem_text_value":"Hiroshima Institute of Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/239726/files/IPSJ-TOM1703002.pdf","label":"IPSJ-TOM1703002.pdf"},"date":[{"dateType":"Available","dateValue":"2026-09-25"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-TOM1703002.pdf","filesize":[{"value":"987.4 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"17"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"0a67b336-0f40-4362-b185-4319b2761032","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2024 by the Information Processing Society of Japan"}]},"item_3_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"遠藤, 拓斗"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"福永, 修一"}],"nameIdentifiers":[{}]}]},"item_3_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Takuto, Endo","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Shuichi, Fukunaga","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_3_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11464803","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_3_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7780","subitem_source_identifier_type":"ISSN"}]},"item_3_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"プライバシ保護強化学習は機密データを暗号化した状態で学習するモデルであり,個人を特定可能な医療データや,収集された金融情報などのプライバシを保ったまま処理できる.このアルゴリズムは特に様々な個人情報を扱う医療分野で重要となるが,従来のプライバシ保護強化学習は学習した方策が患者に副作用をもたらす可能性を考慮していなかった.そこで本研究は,プライバシ保護強化学習にリスク関数を導入し,患者のプライバシを保護しつつ安全な方策を学習するモデルを提案する.提案手法は準同型暗号と呼ばれる暗号化したデータの演算が可能な暗号方式を用いてプライバシ保護を実現する.さらに,糖尿病患者へのインスリン投与において,低血糖に陥ってしまった場合にペナルティを与えるリスク関数を使用する.最後に,糖尿病患者に対するシミュレーションを行った結果,提案手法で学習した方策はプライバシを保護しつつ,低血糖に陥るリスクを抑えられることが分かった.","subitem_description_type":"Other"}]},"item_3_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Privacy-preserving reinforcement learning is a model that learns a policy from several sources, including personally identifiable medical data and collected financial information, without leaking sensitive information. Although this algorithm is significant in healthcare, where personal information is handled, conventional privacy-preserving reinforcement learning does not consider the possible side effects to the patients due to the learned policy. In this study, we introduce a risk function into the privacy-preserving reinforcement learning and propose a model that learns a safe policy without leaking sensitive information. We implement the proposed method by homomorphic encryption, which performs arithmetic operations on ciphertexts to archive confidentiality. Furthermore, the proposed method uses a risk function that penalizes hypoglycemia for its high risk in blood glucose control. Finally, we simulate blood glucose control and show that the policy learned by the proposed method minimizes the risk of hypoglycemia while protecting privacy.","subitem_description_type":"Other"}]},"item_3_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"12","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌数理モデル化と応用(TOM)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2024-09-25","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"3","bibliographicVolumeNumber":"17"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"id":239726,"updated":"2025-01-19T08:11:35.339881+00:00","links":{}}