{"created":"2025-01-19T01:44:41.997740+00:00","updated":"2025-01-19T07:57:54.932360+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00240484","sets":["1164:2036:11466:11785"]},"path":["11785"],"owner":"44499","recid":"240484","title":["ニューラルネットワークの低ビット量子化手法の検討"],"pubdate":{"attribute_name":"公開日","attribute_value":"2024-11-05"},"_buckets":{"deposit":"13832db6-2c5b-4bf2-a847-9f29d344016a"},"_deposit":{"id":"240484","pid":{"type":"depid","value":"240484","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"ニューラルネットワークの低ビット量子化手法の検討","author_link":["659778","659777","659776","659779"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"ニューラルネットワークの低ビット量子化手法の検討"},{"subitem_title":"Low-bit Quantization Methods for Neural Networks","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2024-11-05","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"早稲田大学"},{"subitem_text_value":"早稲田大学"},{"subitem_text_value":"早稲田大学"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Waseda University","subitem_text_language":"en"},{"subitem_text_value":"Waseda University","subitem_text_language":"en"},{"subitem_text_value":"Waseda University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/240484/files/IPSJ-SLDM24207002.pdf","label":"IPSJ-SLDM24207002.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLDM24207002.pdf","filesize":[{"value":"1.8 
MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"10"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"fe7ec586-1eb7-4467-bb1b-ccb73c24bcdd","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2024 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"和田, 絵美"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"木村, 晋二"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Emi, Wada","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Shinji, Kimura","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11451459","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8639","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"ニューラルネットワークの軽量化はエッジ応用において重要な課題である．中でも DNN (Deep Neural Network) モデルの学習後の量子化 (Post Training Quantization, PTQ) は，軽量化のための簡単かつ有効な手法である．様々な PTQ 手法が検討される中で，対数量子化は 4 
ビット以下の超低精度において高い性能を示している．そこで本稿では，更なる性能向上のため，重みと活性値の各々に異なる対数ベースの量子化器を組み合わせる手法を提案する．重みの量子化では，分布柔軟性の高いサブセット量子化 (Subset Quantization, SQ) を活用する．SQ は，ユニバーサルセットと呼ばれる 2 単語の加法で構成された対数値のセットから，データ分布に適した量子化点を探索する量子化器である．すべての可能な組み合わせを網羅的に探索するため，高精度な量子化点を得ることができる．一方で活性値の量子化では，計算効率の高い選択的 2 単語対数量子化 (Selective Two-word Log-scale Quantization, STLQ) を適用する．2 単語率を設定し，量子化誤差の大きい値を優先的に 2 単語で対数量子化することで，効率的に精度を向上させることができる．重みと活性値に異なる量子化手法を用いることで，精度の向上を達成している．大規模画像分類ベンチマークを用いた評価により，2～4 ビットの超低精度において，既存 PTQ 手法と比べて良い性能を達成した．","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Lightweighting neural networks is a critical challenge for edge applications. Among various methods, Post Training Quantization (PTQ) is a simple and effective approach for reducing the size of Deep Neural Network (DNN) models. While various PTQ techniques have been explored, logarithmic quantization methods show high performance at ultra-low precision of 4 bits or less. In this manuscript, we propose a method to combine different logarithmic-based quantizers to weights and activations. For weight quantization, Subset Quantization (SQ) is used, which offers high flexibility in handling distribution. SQ is a quantizer that searches for optimal quantization points suitable for the data distribution from a set of logarithmic values composed of two-word addends, called the universal set. Since all possible combinations are thoroughly searched, it is possible to obtain highly accurate quantization points. For activation quantization, we apply Selective Two-word Log-scale Quantization (STLQ), which is a more computationally efficient method. By setting a two-word rate and prioritizing the logarithmic quantization of values with large quantization errors, STLQ efficiently improves accuracy. By combining different quantization techniques for weights and activations, we achieve enhanced precision. 
Evaluation on large-scale image classification benchmarks shows that our method achieves better performance than existing PTQ techniques at ultra-low precision levels of 2 to 4 bits.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告システムとLSIの設計技術（SLDM）"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2024-11-05","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"2","bibliographicVolumeNumber":"2024-SLDM-207"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"id":240484,"links":{}}