{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00241628","sets":["1164:5159:11541:11870"]},"path":["11870"],"owner":"44499","recid":"241628","title":["Linear Effect of Neuron Activations in Transformer-based Language Models"],"pubdate":{"attribute_name":"公開日","attribute_value":"2024-12-05"},"_buckets":{"deposit":"a24829c5-c7a2-445f-88d7-7b63fe2d7bec"},"_deposit":{"id":"241628","pid":{"type":"depid","value":"241628","revision_id":0},"owners":[44499],"status":"published","created_by":44499},"item_title":"Linear Effect of Neuron Activations in Transformer-based Language Models","author_link":["665551","665548","665549","665547","665550","665546"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Linear Effect of Neuron Activations in Transformer-based Language Models"},{"subitem_title":"Linear Effect of Neuron Activations in Transformer-based Language Models","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"ポスターセッション","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2024-12-05","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"The University of Tokyo"},{"subitem_text_value":"The University of Tokyo"},{"subitem_text_value":"Institute of Industrial Science, The University of Tokyo"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Institute of Industrial Science, The University of Tokyo","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/241628/files/IPSJ-SLP24154008.pdf","label":"IPSJ-SLP24154008.pdf"},"date":[{"dateType":"Available","dateValue":"2026-12-05"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP24154008.pdf","filesize":[{"value":"1.1 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"bc7d788a-38c8-4d2e-9557-c676eb58bebb","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2024 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Xin, Zhao"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Zehui, Jiang"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Naoki, Yoshinaga"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Xin, Zhao","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Zehui, Jiang","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Naoki, Yoshinaga","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-8663","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"Neurons in feed-forward layers of Transformers have shown the ability to store factual knowledge. However, previous analyses mostly focused on qualitative evaluation, leaving the numerical relationship between neuron activations and model outputs less understood. Our study conducts a quantitative analysis through neuron-wise intervention experiments using the knowledge probing dataset. Our findings first reveal that neurons exhibit linearity and polarity in producing output tokens probabilities, quantified by “neuron empirical gradients.” Empirical gradients provide a direct measure of neurons' importance in representing knowledge. However, neuron-wise intervention experiments are costly, making it impractical to obtain empirical gradients in large language models. To address this, we propose NeurGrad, an efficient method for measuring neuron empirical gradients. Our experimental results show that NeurGrad outperforms several baseline methods in both efficiency and accuracy.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Neurons in feed-forward layers of Transformers have shown the ability to store factual knowledge. However, previous analyses mostly focused on qualitative evaluation, leaving the numerical relationship between neuron activations and model outputs less understood. Our study conducts a quantitative analysis through neuron-wise intervention experiments using the knowledge probing dataset. Our findings first reveal that neurons exhibit linearity and polarity in producing output tokens probabilities, quantified by “neuron empirical gradients.” Empirical gradients provide a direct measure of neurons' importance in representing knowledge. However, neuron-wise intervention experiments are costly, making it impractical to obtain empirical gradients in large language models. To address this, we propose NeurGrad, an efficient method for measuring neuron empirical gradients. Our experimental results show that NeurGrad outperforms several baseline methods in both efficiency and accuracy.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"6","bibliographic_titles":[{"bibliographic_title":"研究報告音声言語情報処理(SLP)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2024-12-05","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"8","bibliographicVolumeNumber":"2024-SLP-154"}]},"relation_version_is_last":true,"weko_creator_id":"44499"},"id":241628,"updated":"2025-01-19T07:35:55.463434+00:00","links":{},"created":"2025-01-19T01:46:20.695259+00:00"}