{"updated":"2025-01-23T01:18:00.601903+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00013278","sets":["581:729:731"]},"path":["731"],"owner":"1","recid":"13278","title":["日本語の情報量の上限の推定"],"pubdate":{"attribute_name":"公開日","attribute_value":"1997-11-15"},"_buckets":{"deposit":"a9c8268c-5416-48fe-bf24-61e5c2a5d5eb"},"_deposit":{"id":"13278","pid":{"type":"depid","value":"13278","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"日本語の情報量の上限の推定","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"日本語の情報量の上限の推定"},{"subitem_title":"An Estimate of an Upper Bound for the Entropy of Japanese","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"論文","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"1997-11-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"京都大学工学研究科電子通信工学専攻"},{"subitem_text_value":"京都大学工学研究科電子通信工学専攻"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Department of Electronics and Communication, Kyoto University","subitem_text_language":"en"},{"subitem_text_value":"Department of Electronics and Communication, Kyoto University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/13278/files/IPSJ-JNL3811011.pdf"},"date":[{"dateType":"Available","dateValue":"1999-11-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL3811011.pdf","filesize":[{"value":"1.1 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"cd0ab94f-4fb8-4d31-87d5-5a5076255c9c","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 1997 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"森, 信介"},{"creatorName":"山地, 治"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Shinsuke, Mori","creatorNameLang":"en"},{"creatorName":"Osamu, Yamaji","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal 
article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"本論文では,形態素単位のn?gramモデル(1〓n〓16)による日本語の情報量の上限の推定方法とその結果を示す.各n?gramモデルは,データスパースネスの問題に対応するため,低次のn?gramモデルとの補間を行ってある.補間係数は,最も有効であると考えられている削除補間法により求める.実験ではEDRコーパスの約9割からモデルのパラメータを推定し,残りの1割に対して情報量を計算した.その結果,n=16のときに1文字あたりの情報量は最小の4.30330ビットであった.また,学習コーパスの大きさとモデルの次数による情報量の変化を調べた結果,モデルの次数を上げることによる情報量の減少量は微小であるが,学習コーパスを大きくすることによる情報量の減少量はかなりあるということが分かった.さらに,パラメータ数とエントロピーの関係についても議論する.これは,実際の日本語処理にn?gramモデルを応用する際に,適切にnの値を選ぶ指標となる.","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"In this paper we present an estimate of an upper bound for the entropy of Japanese by morpheme n-gram model(1〓n〓16).Each n-gram model is interpolated with lower order n-gram models.The deleted interpolation method is applied for estimating interpolation coefficients.We estimated the parameters from 90% of the EDR corpus and calculated the entropy on the rest 10%.As the result,the minimum entropy was 4.30330[bit]a character with n=16.The relation between the size of learning corpus or the order of model and entropy showed that incresing the order decreases entropy slightly and increasing the size of learning corpus decreases it noteworthily.In addition,we discuss the relation between the number of parameters and entropy.This is usefull to select the value of n to apply n-gram model to the practical Japanese processing.","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"2199","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicPageStart":"2191","bibliographicIssueDates":{"bibliographicIssueDate":"1997-11-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"11","bibliographicVolumeNumber":"38"}]},"relation_version_is_last":true,"item_2_alternative_title_2":{"attribute_name":"その他タイトル","attribute_value_mlt":[{"subitem_alternative_title":"自然言語処理"}]},"weko_creator_id":"1"},"created":"2025-01-18T22:47:24.488412+00:00","id":13278,"links":{}}