{"updated":"2025-01-23T01:24:16.755605+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00013000","sets":["581:716:722"]},"path":["722"],"owner":"1","recid":"13000","title":["nグラム統計によるコーパスからの未知語抽出"],"pubdate":{"attribute_name":"公開日","attribute_value":"1998-07-15"},"_buckets":{"deposit":"a01e74df-2eaf-4de5-93d0-2fc39ecc390f"},"_deposit":{"id":"13000","pid":{"type":"depid","value":"13000","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"nグラム統計によるコーパスからの未知語抽出","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"nグラム統計によるコーパスからの未知語抽出"},{"subitem_title":"Unknown Word Extraction from Corpora Using n - gram Statistics","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"論文","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"1998-07-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"日本アイ・ビー・エム株式会社東京基礎研究所"},{"subitem_text_value":"京都大学"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Tokyo Research Laboratory, IBM Research","subitem_text_language":"en"},{"subitem_text_value":"Kyoto University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/13000/files/IPSJ-JNL3907007.pdf"},"date":[{"dateType":"Available","dateValue":"2000-07-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL3907007.pdf","filesize":[{"value":"937.7 
kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"fd71b889-3ef9-475a-9047-5443ef00f7cc","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 1998 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"森, 信介"},{"creatorName":"長尾, 眞"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Shinsuke, Mori","creatorNameLang":"en"},{"creatorName":"Makoto, Nagao","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"自然言語処理において，辞書は単語の文法的機能や意味の情報源として必要不可欠であり，辞書に登録されていない単語を減少させるため，辞書の語彙を増強する努力がなされている．新語や専門用語は絶えず増え続けているため，辞書作成の作業は多大な労力を要するのみならず，各解析段階での未知語との遭遇は避けられず，大きな問題の1つとなっている．この問題を解決するため，本論文では，nグラム統計を用いて，コーパスからの単語の抽出とその単語が属する品詞の推定を同時に行う方法を提案する．この方法は，同一品詞に属する単語の前後に位置する文字列の分布は類似するという仮定に基づく．実験の結果，本手法が未知語の品詞推定や辞書構築に有効であることが確認された．","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Dictionaries are indispensable for NLP as a source of 
information of grammatical functions or meanings of words. Much endeavor is being made to reinforce their vocabulary. Given continuous increase of new words or technical terms, building a dictionary takes vast effort and unknown words are inevitable at any step of analysis and this causes a grand problem. To solve this problem, we propose a method to extract words from a corpus and estimate part-of-speeches (POSs) which they belong to simultaneously using n-gram statistics, based on the supposition that distributions of strings preceding or following words belonging to the same POS are similar. Experiments have shown that this method is effective to infer the POS of unknown words and build a dictionary.","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"2100","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicPageStart":"2093","bibliographicIssueDates":{"bibliographicIssueDate":"1998-07-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"7","bibliographicVolumeNumber":"39"}]},"relation_version_is_last":true,"item_2_alternative_title_2":{"attribute_name":"その他タイトル","attribute_value_mlt":[{"subitem_alternative_title":"自然言語処理"}]},"weko_creator_id":"1"},"created":"2025-01-18T22:47:12.360380+00:00","id":13000,"links":{}}