{"created":"2025-01-18T23:13:39.959875+00:00","updated":"2025-01-22T08:30:52.868440+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00048413","sets":["1164:4179:4222:4226"]},"path":["4226"],"owner":"1","recid":"48413","title":["統計的手法に基づくWebページからのヘッドライン生成"],"pubdate":{"attribute_name":"公開日","attribute_value":"2002-05-23"},"_buckets":{"deposit":"20b9b923-7955-4c79-8923-92684df32fcd"},"_deposit":{"id":"48413","pid":{"type":"depid","value":"48413","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"統計的手法に基づくWebページからのヘッドライン生成","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"統計的手法に基づくWebページからのヘッドライン生成"},{"subitem_title":"Headline Generation from Web Pages Based on Statistical Method","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2002-05-23","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"日本電信電話株式会社NTTサイバースペース研究所"},{"subitem_text_value":"日本電信電話株式会社NTTサイバースペース研究所"},{"subitem_text_value":"日本電信電話株式会社NTTサイバースペース研究所"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"NTT Cyber Space Laboratories, NTT Corporation","subitem_text_language":"en"},{"subitem_text_value":"NTT Cyber Space Laboratories, NTT Corporation","subitem_text_language":"en"},{"subitem_text_value":"NTT Cyber Space Laboratories, NTT Corporation","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing 
file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/48413/files/IPSJ-NL02149007.pdf"},"date":[{"dateType":"Available","dateValue":"2004-05-23"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-NL02149007.pdf","filesize":[{"value":"139.5 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"23"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"bcd760af-61cc-45e1-82c5-1ba76f5e0a22","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2002 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"廣嶋伸章"},{"creatorName":"長谷川, 隆明"},{"creatorName":"山崎, 毅文"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Nobuaki, Hiroshima","creatorNameLang":"en"},{"creatorName":"Takaaki, Hasegawa","creatorNameLang":"en"},{"creatorName":"Takefumi, Yamazaki","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10115061","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical 
report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"現状の検索エンジンが出力する文書リストの概要文はWebページの先頭数十文字などであるため内容が把握できず、必要な情報に効率よくアクセスできない。これを解決するためには、概要文の代わりにWebページの内容を簡潔に表したヘッドラインを提示すればよい。そこで本研究は、Webページからその内容を簡潔に表したヘッドラインを自動生成することを目的とする。ヘッドラインは「(1)内容網羅性」、「(2)可読性」、「(3)高圧縮性」の3条件を満たす必要があるが、従来のテキスト要約技術ではこれらの3つの条件を同時に満たすことができない。本研究では、2値分類の機械学習手法であるSupport Vector Machine(SVM)を用いて、単語がヘッドラインとして必要か不要かに分類することにより重要語の選択を行い、単語trigramモデルと単語の重要度を組み合わせたNoisy channel modelを用いてヘッドライン生成を行う方法を提案する。Webページを用いた評価実験の結果、提案した重要語選択モデルはTF・IDFモデルより優れていることを検証し、これを用いたヘッドライン生成実験においてTF・IDFモデルに基づくベースライン手法よりテキスト全体の内容をより的確に表せることを検証した。","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"The purpose of this study is to generate a headline automatically from a given web page. We can define a headline as the sentence which is brief, easy to read and highly compressed. Our method consists of two steps: word selection and headline generation. For word selection, we classify each word into two categories by applying Support Vector Machine(SVM). For headline generation, we use a noisy channel model which is the combination of a word trigram model and a word importance model. The experimental result shows that the generated headline by our method explains the original contents more precisely than that in the baseline.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"50","bibliographic_titles":[{"bibliographic_title":"情報処理学会研究報告自然言語処理(NL)"}],"bibliographicPageStart":"45","bibliographicIssueDates":{"bibliographicIssueDate":"2002-05-23","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"44(2002-NL-149)","bibliographicVolumeNumber":"2002"}]},"relation_version_is_last":true,"weko_creator_id":"1"},"id":48413,"links":{}}