{"created":"2025-01-18T23:20:33.392126+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00057331","sets":["1164:5159:5198:5202"]},"path":["5202"],"owner":"1","recid":"57331","title":["講演の書き起こしに対する統計的手法を用いた文体の整形"],"pubdate":{"attribute_name":"公開日","attribute_value":"2002-05-24"},"_buckets":{"deposit":"a846e9ee-3802-4eae-916c-6d5e80a3c9c6"},"_deposit":{"id":"57331","pid":{"type":"depid","value":"57331","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"講演の書き起こしに対する統計的手法を用いた文体の整形","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"講演の書き起こしに対する統計的手法を用いた文体の整形"},{"subitem_title":"Automatic Transformation of Lecture Transcription into Document Style using Statistical Framework","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2002-05-24","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"京都大学情報学研究科知能情報学専攻"},{"subitem_text_value":"京都大学情報学研究科知能情報学専攻"},{"subitem_text_value":"京都大学情報学研究科知能情報学専攻"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"School of Informatics, Kyoto University","subitem_text_language":"en"},{"subitem_text_value":"School of Informatics, Kyoto University","subitem_text_language":"en"},{"subitem_text_value":"School of Informatics, Kyoto University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/57331/files/IPSJ-SLP02041003.pdf"},"date":[{"dateType":"Available","dateValue":"2004-05-24"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-SLP02041003.pdf","filesize":[{"value":"1.4 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"22"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"d5ce88e5-3859-42d0-9432-9502d78e2d3b","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2002 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"下岡, 和也"},{"creatorName":"河原, 達也"},{"creatorName":"奥乃, 博"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Kazuya, Shitaoka","creatorNameLang":"en"},{"creatorName":"Tatsuya, Kawahara","creatorNameLang":"en"},{"creatorName":"Hiroshi, G.Okuno","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10442647","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"講演音声のような話し言葉の書き起こしや音声認識結果には、話し言葉特有の表現が数多く含まれて二次利用しにくいため、文章として適した形態に整形する必要がある。本稿では、統計的機械翻訳の考え方に基づいて講演の書き起こしを自動的に整形された文章に変換する方法を提案する。本研究で扱う処理は、フィラーの削除、句点の挿入、助詞の挿入、書き言葉表現への変換、文体の統一である。これらの処理を統合的に行うようにビームサーチを導入した。以上の手法により、実際の講演の書き起こしを整形された文章に変換し、講演録編集者によって一次整形されたものを正解として、句点の挿入、助詞の挿入、文体の統一に関して定量的な評価を行った。","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Transcriptions and recognition results of lecture speech include many expressions peculiar to spoken language. Thus, it is necessary to transform them into document style for practical use of them. We apply the statistical approach used by machine translation to automatic transformation of the spoken language into document style sentences. We deal with deletion of the fillers, insertion of periods, insertion of particles, conversion to written word expression and unification of a text style. To apply these processings in an integrated manner, we introduce a beam search. The preliminary evaluation of the proposed method is presented using real lecture transcriptions and their archives.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"24","bibliographic_titles":[{"bibliographic_title":"情報処理学会研究報告音声言語情報処理(SLP)"}],"bibliographicPageStart":"17","bibliographicIssueDates":{"bibliographicIssueDate":"2002-05-24","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"50(2002-SLP-041)","bibliographicVolumeNumber":"2002"}]},"relation_version_is_last":true,"weko_creator_id":"1"},"id":57331,"updated":"2025-01-22T04:32:56.583143+00:00","links":{}}