{"updated":"2025-01-22T23:09:05.150205+00:00","links":{},"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00017759","sets":["934:1022:1069:1070"]},"path":["1070"],"owner":"1","recid":"17759","title":["転置ファイルおよび接尾辞配列の効率的圧縮法"],"pubdate":{"attribute_name":"公開日","attribute_value":"1999-11-15"},"_buckets":{"deposit":"c9c7264c-7577-4462-bff3-ab3f00c7a77a"},"_deposit":{"id":"17759","pid":{"type":"depid","value":"17759","revision_id":0},"owners":[1],"status":"published","created_by":1},"item_title":"転置ファイルおよび接尾辞配列の効率的圧縮法","author_link":["0","0"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"転置ファイルおよび接尾辞配列の効率的圧縮法"},{"subitem_title":"Efficient Compression of Inverted Files and Suffix Arrays","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"研究論文","subitem_subject_scheme":"Other"}]},"item_type_id":"3","publish_date":"1999-11-15","item_3_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東京大学理学系研究科情報科学専攻"},{"subitem_text_value":"東京大学理学系研究科情報科学専攻"}]},"item_3_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Department of Information Science, University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Department of Information Science, University of Tokyo","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/17759/files/IPSJ-TOD4008009.pdf"},"date":[{"dateType":"Available","dateValue":"2001-11-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-TOD4008009.pdf","filesize":[{"value":"2.1 
MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"13"},{"tax":["include_tax"],"price":"0","billingrole":"39"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"a3b5772f-acad-42cb-ad73-062bf00482cf","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 1999 by the Information Processing Society of Japan"}]},"item_3_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"定兼, 邦彦"},{"creatorName":"今井, 浩"}],"nameIdentifiers":[{}]}]},"item_3_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Kunihiko, Sadakane","creatorNameLang":"en"},{"creatorName":"Hiroshi, Imai","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_3_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11464847","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_3_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7799","subitem_source_identifier_type":"ISSN"}]},"item_3_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"単語ブロックソート圧縮法を提案する.これは文書と全文検索のための索引を圧縮する方法であり 圧縮データから転置ファイルを高速に生成できる.文書は圧縮時に単語に区切られるため 復号時には形態素解析などの時間のかかる処理は必要ない.これにより 全文検索のための索引を保存する際のディスク容量やネットワークを介して転送する際の負荷を減らすことができる.htmlに対する実験から 圧縮率はgzipよりも良く 圧縮データから転置ファイルを生成する時間は転置ファイルを0から作るよりも短く 形態素解析にかかる時間を含めると5倍以上速いことを確認した.また 
単語ブロックソート圧縮法よりも圧縮率の良い通常のブロックソート圧縮法で圧縮された文書から単語を切り出し転置ファイルを生成するアルゴリズムも提案する.","subitem_description_type":"Other"}]},"item_3_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"We propose word-based block sorting, which is used for compressing both texts and their full-text indexes, inverted files. Since texts are separated into words, morphological analysis, which is time consuming, is not necessary in the decoder. By using the proposed compression scheme, we can reduce space for storing full-text indexes and a load for transferring them via network. We confirmed by experiments that our compression scheme has better compression ratio than gzip and creating the inverted file from compressed data is faster than creating it from scratch. Furthermore, this is more than five times faster if time for morphological analysis is included. We also propose an algorithm for creating an inverted file from a compressed file by the ordinary block sorting which has better compression ratio than the word-based block sorting.","subitem_description_type":"Other"}]},"item_3_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"94","bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌データベース（TOD）"}],"bibliographicPageStart":"85","bibliographicIssueDates":{"bibliographicIssueDate":"1999-11-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"SIG08(TOD4)","bibliographicVolumeNumber":"40"}]},"relation_version_is_last":true,"weko_creator_id":"1"},"id":17759,"created":"2025-01-18T22:50:40.397804+00:00"}