{"created":"2025-01-18T23:35:50.111342+00:00","updated":"2025-01-20T06:51:54.276234+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00081778","sets":["581:6644:6761"]},"path":["6761"],"owner":"11","recid":"81778","title":["Max-Shift BM and Max-Shift Horspool: Practical Fast Exact String Matching Algorithms"],"pubdate":{"attribute_name":"公開日","attribute_value":"2012-04-15"},"_buckets":{"deposit":"ea88eab9-a627-44a8-ab8e-d8430ac634f5"},"_deposit":{"id":"81778","pid":{"type":"depid","value":"81778","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"Max-Shift BM and Max-Shift Horspool: Practical Fast Exact String Matching Algorithms","author_link":["357817","357818","357815","357816"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Max-Shift BM and Max-Shift Horspool: Practical Fast Exact String Matching Algorithms"},{"subitem_title":"Max-Shift BM and Max-Shift Horspool: Practical Fast Exact String Matching Algorithms","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"一般論文","subitem_subject_scheme":"Other"}]},"item_type_id":"2","publish_date":"2012-04-15","item_2_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"Department of Computer Science, Graduate School of Information Science and Technology, The University of Tokyo"},{"subitem_text_value":"Human Genome Center, Institute of Medical Science, The University of Tokyo"}]},"item_2_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Department of Computer Science, Graduate School of Information Science and Technology, The University of Tokyo","subitem_text_language":"en"},{"subitem_text_value":"Human Genome Center, Institute of Medical Science, The University of Tokyo","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"publish_status":"0","weko_shared_id":11,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/81778/files/IPSJ-JNL5304020.pdf","label":"IPSJ-JNL5304020"},"date":[{"dateType":"Available","dateValue":"2014-04-15"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-JNL5304020.pdf","filesize":[{"value":"1.3 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"8"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"2132afef-1183-4024-8025-f2d0e971f34d","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2012 by the Information Processing Society of Japan"}]},"item_2_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Mohammed, Sahli"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Tetsuo, Shibuya"}],"nameIdentifiers":[{}]}]},"item_2_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Mohammed, Sahli","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Tetsuo, Shibuya","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_2_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN00116647","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_2_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7764","subitem_source_identifier_type":"ISSN"}]},"item_2_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"Exact string matching is the problem of finding all occurrences of a pattern P in a text T. The problem is well-known and many sophisticated algorithms have been proposed. Some fast exact string matching algorithms have been described since the 80s (e.g., the Boyer-Moore algorithm and its simplified version the Boyer-Moore-Horspool algorithm). They have been regarded as the standard benchmarks for the practical exact string search literature. In this paper, we propose two algorithms MSBM (Max-Shift BM) and MSH (Max-Shift BMH) both based on the combination of the bad-character rule of the right-most character used in the Boyer-Moore-Horspool algorithm, the extended bad-character rule and the good-suffix rule used in the Gusfield algorithm, which is a modification of the Boyer-Moore algorithm. Only a small extra space and preprocessing time are needed with respect to the BM and BMH algorithms. Nonetheless, empirical results on different data (DNA, Protein and Text Web) with different pattern lengths show that both MSBM and MSH are very fast in practice. MSBM algorithm usually won against other algorithms.\n------------------------------ \nThis is a preprint of an article intended for publication Journal of Information Processing(JIP). This preprint should not be cited. This article should be cited as: Journal of Information Processing Vol.20(2012) No.2 (online) \nDOI　http://dx.doi.org/10.2197/ipsjjip.20.419\n------------------------------ \n","subitem_description_type":"Other"}]},"item_2_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Exact string matching is the problem of finding all occurrences of a pattern P in a text T. The problem is well-known and many sophisticated algorithms have been proposed. Some fast exact string matching algorithms have been described since the 80s (e.g., the Boyer-Moore algorithm and its simplified version the Boyer-Moore-Horspool algorithm). They have been regarded as the standard benchmarks for the practical exact string search literature. In this paper, we propose two algorithms MSBM (Max-Shift BM) and MSH (Max-Shift BMH) both based on the combination of the bad-character rule of the right-most character used in the Boyer-Moore-Horspool algorithm, the extended bad-character rule and the good-suffix rule used in the Gusfield algorithm, which is a modification of the Boyer-Moore algorithm. Only a small extra space and preprocessing time are needed with respect to the BM and BMH algorithms. Nonetheless, empirical results on different data (DNA, Protein and Text Web) with different pattern lengths show that both MSBM and MSH are very fast in practice. MSBM algorithm usually won against other algorithms.\n------------------------------ \nThis is a preprint of an article intended for publication Journal of Information Processing(JIP). This preprint should not be cited. This article should be cited as: Journal of Information Processing Vol.20(2012) No.2 (online) \nDOI　http://dx.doi.org/10.2197/ipsjjip.20.419\n------------------------------ \n","subitem_description_type":"Other"}]},"item_2_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌"}],"bibliographicIssueDates":{"bibliographicIssueDate":"2012-04-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"4","bibliographicVolumeNumber":"53"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"id":81778,"links":{}}