{"id":185064,"updated":"2025-01-20T03:05:59.083177+00:00","links":{},"created":"2025-01-19T00:52:16.334213+00:00","metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00185064","sets":["1164:1165:9020:9321"]},"path":["9321"],"owner":"11","recid":"185064","title":["Detection of mergeable Wikipedia articles based on multiple embedding results"],"pubdate":{"attribute_name":"公開日","attribute_value":"2017-12-15"},"_buckets":{"deposit":"68208156-3d6c-48cb-80a8-8a292e1c0696"},"_deposit":{"id":"185064","pid":{"type":"depid","value":"185064","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"Detection of mergeable Wikipedia articles based on multiple embedding results","author_link":["410409","410407","410408","410410"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Detection of mergeable Wikipedia articles based on multiple embedding results"},{"subitem_title":"Detection of mergeable Wikipedia articles based on multiple embedding results","subitem_title_language":"en"}]},"item_type_id":"4","publish_date":"2017-12-15","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Information, Production and Systems, Waseda University"},{"subitem_text_value":"Graduate School of Information, Production and Systems, Waseda University"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Graduate School of Information, Production and Systems, Waseda University","subitem_text_language":"en"},{"subitem_text_value":"Graduate School of Information, Production and Systems, Waseda University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/185064/files/IPSJ-DBS17166015.pdf","label":"IPSJ-DBS17166015.pdf"},"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-DBS17166015.pdf","filesize":[{"value":"333.6 kB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"13"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_login","version_id":"878f7303-5c30-41ec-9784-918dde2dfa0f","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2017 by the Institute of Electronics, Information and Communication Engineers This SIG report is only available to those in membership of the SIG."}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Renzhi, Wang"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Mizuho, Iwaihara"}],"nameIdentifiers":[{}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Renzhi, Wang","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Mizuho, Iwaihara","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AN10112482","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2188-871X","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"Wikipedia is the largest online encyclopedia, in which articles are edited by different volunteers with different thoughts and styles. Sometimes two or more articles' titles are different but the themes of these articles are exactly the same or strongly similar. Administrators and editors are supposed to detect these article pairs and determine whether they should be merged together. In this paper, we propose a method to automatically determine whether an article pair should be merged together. We consider both duplicate case and overlap case. In the duplicate case, the articles pairs are covering exactly the same contents. In the overlap case, the articles pairs are covering related subjects that have a significant overlap. The content of an overlap part is similar but the words in the pair are probably different, so methods that exploit semantic relatedness are necessary. To deal with this problem we propose combination of multiple embedding results and rebuild word vectors for detecting mergeable article pairs. We also deal with various mergeable cases by combining distinct text fragments together. Our experiments show that our method performs better than existing embedding methods.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Wikipedia is the largest online encyclopedia, in which articles are edited by different volunteers with different thoughts and styles. Sometimes two or more articles' titles are different but the themes of these articles are exactly the same or strongly similar. Administrators and editors are supposed to detect these article pairs and determine whether they should be merged together. In this paper, we propose a method to automatically determine whether an article pair should be merged together. We consider both duplicate case and overlap case. In the duplicate case, the articles pairs are covering exactly the same contents. In the overlap case, the articles pairs are covering related subjects that have a significant overlap. The content of an overlap part is similar but the words in the pair are probably different, so methods that exploit semantic relatedness are necessary. To deal with this problem we propose combination of multiple embedding results and rebuild word vectors for detecting mergeable article pairs. We also deal with various mergeable cases by combining distinct text fragments together. Our experiments show that our method performs better than existing embedding methods.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"5","bibliographic_titles":[{"bibliographic_title":"研究報告データベースシステム(DBS)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2017-12-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"15","bibliographicVolumeNumber":"2017-DBS-166"}]},"relation_version_is_last":true,"weko_creator_id":"11"}}