{"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:00190376","sets":["934:1022:9445:9498"]},"path":["9498"],"owner":"11","recid":"190376","title":["Estimating Reference Scopes of Wikipedia Article Inner-links"],"pubdate":{"attribute_name":"公開日","attribute_value":"2018-07-11"},"_buckets":{"deposit":"5abf068f-e71e-40af-bc92-cd6aa971ea9f"},"_deposit":{"id":"190376","pid":{"type":"depid","value":"190376","revision_id":0},"owners":[11],"status":"published","created_by":11},"item_title":"Estimating Reference Scopes of Wikipedia Article Inner-links","author_link":["435444","435446","435445","435447"],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Estimating Reference Scopes of Wikipedia Article Inner-links"},{"subitem_title":"Estimating Reference Scopes of Wikipedia Article Inner-links","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"[研究論文] Wikipedia, link suggestion, LDA, word embedding, PMI","subitem_subject_scheme":"Other"}]},"item_type_id":"3","publish_date":"2018-07-11","item_3_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"Waseda University"},{"subitem_text_value":"Waseda University"}]},"item_3_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Waseda University","subitem_text_language":"en"},{"subitem_text_value":"Waseda University","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/190376/files/IPSJ-TOD1102004.pdf","label":"IPSJ-TOD1102004.pdf"},"date":[{"dateType":"Available","dateValue":"2020-07-11"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-TOD1102004.pdf","filesize":[{"value":"1.8 MB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"0","billingrole":"5"},{"tax":["include_tax"],"price":"0","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"13"},{"tax":["include_tax"],"price":"0","billingrole":"39"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"08a96374-a0cc-4640-b307-560a33d3c01f","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2018 by the Information Processing Society of Japan"}]},"item_3_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Renzhi, Wang"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Mizuho, Iwaihara"}],"nameIdentifiers":[{}]}]},"item_3_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Renzhi, Wang","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Mizuho, Iwaihara","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_3_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11464847","subitem_source_identifier_type":"NCID"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_6501","resourcetype":"journal article"}]},"item_3_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1882-7799","subitem_source_identifier_type":"ISSN"}]},"item_3_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"Wikipedia is the largest online encyclopedia, utilized as machine-knowledgeable and semantic resources. Links within Wikipedia indicate that two linked articles or parts of them are related each other about their topics. Existing link detection methods focus on linking to article titles, because most of links in Wikipedia point to article titles. But there is a number of links in Wikipedia pointing to corresponding specific segments, such as paragraphs, because the whole article is too general and it is hard for readers to obtain the intention of the link. We propose a method to automatically predict whether a link target is a specific segment or the whole article, and evaluate which segment is most relevant. We propose a combination method of Latent Dirichlet Allocation (LDA) and Maximum Likelihood Estimation (MLE) to represent every segment as a vector, and then we obtain similarity of each segment pair. Finally, we utilize variance, standard deviation and other statistical features to produce prediction results. We also apply word embeddings to embed all the segments into a semantic space and calculate cosine similarities between segment pairs. Then we utilize Random Forest to train a classifier to predict link scopes. Evaluations on Wikipedia articles show an ensemble of the proposed features achieved the best results.\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.26(2018) (online)\n------------------------------","subitem_description_type":"Other"}]},"item_3_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"Wikipedia is the largest online encyclopedia, utilized as machine-knowledgeable and semantic resources. Links within Wikipedia indicate that two linked articles or parts of them are related each other about their topics. Existing link detection methods focus on linking to article titles, because most of links in Wikipedia point to article titles. But there is a number of links in Wikipedia pointing to corresponding specific segments, such as paragraphs, because the whole article is too general and it is hard for readers to obtain the intention of the link. We propose a method to automatically predict whether a link target is a specific segment or the whole article, and evaluate which segment is most relevant. We propose a combination method of Latent Dirichlet Allocation (LDA) and Maximum Likelihood Estimation (MLE) to represent every segment as a vector, and then we obtain similarity of each segment pair. Finally, we utilize variance, standard deviation and other statistical features to produce prediction results. We also apply word embeddings to embed all the segments into a semantic space and calculate cosine similarities between segment pairs. Then we utilize Random Forest to train a classifier to predict link scopes. Evaluations on Wikipedia articles show an ensemble of the proposed features achieved the best results.\n------------------------------\nThis is a preprint of an article intended for publication Journal of\nInformation Processing(JIP). This preprint should not be cited. This\narticle should be cited as: Journal of Information Processing Vol.26(2018) (online)\n------------------------------","subitem_description_type":"Other"}]},"item_3_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographic_titles":[{"bibliographic_title":"情報処理学会論文誌データベース(TOD)"}],"bibliographicIssueDates":{"bibliographicIssueDate":"2018-07-11","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"2","bibliographicVolumeNumber":"11"}]},"relation_version_is_last":true,"weko_creator_id":"11"},"id":190376,"updated":"2025-01-20T01:04:29.736795+00:00","links":{},"created":"2025-01-19T00:56:20.893237+00:00"}