{"links":{},"id":2008201,"metadata":{"_oai":{"id":"oai:ipsj.ixsq.nii.ac.jp:02008201","sets":["1164:8666:1771567929868:1771567998952"]},"path":["1771567998952"],"owner":"80578","recid":"2008201","title":["Data-Centricな手話コーパス構築に向けた動画選別:部分的アノテーションによる言語的特性の評価"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2026-03-08"},"_buckets":{"deposit":"a5152d0c-e02b-43e6-b76b-8882561fbddd"},"_deposit":{"id":"2008201","pid":{"type":"depid","value":"2008201","revision_id":0},"owners":[80578],"status":"published","created_by":80578},"item_title":"Data-Centricな手話コーパス構築に向けた動画選別:部分的アノテーションによる言語的特性の評価","author_link":[],"item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Data-Centricな手話コーパス構築に向けた動画選別:部分的アノテーションによる言語的特性の評価","subitem_title_language":"ja"},{"subitem_title":"Data-Centric Sign Language Corpus Construction: Video Selection via Partial Annotation for Evaluating Linguistic Characteristics","subitem_title_language":"en"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"聴覚障害","subitem_subject_scheme":"Other"}]},"item_type_id":"4","publish_date":"2026-03-08","item_4_text_3":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"筑波技術大学"},{"subitem_text_value":"筑波技術大学"},{"subitem_text_value":"筑波技術大学"},{"subitem_text_value":"筑波技術大学"}]},"item_4_text_4":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_text_value":"Tsukuba University of Technology","subitem_text_language":"en"},{"subitem_text_value":"Tsukuba University of Technology","subitem_text_language":"en"},{"subitem_text_value":"Tsukuba University of Technology","subitem_text_language":"en"},{"subitem_text_value":"Tsukuba University of Technology","subitem_text_language":"en"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_publisher":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"情報処理学会","subitem_publisher_language":"ja"}]},"publish_status":"0","weko_shared_id":-1,"item_file_price":{"attribute_name":"Billing file","attribute_type":"file","attribute_value_mlt":[{"url":{"url":"https://ipsj.ixsq.nii.ac.jp/record/2008201/files/IPSJ-AAC26030024.pdf","label":"IPSJ-AAC26030024.pdf"},"date":[{"dateType":"Available","dateValue":"2028-03-08"}],"format":"application/pdf","billing":["billing_file"],"filename":"IPSJ-AAC26030024.pdf","filesize":[{"value":"371.6 KB"}],"mimetype":"application/pdf","priceinfo":[{"tax":["include_tax"],"price":"660","billingrole":"5"},{"tax":["include_tax"],"price":"330","billingrole":"6"},{"tax":["include_tax"],"price":"0","billingrole":"52"},{"tax":["include_tax"],"price":"0","billingrole":"44"}],"accessrole":"open_date","version_id":"558e0852-a670-4016-88f9-a1001dfe8532","displaytype":"detail","licensetype":"license_note","license_note":"Copyright (c) 2026 by the Information Processing Society of Japan"}]},"item_4_creator_5":{"attribute_name":"著者名","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"船山,滉介"}]},{"creatorNames":[{"creatorName":"米山,文雄"}]},{"creatorNames":[{"creatorName":"加藤,伸子"}]},{"creatorNames":[{"creatorName":"白石,優旗"}]}]},"item_4_creator_6":{"attribute_name":"著者名(英)","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Kosuke Funayama","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Fumio Yoneyama","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Nobuko Kato","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Yuhki Shiraishi","creatorNameLang":"en"}]}]},"item_4_source_id_9":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA12752949","subitem_source_identifier_type":"NCID"}]},"item_4_textarea_12":{"attribute_name":"Notice","attribute_value_mlt":[{"subitem_textarea_value":"SIG Technical Reports are nonrefereed and hence may later appear in any journals, conferences, symposia, etc."}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourceuri":"http://purl.org/coar/resource_type/c_18gh","resourcetype":"technical report"}]},"item_4_source_id_11":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2432-2431","subitem_source_identifier_type":"ISSN"}]},"item_4_description_7":{"attribute_name":"論文抄録","attribute_value_mlt":[{"subitem_description":"大規模言語モデルの発展に伴い,手話認識研究でもYouTube-SL-25等の大規模Webデータセットの活用が進んでいる.これらは量的資源として有用だが,Data-Centric AIの観点からは,混在する多様な手話形式が存在する日本の手話(日本手話,中間型手話,日本語対応手話)から,学習目的に合致した日本の手話を適切に整理し,データの整合性を確保するプロセスが喫緊の課題である.著者らは先行研究において,外形的特徴に基づくフィルタリングを実施した.本稿では次段階として,各動画に含まれる「日本の手話」を効率的に評価する部分的アノテーション手法の妥当性を検討する.具体的には,選別された単独出演動画の中央部20秒間を抽出し,ろう者アノテーターがELANを用いて,手指動作(Gloss)に加え,非手指要素,分類詞,役割交代等の言語学的特徴を多層的に記述する.予備的検討では,本手法により日本手話に特徴的な文法構造が顕著なデータと,他の手話形式との差異が記述的に確認された.本稿では,この選別プロセスの詳細と,高品質な基盤モデル構築に向けた方法論としての有効性を論じる.","subitem_description_type":"Other"}]},"item_4_description_8":{"attribute_name":"論文抄録(英)","attribute_value_mlt":[{"subitem_description":"With the rapid progress of large language models, sign language recognition research has increasingly leveraged large-scale web datasets such as YouTube-SL-25. While these resources are valuable in terms of scale, web videos in Japan often contain mixed sign language varieties, including Japanese Sign Language (JSL), intermediate varieties, and Japanese-based Signed Japanese (MSS). From a data-centric AI perspective, such heterogeneity can undermine dataset consistency and interpretability, making purpose-driven selection and organization essential. In this paper, we examine a partial-annotation approach for efficiently characterizing the ”Japanese sign language” contained in each video. We extract a 20-second segment from the middle of each selected single-signer video, and Deaf annotators use ELAN to provide multi-layer, time-aligned descriptions of linguistic features, including glosses, classifiers, mouth actions, non-manual markers, and role shift. A preliminary investigation suggests that this approach can descriptively identify segments in which grammatical structures characteristic of JSL are salient and can document differences from other varieties. We discuss the selection workflow and its implications for constructing higher-quality corpora and foundation models.","subitem_description_type":"Other"}]},"item_4_biblio_info_10":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicPageEnd":"7","bibliographic_titles":[{"bibliographic_title":"研究報告アクセシビリティ(AAC)"}],"bibliographicPageStart":"1","bibliographicIssueDates":{"bibliographicIssueDate":"2026-03-08","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"24","bibliographicVolumeNumber":"2026-AAC-30"}]},"relation_version_is_last":true,"weko_creator_id":"80578"},"created":"2026-03-02T07:32:36.775103+00:00","updated":"2026-03-02T07:32:40.948882+00:00"}