% Journal article: volume 61, number 11 (November 2020), pages 1708--1717.
% "author" lists the four authors once, in Japanese script.
% "yomi" carries their romanized readings in "Family, Given" order; it is
% intended as a jBibTeX-style reading field and is ignored by styles that do
% not know it.
% "abstract" holds the Japanese abstract followed by the English abstract;
% it is kept out of "note" so that styles do not print it in the bibliography.
@article{oai:ipsj.ixsq.nii.ac.jp:00207736,
  author   = {山西, 良典 and 田中, 一星 and 井本, 桂右 and 山下, 洋一},
  yomi     = {Yamanishi, Ryosuke and Tanaka, Issei and Imoto, Keisuke and Yamashita, Yoichi},
  title    = {音声エンタテインメントからのウェブ音声マイニングの可能性},
  journal  = {情報処理学会論文誌},
  volume   = {61},
  number   = {11},
  pages    = {1708--1717},
  year     = {2020},
  month    = nov,
  abstract = {ウェブ上には様々なマルチメディアで構成されたユーザ参加型のエンタテインメントコンテンツが存在している．これらのエンタテインメントコンテンツからは，統制された条件に従った映像や音声を取得できる可能性がある．本稿では，音声データの活用に焦点を当て，ウェブ上のエンタテインメントコンテンツからの統制された環境下での音声データの収集をウェブ音声マイニングとして提案する．ウェブ音声マイニングの基本的な手続きのフレームワークを示し，ウェブ上のエンタテイメントコンテンツから音声データセットを取得した．音声コンテキスト認識とt-SNE法を用いた2次元空間上への可視化を通して，取得した音声データセット中の発話に見られる音響特徴の傾向について基礎的な考察を行った．その結果，各発話は課題コンテキストごとに複数の発話者で共通の音響特徴を示す傾向であることを確認し，ユーザ発信型のエンタテインメントコンテンツ中の音声をラベル付き音声データとして研究用途に応用できる可能性を示した．, There is a lot of participatory entertainment consisting of varied multimedia on Web. From such entertainment contents, we believe that it should be possible to acquire multimedia data such as movie and audio under the fixed condition. This paper focuses on the application of the speech data, and proposes the framework that acquires speech data under the fixed condition from vocal entertainment contents on Web as Web speech mining. In this paper, basic procedures of Web speech mining were introduced and the speech dataset was constructed from the entertainment content on Web. The speeches in the constructed dataset were foundationally studied based on their acoustic features through speech context recognition and visualization using t-SNE method. As the result, we confirmed that speeches in the constructed dataset showed a trend that the speakers commonly expressed specific acoustic features for each context. The results also pointed the application vision of Web speech mining, where speeches in user-generated entertainment contents can be applied to labeled speech data for research use.},
}