% IPSJ Journal 54(2), Feb 2013, pp. 484--494: spoken term detection (STD) based on
% line detection, extended with syllable duration information.
% The author field lists each of the two authors twice: first in Japanese, then
% in romanized form. All names use the unambiguous "Family, Given" comma form.
% The bilingual abstract is stored in `abstract` (not `note`) so that styles do
% not print it in the bibliography.
@article{oai:ipsj.ixsq.nii.ac.jp:00090269,
 author   = {大野, 哲平 and 秋葉, 友良 and Ohno, Teppei and Akiba, Tomoyosi},
 title    = {音節継続時間を利用した直線検出に基づく音声検索語検出},
 journal  = {情報処理学会論文誌},
 volume   = {54},
 number   = {2},
 pages    = {484--494},
 year     = {2013},
 month    = feb,
 abstract = {情報通信網の発展とデータ記録コストの低減により,音声を含むマルチメディアコンテンツが増大している.現在主流となっているマルチメディアデータに対する検索システムが検索の根拠としているファイル名やタグ情報等の人手によるメタデータ付与は,人的コストが非常に高い.そこで,音声データから求めたい情報になるべく早く,低コストでアクセスできる検索技術が求められている.音声検索語検出(Spoken Term Detection; STD)はある特定の検索語が音声データ中のどこで発話されたかを特定するタスクであり,現在活発な研究活動が行われている分野である.先行研究として,近似文字列照合を音節間距離平面上の直線検出問題ととらえる手法が提案されており,高速で距離順の検出が可能であることが示されている.しかし,認識誤りに対する対策に問題が残されていた.本研究では,直線検出に基づくSTD手法に,音節継続時間情報を組み込むことにより検索性能の向上を試みた.提案手法は,音節の代わりに分析フレームを単位とした距離空間を構成することで,脱落・挿入誤りに頑健な検出を可能にする.評価実験の結果,高Recallの領域で検索性能を改善することを確認した., Nowadays, multimedia contents including speech are rapidly increasing due to both the growth of the communication networks around the world and the decrease of storage cost. The current retrieval systems for such contents rely on the manually annotated metadata, which are too expensive to be obtained. Therefore, it is required the retrieval method that is not expensive but quick to access the desired information by using their speech data. Spoken term detection (STD) is one of the solution, which tries to find the positions that the given query term is uttered at in the spoken document, and recently has been actively studied in the context of speech processing. While conventional methods for STD are to apply approximate string matching against a subword sequence of spoken document obtained by speech recognition, there has been proposed a line-detection-based STD method, which regarded string matching as line detection in a syllable distance plane. While it demonstrated to enable fast and distance-ordered detections, it still suffered from the insertion and deletion errors brought by speech recognition. In this work, we try to improve the detection performance by employing the syllable duration information. The proposed method enables the robust detection by introducing the distance plane using frames as units, instead of using syllables as units. 
Our experimental evaluation showed that the incorporation of syllable duration improved its detection performance in high-recall regions.}
}