% Technical report on anomalous sound detection with a complex-valued hybrid
% neural network (Nishiyama and Tamamori, February 2023).
%
% Cleanup notes for this auto-exported record:
%   - author: the export listed each author twice (Japanese + romanized, the
%     latter with family/given names swapped). The Japanese forms stay in
%     `author`; the romanized forms are kept in the non-standard `author_en`
%     field, which standard styles ignore.
%   - abstract / abstract_en: this text was exported into `note`, which styles
%     print verbatim in the bibliography. It is moved, unabridged, into
%     non-printing fields; bare percent signs are escaped for LaTeX.
%   - number: was exported as `issue`, which classic @techreport styles ignore.
%   - institution: required for @techreport but absent from the export; value
%     inferred from the IPSJ repository identifier in the citation key.
%     TODO confirm against the published record.
%   - NOTE(review): the title reads "ニューラルワーク" while the abstract uses
%     "ニューラルネットワーク"; left untouched here, verify against the
%     published title.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00224446,
  author      = {西山, 翔大 and 玉森, 聡},
  author_en   = {Nishiyama, Shota and Tamamori, Akira},
  title       = {位相変動を考慮した複素数値ハイブリッドニューラルワークによる異常音検知},
  institution = {Information Processing Society of Japan},
  number      = {49},
  month       = feb,
  year        = {2023},
  abstract    = {異常音検知とは,入力される対象の機械音を正常か異常かを識別するタスクである.異常音は発生頻度が低く,多様性に富んでるため,正常音のみから異常音を検知する問題として扱われる.多くの異常音検知手法のモデルの入力に用いられる音声特徴量はメルスペクトログラムである.しかし,音声波形をフーリエ変換し得られる複素スペクトログラムからメルスペクトログラムへ変換する際に,位相変動は失われる.本研究では,位相変動の有用性を示すために複素数値ニューラルネットワークと実数値ニューラルネットワークによる異常音検知手法を比較する.比較の結果,位相変動が有用である機械音と有用ではない機械音が存在した.そこで本研究では,位相変動を考慮するために,複素スペクトログラムを入力可能なすべての特徴量抽出演算において,複素数値の構造を保つ複素数値モジュールとメルスペクトログラムを入力とする実数値モジュールを組み合わせた複素数値ハイブリッドニューラルネットワークを提案する.提案手法の有効性は,ToyADMOS データセットのマルチチャネル音声を対象とした異常音検知実験により検証した.実験の結果,提案手法は,複素数値ニューラルネットワークならびに実数値ニューラルネットワークと比較して,すべての機械音の平均 AUC を約 3\% 向上させた.},
  abstract_en = {Anomalous sound detection is the task of identifying whether an incoming mechanical sound is normal or anomalous. Since anomalous sounds occur infrequently and are highly diverse, it is treated as a problem of detecting anomalous sounds from normal sounds only. The acoustic features used as input to most anomalous sound detection models are mel-spectrogram. However, the phase variation is lost when the complex-spectrogram obtained by Fourier transforming the sound waveform is converted to the mel-spectrogram. In this study, we compare anomalous sound detection methods using complex-valued neural networks and real-valued neural networks to demonstrate the usefulness of phase variation. As a result of the comparison, there existed machine sounds for which phase variation was valuable and machine sounds for which it was not valuable. In this study, we propose a complex-valued hybrid neural network that combines a complex-valued module that preserves the structure of complex values and a real-valued module that takes mel-spectrogram as input for all feature extraction operations in which complex-spectrogram can be input in order to take phase variation into account. We propose a complex-valued hybrid neural network that combines a complex-valued structure-preserving module and a real-valued module that takes the mel-spectrogram as input for all feature extraction operations. Experiments verified the effectiveness of the proposed method on anomalous sound detection for multi-channel sound in the ToyADMOS dataset. Experimental results showed that the proposed method improved the average AUC of all machine sounds by around 3\% compared to both complex-valued and real-valued neural networks.},
}