<?xml version='1.0' encoding='UTF-8'?>
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
  <responseDate>2026-04-19T21:31:42Z</responseDate>
  <request verb="GetRecord" metadataPrefix="oai_dc" identifier="oai:ipsj.ixsq.nii.ac.jp:00234734">https://ipsj.ixsq.nii.ac.jp/oai</request>
  <GetRecord>
    <record>
      <header>
        <identifier>oai:ipsj.ixsq.nii.ac.jp:00234734</identifier>
        <datestamp>2025-01-19T09:42:35Z</datestamp>
        <setSpec>1164:5159:11541:11627</setSpec>
      </header>
      <metadata>
        <oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns="http://www.w3.org/2001/XMLSchema" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
          <dc:title>Cross-lingual Singing Voice Conversion Leveraging Transformer-based ASR Encoder</dc:title>
          <dc:title>Cross-lingual Singing Voice Conversion Leveraging Transformer-based ASR Encoder</dc:title>
          <dc:creator>Xinyuan, Zhao</dc:creator>
          <dc:creator>Daisuke, Saito</dc:creator>
          <dc:creator>Nobuaki, Minematsu</dc:creator>
          <dc:creator>Xinyuan, Zhao</dc:creator>
          <dc:creator>Daisuke, Saito</dc:creator>
          <dc:creator>Nobuaki, Minematsu</dc:creator>
          <dc:subject>ポスターセッション2</dc:subject>
          <dc:description>In this paper, we focus on singing voice conversion task, including monolingual and cross-lingual singing voice conversion. We proposed a method to modify MaskCycleGAN, a model designed for nonparallel speech conversion task, and applied the proposed model for the singing voice conversion task. The new model concatenates MaskCycleGAN with Whisper, a transformer-based ASR encoder that recognizes the content of the song. The experimental results demonstrated significant performance improvements over the original MaskCycleGAN in both monolingual and cross-lingual scenarios for the singing voice conversion task.</dc:description>
          <dc:description>technical report</dc:description>
          <dc:publisher>情報処理学会</dc:publisher>
          <dc:date>2024-06-07</dc:date>
          <dc:format>application/pdf</dc:format>
          <dc:identifier>研究報告音声言語情報処理（SLP）</dc:identifier>
          <dc:identifier>47</dc:identifier>
          <dc:identifier>2024-SLP-152</dc:identifier>
          <dc:identifier>1</dc:identifier>
          <dc:identifier>4</dc:identifier>
          <dc:identifier>2188-8663</dc:identifier>
          <dc:identifier>AN10442647</dc:identifier>
          <dc:identifier>https://ipsj.ixsq.nii.ac.jp/record/234734/files/IPSJ-SLP24152047.pdf</dc:identifier>
          <dc:language>eng</dc:language>
        </oai_dc:dc>
      </metadata>
    </record>
  </GetRecord>
</OAI-PMH>
