@techreport{oai:ipsj.ixsq.nii.ac.jp:00232333,
 author = {Subin, Choi and Dongshik, Kang},
 issue = {6},
 month = {Feb},
 note = {This paper introduces a semi-supervised learning framework that employs Variational Autoencoders (VAEs) to create synthetic data, thereby enhancing training sets in smart factory environments where labeled data is typically scarce. By accurately modeling the input data distribution, VAEs can generate new instances that closely resemble the real dataset, enriching the data available for model training. This strategy significantly reduces the need for laborious labeling effort while improving the robustness of the datasets. The effectiveness of the approach is evaluated on the STL-10 and MNIST datasets. Results show a modest decrease in accuracy on STL-10, dropping from 0.44 to 0.41 when VAE-augmented data is incorporated into training. Similarly, accuracy on MNIST fell slightly from 0.99 to 0.97 when the semi-supervised technique was applied. These findings highlight the importance of careful calibration in the use of unlabeled data to sustain model performance. Further investigation is suggested to refine the VAE configurations and the semi-supervised learning procedure, potentially improving the outcomes of such methods. The semi-supervised learning in this work employs the MixMatch algorithm, which facilitates the effective integration of labeled and unlabeled data. Image classification tasks in the study are carried out with Convolutional Neural Networks (CNNs), capitalizing on their strong feature extraction capabilities. Future research directions include refining MixMatch parameters and CNN architectures to further leverage the composite dataset of VAE-generated and unlabeled data for optimal performance in image classification tasks.},
 title = {Semi-supervised learning using VAE generated data and unlabeled data},
 year = {2024}
}