@techreport{oai:ipsj.ixsq.nii.ac.jp:00225946,
  author = {Wang, Bowen and Li, Liangzhi and Nakashima, Yuta and Nagahara, Hajime},
  issue  = {2},
  month  = {May},
  note   = {Deep Neural Networks (DNNs) have shown their power in many research fields, and related applications are entering people's daily lives with unstoppable momentum. However, the large number of training parameters in DNNs makes it difficult to learn representations from real-world data efficiently, and their black-box nature harms explainability. In this thesis, we show how to design a DNN for better representation and how to interpret its behavior for reliable artificial intelligence (AI). By embedding a slot-attention-based explainable AI (XAI) module, we make a DNN model interpretable and show that representation learning benefits from this interpretability. XAI methods are further extended to find representations in a simple classification task. The found representations are transferred as training data for a complex object detection task, realizing weak supervision. In two different real-world scenarios, we show that our proposals encourage DNNs to learn better representations while remaining interpretable.},
  title  = {Towards Better Representation and Interpretability for Deep Neural Networks on Visual Tasks},
  year   = {2023}
}