% Technical report (May 2024) proposing a root-cause-analysis support function
% for microservice failures based on visualizing inter-Pod communication.
% Cleanup notes for this record:
%   - author: the export listed each of the two authors twice; the second
%     pair was a romanized duplicate with family/given names swapped. The
%     romanized forms are kept in the non-standard (style-ignored) field
%     author_en, in "Family, Given" order.
%   - number: the report number was stored in the non-standard field "issue".
%   - abstract: the bilingual abstract (Japanese, then English) was stored in
%     "note", which styles print verbatim in the bibliography.
%   - institution: required for this entry type but absent in the export.
%     NOTE(review): the value is inferred from the IPSJ repository identifier
%     in the citation key -- confirm against the publisher record.
@techreport{oai:ipsj.ixsq.nii.ac.jp:00234255,
  author      = {Hu, Shizhen and 西島, 直},
  author_en   = {Hu, Shizhen and Nishijima, Nao},
  title       = {Pod間通信の可視化によるマイクロサービスの障害根本原因分析支援機能提案},
  institution = {Information Processing Society of Japan},
  number      = {18},
  month       = may,
  year        = {2024},
  abstract    = {マイクロサービス障害発生時の初期対応では，障害の原因をアプリケーション側とインフラ基盤側のどちらにあるか切り分ける必要がある．Kubernetes の環境では，従来の根本原因分析において，監視データから仮説を作り，それを検証するプロセスが非効率という課題がある．異常 Pod の分布特徴を活用することで，仮説の範囲を絞ることが可能である．監視データから異常が発生している Pod の分布特徴から障害原因の仮説を検証する機能を提案・実装し，運用チームからフィードバックで効果を検証した．本稿では，Pod 間通信の可視化によるマイクロサービスの障害原因分析支援の機能検討について報告する．, In the initial response to microservice failures, it is necessary to determine whether the cause of the failure is on the application side or the infrastructure side. In the K8s environment, the traditional root cause analysis process of creating and testing hypotheses based on monitoring data is inefficient. By using the distribution characteristics of anomaly pods, it is possible to narrow the scope of the hypothesis. We proposed and implemented a function to test the hypothesis of failure causes based on the distribution characteristics of anomaly pods from the monitoring data. We then verified the effectiveness of the function with feedback from the operations team. In this report, we report the results of our study to support the root cause analysis of microservice failures by visualizing the inter-pod communication.},
}