% Journal article: IPSJ Journal (情報処理学会論文誌), vol. 48, no. 8, 2007.
% Proposes the ckTF/IDF method for extracting concept keywords from identifiers.
% The fields author-en and abstract-en are non-standard and ignored by
% bibliography styles; they hold the romanized author names and the English
% abstract so that the standard author field lists each person only once.
% Percent signs inside the abstracts are escaped so they cannot start a LaTeX
% comment if a tool copies the text into generated output.
@article{oai:ipsj.ixsq.nii.ac.jp:00009861,
 author      = {大場, 勝 and 権藤, 克彦},
 author-en   = {Ohba, Masaru and Gondow, Katsuhiko},
 title       = {プログラム理解を支援するコンセプトキーワードの自動抽出法{ckTF/IDF}法の提案},
 journal     = {情報処理学会論文誌},
 volume      = {48},
 number      = {8},
 pages       = {2596--2607},
 month       = aug,
 year        = {2007},
 abstract    = {本論文では，識別子からコンセプトキーワードを発見するためのckTF/IDF（Concept Keyword Term Frequency/Inverse Document Frequency）法を提案する．ckTF/IDF 法は大規模なソフトウェアにおけるコンセプトキーワードの抽出に適している．その理由は以下の2 つである．1 つ目は，ckTF/IDF 法はTF/IDF 法に比べ非常に軽量であること．2 つ目は識別子からコンセプトキーワードを抽出するための発見的手法を導入している点である．我々は，教育用OS udos（約5 000 行）とgcc（GNU Compiler Collection，約90 万行）とを事例に予備実験を行った．予備実験の結果，ckTF/IDF 法によるコンセプトキーワードの計算速度は，gcc の場合，TF/IDF 法と比べ新規検索で約6 倍，ファイル更新にともなう再計算で約890 倍も高速だった．コンセプトキーワードの抽出の精度と再現率は，udos の場合で，それぞれ57\%と26\%だった．これは，我々のアプローチが識別子におけるコンセプトキーワードの抽出に向いていることを示している．今後の課題は，たとえば，ckTF/IDF 法を使って高速で精度の高いソースコード検索エンジンを開発することといった，コンセプトキーワードの応用である．},
 abstract-en = {We propose the Concept Keyword Term Frequency/Inverse Document Frequency (ckTF/IDF) method as a novel technique to efficiency mine concept keywords from identifiers in large software projects. ckTF/IDF is suitable for mining concept keywords，since the ckTF/IDF is more lightweight than the TF/IDF method，and the ckTF/IDF’s heuristics is tuned for identifiers in programs. We then experimentally apply the ckTF/IDF to our educational operating system udos (consisting of around 5,000 lines in C code) and GNU C Compiler Collection (gcc, consisting of around 900,000 lines in C code), which produced promising results; By ckTF/IDF method, The gcc’s source code was processed in 6 times faster than TF/IDF method at first time, and in 891 times faster than TF/IDF method at updating the source code. the udos’s source code was processed with an accuracy of around 57\%. This preliminary result suggests that our approach is useful for mining concept keywords from identifiers, although we need more research and experience. For example, ckTF/IDF method can apply to fast source code search engine.},
}