% Cleaned-up IPSJ repository export: authors de-duplicated and rewritten in
% "Family, Given" form; the abstract (previously stored twice in the note
% field) is kept once in the abstract field so styles do not print it.
% The journal name is UTF-8; classic 8-bit BibTeX toolchains may need Biber
% or a transliterated name instead.
@article{oai:ipsj.ixsq.nii.ac.jp:00017771,
  author   = {Stokes, Aaron J. and Matsuda, Hideo and Hashimoto, Akihiro},
  title    = {{GXML}: A Novel Method for Exchanging and Querying Complete Genomes by Representing them as Structured Documents},
  journal  = {情報処理学会論文誌データベース(TOD)},
  volume   = {40},
  number   = {SIG06(TOD3)},
  pages    = {66--78},
  month    = aug,
  year     = {1999},
  abstract = {Complete DNA sequences (complete genomes) for an increasing number of organisms are becoming available each year for use in biological research. However, genome project groups incorporate their own formats (or schemas) for representing the genome data accumulated by the projects. Such heterogeneity of their schemas prevents researchers from exchanging and comparing their data across genomes. In this paper, we present a new method for exchanging and querying information on complete genomes. Since genomes and the genetic information encoded on them have a hierarchical structure, they can be represented as a kind of structured document. We propose a document language called GXML for representing complete genomes. The document language, based on XML, can be used to exchange many kinds of genomic data, and offers a high degree of extensibility. We also define a query language called GQL to operate on the genome documents. Using this language, one can easily associate genes among different genomes and perform other biological analyses. We developed a prototype system based on the language. Using the system, we executed several test queries. The results were consistent with those published in biological literature. The processor and memory requirements of the prototype system were acceptable.},
}