along with chapter 7 of Draghici: Statistics and Data Analysis for Microarrays Using R and Bioconductor
- use the
annotation()
accessor to get the type of microarray used
annotation(Golub_Merge)
- so we use the
hu6800.db
package, which provides detailed information about the hu6800 platform. The package is updated biannually.
- install the package via
biocLite("hu6800.db")
- if you want to see what objects this package supports type
ls("package:hu6800.db")
[1] "hu6800" "hu6800ACCNUM" "hu6800ALIAS2PROBE" [4] "hu6800CHR" "hu6800CHRLENGTHS" "hu6800CHRLOC" [7] "hu6800CHRLOCEND" "hu6800.db" "hu6800_dbconn" [10] "hu6800_dbfile" "hu6800_dbInfo" "hu6800_dbschema" [13] "hu6800ENSEMBL" "hu6800ENSEMBL2PROBE" "hu6800ENTREZID" [16] "hu6800ENZYME" "hu6800ENZYME2PROBE" "hu6800GENENAME" [19] "hu6800GO" "hu6800GO2ALLPROBES" "hu6800GO2PROBE" [22] "hu6800MAP" "hu6800MAPCOUNTS" "hu6800OMIM" [25] "hu6800ORGANISM" "hu6800ORGPKG" "hu6800PATH" [28] "hu6800PATH2PROBE" "hu6800PFAM" "hu6800PMID" [31] "hu6800PMID2PROBE" "hu6800PROSITE" "hu6800REFSEQ" [34] "hu6800SYMBOL" "hu6800UNIGENE" "hu6800UNIPROT"
- to get the mappings between manufacturer identifiers and gene abbreviations you can use
hu6800SYMBOL
andmget()
mget(ls(hu6800SYMBOL)[1:5],hu6800SYMBOL )
$A28102_at [1] "GABRA3" $AB000114_at [1] "OMD" $AB000115_at [1] "IFI44L" $AB000220_at [1] "SEMA3C" $AB000381_s_at [1] "GML"
- you can also use the
DBI
package infrastructure
- list the tables available for NCBI Entrez Gene metadata
- list the tables available for NCBI Entrez Gene metadata
require(DBI) hh <- org.Hs.eg_dbconn() dbListTables(hh)
[1] "accessions" "alias" "chrlengths" [4] "chromosome_locations" "chromosomes" "cytogenetic_locations" [7] "ec" "ensembl" "ensembl2ncbi" [10] "ensembl_prot" "ensembl_trans" "gene_info" [13] "genes" "go" "go_all" [16] "go_bp" "go_bp_all" "go_cc" [19] "go_cc_all" "go_mf" "go_mf_all" [22] "kegg" "map_counts" "map_metadata" [25] "metadata" "ncbi2ensembl" "omim" [28] "pfam" "prosite" "pubmed" [31] "refseq" "sqlite_stat1" "ucsc" [34] "unigene" "uniprot"
- query for genes annotated as sarcoma oncogenes
SQL <- "select * from gene_info gi, genes g where gi._id=g._id and gene_name like '%arcoma%oncogene%'" sops <- dbGetQuery(hh,SQL) head(sops)
_id gene_name symbol 1 314 v-raf murine sarcoma 3611 viral oncogene homolog ARAF 2 559 v-raf murine sarcoma viral oncogene homolog B1 BRAF 3 1156 v-crk sarcoma virus CT10 oncogene homolog (avian) CRK 4 1157 v-crk sarcoma virus CT10 oncogene homolog (avian)-like CRKL 5 1834 feline sarcoma oncogene FES 6 1859 Gardner-Rasheed feline sarcoma viral (v-fgr) oncogene homolog FGR _id gene_id 1 314 369 2 559 673 3 1156 1398 4 1157 1399 5 1834 2242 6 1859 2268
- find the LYN gene
sops[grep("LYN",sops$symbol),]
- so we see it's in the 13th row and the gene id is 4067
sops[grep("LYN",sops$symbol),]
- the we can use
get()
to get the probe set identifier
get("4067",revmap(hu6800ENTREZID))
[1] "M16038_at"
- display the expression data of the gene by disease group
boxplot(exprs(Golub_Merge)["M16038_at",]~Golub_Merge$ALL.AML,col=c("darkgreen","gold"),notch=T)
- and the ggplot2 version
require(ggplot2) gmdata <- data.frame(expr=exprs(Golub_Merge)["M16038_at",],ALL.AML=Golub_Merge$ALL.AML) ggplot(gmdata,aes(x=ALL.AML,y=expr,fill=ALL.AML)) + geom_boxplot(notch=T) + scale_fill_manual(values=c("darkgreen","gold")) + xlab("") ggsave("ggboxpl.png")
No comments :
Post a Comment