## ---- echo = FALSE, message = FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
options(width = 750)
knitr::opts_chunk$set(
  comment = "#>",
  error = FALSE,
  tidy = FALSE)

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # checking whether or not the Arabidopsis thaliana
#  # genome is avaialable for download
#  is.genome.available("Arabidopsis thaliana")

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # printing details to the console
#  is.genome.available("Arabidopsis thaliana", details = TRUE)

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  length(listGenomes())

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # cheking whether A. thaliana is available in the refseq database
#  is.genome.available("Arabidopsis thaliana", database = "refseq")

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  length(listGenomes(database = "refseq"))

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # the simplest way to retrieve names of available genomes stored within NCBI databases
#  head(listGenomes() , 5)

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # show all details
#  head(listGenomes(details = TRUE) , 5)

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # show all details only for Bacteria
#  head(listGenomes(kingdom = "Bacteria", details = TRUE) , 5)

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # the number of genomes available for each kingdom
#  ncbi_genomes <- listGenomes(details = TRUE)
#  table(ncbi_genomes[ , "kingdoms"])

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # the number of genomes available for each group
#  ncbi_genomes <- listGenomes(details = TRUE)
#  table(ncbi_genomes[ , "group"])

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # order by file size
#  library(dplyr)
#  
#  ncbi_genomes <- listGenomes(details = TRUE)
#  head(arrange(ncbi_genomes, desc(file_size_MB)) , 10)

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # users can also update the organism table using the 'update' argument
#  head(listGenomes(details = TRUE, update = TRUE) , 5)

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # list all Eukaryota that are stored in refseq
#  head(listGenomes(kingdom = "Eukaryota", database = "refseq") , 20)

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # the number of genomes available for each kingdom stored in refseq
#  ncbi_genomes <- listGenomes(details = TRUE, database = "refseq")
#  table(ncbi_genomes[ , "kingdoms"])

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # download the genome of Arabidopsis thaliana from refseq
#  # and store the corresponding genome file in '_ncbi_downloads/genomes'
#  getGenome( db       = "refseq",
#             kingdom  = "plant",
#             organism = "Arabidopsis thaliana",
#             path     = file.path("_ncbi_downloads","genomes") )

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # path to genome: '_ncbi_downloads/genomes/Arabidopsis_thaliana_genome.fna.gz'
#  file_path <- file.path("_ncbi_downloads","genomes","Arabidopsis_thaliana_genome.fna.gz")
#  # read genome as data.table object
#  Ath_genome <- read_genome(file_path, format = "fasta")
#  

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # download the proteome of Arabidopsis thaliana from refseq
#  # and store the corresponding proteome file in '_ncbi_downloads/proteomes'
#  getProteome( db       = "refseq",
#               kingdom  = "plant",
#               organism = "Arabidopsis thaliana",
#               path     = file.path("_ncbi_downloads","proteomes") )

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # path to proteome: '_ncbi_downloads/proteomes/Arabidopsis_thaliana_protein.faa.gz'
#  file_path <- file.path("_ncbi_downloads","proteomes","Arabidopsis_thaliana_protein.faa.gz")
#  # read proteome as data.table object
#  Ath_proteome <- read_proteome(file_path, format = "fasta")

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # download the genome of Arabidopsis thaliana from refseq
#  # and store the corresponding genome CDS file in '_ncbi_downloads/CDS'
#  getCDS( db       = "refseq",
#          kingdom  = "plant",
#          organism = "Arabidopsis thaliana",
#          path     = file.path("_ncbi_downloads","CDS") )

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # path to CDS file: '_ncbi_downloads/CDS/Arabidopsis_thaliana_rna.fna.gz'
#  file_path <- file.path("_ncbi_downloads","CDS","Arabidopsis_thaliana_rna.fna.gz")
#  # read CDS as data.table object
#  Ath_cds <- read_cds(file_path, format = "fasta")

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # download the proteome of Arabidopsis thaliana from refseq
#  # and store the corresponding proteome file in '_ncbi_downloads/proteomes'
#  getProteome( db       = "refseq",
#               kingdom  = "plant",
#               organism = "Arabidopsis thaliana",
#               path     = file.path("_ncbi_downloads","proteomes") )

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # search for filters related to tair
#  organismFilters("Arabidopsis thaliana", topic = "tair")

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  # search for attributes related to refseq
#  organismAttributes("Arabidopsis thaliana", topic = "refseq")

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  bm <- biomart(genes      = c("AT3G01360", "AT3G01540"),
#                mart       = "plants_mart_26",
#                dataset    = "athaliana_eg_gene",
#                attributes = "refseq_peptide",
#                filters    = "tair_locus")
#  
#  bm

## ----eval=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  genes_of_interest <- which(sapply(paste0(bm[ , "refseq_peptide"],".1"),
#                                    function(gene) stringr::str_detect(gene,Ath_proteome[ , geneids])))
#  
#  na.omit(Ath_proteome[genes_of_interest , list(geneids, seqs)])

