README.md
January 18, 2021 · View on GitHub
R interface to the Chemical Translation Service (CTS)
CTSgetR provides a consistent interface to translation of chemical names and over 200 database identifiers including InChIKey, HMDB, KEGG and PubChem. Translation of chemical names is hard. Use CTSgetR to robustly translate chemical names to other identifiers through 1) conversion to InChIKey 2) biological or popularity scoring and 3) translation to over 200 biological database identifiers. CTSgetR uses a sqlite database to cache and speed up all of your routine translations.
This package supports metabolite identifier translation:
using R
Installation
install_github("dgrapov/CTSgetR")
Make sure CTS API is available
library(CTSgetR)
GET('https://cts.fiehnlab.ucdavis.edu/services') %>%
http_status(.) %>%
{if( .$category != 'Success'){stop('Oops looks like https://cts.fiehnlab.ucdavis.edu/services is down!') }}
View some of the possible translation options between > 200 databases
trans<-unlist(valid_from())
head(trans,10)
## [1] "BioCyc" "CAS"
## [3] "ChEBI" "Chemical Name"
## [5] "Human Metabolome Database" "InChIKey"
## [7] "KEGG" "LMSD"
## [9] "LipidMAPS" "PubChem CID"
Find a database of interest
want<-'CID'
trans[grepl(want,trans,ignore.case=TRUE)]
## [1] "PubChem CID"
Initialize a local database to speed up routine queries
db_name<-'ctsgetr.sqlite'
init_CTSgetR_db(db_name)
db_stats()
Translation examples
Chemical Name to InChIKey
db_name<-'ctsgetr.sqlite' # local cache
id<-c("alanine",'lactic acid')
from<-"Chemical Name"
to<-"InChIKey"
CTSgetR(id,from,to,db_name=db_name)
## id InChIKey
## 1 alanine QNAYBMKLOCPYGJ-REOHCLBHSA-N
## 2 lactic acid JVTAAEKCZFNVCJ-UHFFFAOYSA-N
One identifier to many
The example below shows the alternative data.frame input format for more complex queries.
id<-c("alanine",'lactic acid')
from<-"Chemical Name"
to<- c( "PubChem CID", "KEGG","Human Metabolome Database")
CTSgetR(id,from,to,db_name=db_name)
## id Human Metabolome Database KEGG PubChem CID
## 1 alanine HMDB0000161 C00041 5950
## 2 lactic acid HMDB0144295 C01432 19789253
Many identifiers to one
Build up complex queries by combining data frames of id, from, and to values.
#from many to many
args <-structure(list(id = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 4L, 4L), .Label = c("alanine", "foo", "lactic acid", "HMDB0000161"
), class = "factor"), from = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L), .Label = c("Chemical Name", "Human Metabolome Database"
), class = "factor"), to = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L, 3L, 2L, 1L), .Label = c("PubChem CID", "KEGG", "Human Metabolome Database"
), class = "factor")), class = "data.frame", row.names = c(NA,
-11L))
args
## id from to
## 1 alanine Chemical Name PubChem CID
## 2 foo Chemical Name PubChem CID
## 3 lactic acid Chemical Name PubChem CID
## 4 alanine Chemical Name KEGG
## 5 foo Chemical Name KEGG
## 6 lactic acid Chemical Name KEGG
## 7 alanine Chemical Name Human Metabolome Database
## 8 foo Chemical Name Human Metabolome Database
## 9 lactic acid Chemical Name Human Metabolome Database
## 10 HMDB0000161 Human Metabolome Database KEGG
## 11 HMDB0000161 Human Metabolome Database PubChem CID
args %>%
split(.,.$from) %>%
map(~CTSgetR(.$id,.$from,.$to,db_name=db_name)) %>%
bind_rows(.)
## id Human Metabolome Database KEGG PubChem CID
## 1 alanine HMDB0000161 C00041 5950
## 2 foo <NA> <NA> <NA>
## 3 lactic acid HMDB0144295 C01432 19789253
## 4 HMDB0000161 <NA> C00041 5950
Deploy
CTSgetR as a dockerized API
The following docker image and docker-compose commands can be used to build and run the CTSgetR package as an opencpu based API.
-
CTSgetR image
-
ocpuclient: client library for accessing the CTSgetR API
The CTSgetR image contains an opencpu and Rstudio server
-
localhost/ocpu/: opencpu-server -
localhost/rstudio/: rstudio server (use user: opencpu and password: the value of rstudio_pass set when building the image)
Build docker image
build
export rstudio_pass=mypassword # rstudio server password for user opencpu
docker-compose -f docker-compose.yml build --force-rm
Launch API
#mount to persist internal sqlite DB between updates
export ctsgetr_db_mount=<local path to save database e.g. /mypath>
docker-compose -f docker-compose.yml up -d
Test API endpoints
bash
curl http://localhost/ocpu/library/CTSgetR/R/heartbeat
R
heartbeat
library(ocpuclient)
base_url<-'http://localhost/ocpu/'
endpoint<-'library/CTSgetR/R/heartbeat'
url<-paste0(base_url,endpoint)
post_ocpu(url=url)
translation
#translate
endpoint<-'library/CTSgetR/R/CTSgetR'
url<-paste0(base_url,endpoint)
id <-
c("C15973",
"C00026")
from <- "KEGG"
to <- "PubChem CID"
body<-list(id=id,from=from,to=to,db_name=db_name)
post_ocpu(url=url,body=body)
Launch
shiny UI using asynchronous opencpu API
The following example shows how to use a shiny module combined with the future and promises R packages to connect to an opencpu API using async calls.
library(shiny)
library(tippy)
library(CTSgetR) # local calls
library(ocpuclient) # CTSgetR opencpu API calls
Specify local database or API options
#one of local
Sys.setenv('ctsgetr_DB'='inst/ctsgetr.sqlite') #see section `in R` showing how to initialize a local database
#or API
Sys.setenv('ctsgetr_DB'='/ctsgetr/inst/ctsgetr.sqlite') # in API docker for mount
Sys.setenv('CTSgetR_API'='http://localhost/ocpu/library/CTSgetR/R/CTSgetR') # url of API endpoint
User input translations
library(promises)
library(future)
plan(multisession)
#module
ui <- fluidPage(
sidebarLayout(position = "left",
sidebarPanel(tagList(mod_CTSgetR_ui("translate"))),
mainPanel(verbatimTextOutput("main_out")))
)
server <- function(input, output, session) {
translation <- mod_CTSgetR_server('translate')
output$main_out <- renderPrint({
translation() %...>% print(.)
})
}
shinyApp(ui, server)
Connect to other shiny components
library(promises)
library(future)
plan(multisession)
#make `example` a reactive returning a data frame to update dynamically
example<-data.frame('chemical_name' = c('alanine','Pyruvic acid'))
#module
ui <- fluidPage(
sidebarLayout(position = "left",
sidebarPanel(tagList(mod_CTSgetR_ui("translate"))),
mainPanel(verbatimTextOutput("main_out")))
)
server <- function(input, output, session) {
translation <- mod_CTSgetR_server('translate',data=example)
output$main_out <- renderPrint({
translation() %...>% print(.)
})
}
shinyApp(ui, server)