#-----------------------------------------------------------------------------------#
# Section 2.3.5 DNT NAM: OP results section
# 
# March 19, 2020
# updated 14 May 2020
# author: paul-friedman.katie@epa.gov
#-----------------------------------------------------------------------------------#

#-----------------------------------------------------------------------------------#
# loading libraries
#-----------------------------------------------------------------------------------#

# Start from an empty workspace: removes every object returned by ls().
rm(list = ls())

# Packages are attached in this order; later packages mask same-named
# functions from earlier ones, so the order is left untouched.
# NOTE(review): the plotting code further down calls ggplot(); ggplot2 is not
# attached in this block -- confirm it is attached before that code runs.
library(data.table)
library(openxlsx)
library(dplyr)
library(magrittr)
library(reshape2)
library(stringr)
library(gplots)
library(viridis)

# All relative paths below ('./source', './output') are resolved against this
# directory.
setwd('./public_code') #set working directory

# Fresh load of source data
# The script later uses op.list, op.sample, hci.mc5, hci.tbl, mc5.mea.dev and
# mea.tbl without creating them, so they are presumably restored by these
# load() calls -- TODO confirm which .RData file provides which object.
load('./source/op_listinfo.RData')
load('./source/HCI_13Apr2020.RData')
load('./source/NHEERL_MEA_dev_13Apr2020.RData')

#-----------------------------------------------------------------------------------#
# Qualitative results: heatmaps
#-----------------------------------------------------------------------------------#

## MEA NFA heatmap first

# Structure-type flag: every chemical in op.list starts with is.oxon = 1 and
# the eight DTXSIDs listed below are then reset to 0.
op.list[, is.oxon := 1]
unique(op.list[order(PREFERRED_NAME), c('DTXSID', 'PREFERRED_NAME')])
op.list[DTXSID %in% c('DTXSID9032329','DTXSID2020347', 'DTXSID9020407',
                      'DTXSID7020479', 'DTXSID4020791','DTXSID4032459',
                      'DTXSID5024261', 'DTXSID2022254'), is.oxon := 0]

# Same flag on the MEA NFA level-5 rows that belong to the OP list.
mc5.mea.dev[dsstox_substance_id %in% op.list[, DTXSID], is.oxon := 1]
mc5.mea.dev[dsstox_substance_id %in% c('DTXSID9032329','DTXSID2020347', 'DTXSID9020407',
                                       'DTXSID7020479', 'DTXSID4020791','DTXSID4032459',
                                       'DTXSID5024261', 'DTXSID2022254'), is.oxon := 0]

# Wide table: one row per sample (chnm/casn/dsstox_substance_id/spid/is.oxon),
# one column per assay endpoint name (aenm), cells hold modl_ga.
mat.op <- dcast.data.table(mc5.mea.dev[dsstox_substance_id %in% op.list[, DTXSID]],
                           chnm + casn + dsstox_substance_id + spid + is.oxon ~ aenm,
                           value.var = 'modl_ga')

# Row label used on the heatmap: chemical name + sample id.
mat.op[, names := paste0(chnm, "_", spid)]

# Endpoint columns are identified by name (everything that is neither an id
# column nor the row label) rather than by fixed positions, so the matrix stays
# correct if the number of endpoints in the source data changes.
mea.id.cols <- c('chnm', 'casn', 'dsstox_substance_id', 'spid', 'is.oxon')
mea.assay.cols <- setdiff(colnames(mat.op), c(mea.id.cols, 'names'))

# Missing modl_ga values are replaced with 6 so every cell can be clustered
# and coloured; the label column is carried along unchanged.
mat2.op <- mat.op[, lapply(.SD, function(x) ifelse(is.na(x), 6, x)),
                  .SDcols = c(mea.assay.cols, 'names')]
matrix <- as.matrix(mat2.op[, mea.assay.cols, with = FALSE])
rownames(matrix) <- mat2.op[, names]

# Column annotation: look up number/activity in mea.tbl for each endpoint
# column of the matrix (matched on aenm).
mea.dev.2 <- as.data.table(colnames(matrix))
mea.dev.2$number <- mea.tbl$number[match(mea.dev.2$V1, mea.tbl$aenm)]
mea.dev.2$activity <- mea.tbl$activity[match(mea.dev.2$V1, mea.tbl$aenm)]

# Row annotation: recover the chemical name from the row label (text before
# the first "_"), pull is.oxon from op.list, and map it to a side-bar colour.
# NOTE(review): assumes chnm contains no "_" and equals op.list$PREFERRED_NAME.
oxon.tbl <- as.data.table(rownames(matrix))
oxon.tbl[, PREFERRED_NAME := tstrsplit(V1, "_", fixed = TRUE, keep = 1L)][]
oxon.tbl[op.list, is.oxon := i.is.oxon, on = .(PREFERRED_NAME)]
oxon.tbl[is.oxon == 0, color := 'white']
oxon.tbl[is.oxon == 1, color := 'gray']

# Write the MEA NFA heatmap to ./output/figures/ as a dated PNG.
file.dir <- "./output/figures/"
file.name <- paste0("/fig_mea_nfa_op_ac50_heatmap_oxon", Sys.Date(), ".png")
file.path <- paste0(file.dir, file.name)
dir.create(path = file.dir, showWarnings = FALSE, recursive = TRUE)
png(file.path,
    width = 10,
    height = 8,
    units = "in",
    res = 300)

# Clustered heatmap of the (NA-filled) modl_ga matrix.
# Cell separators are derived from the matrix dimensions instead of being
# hard-coded, so they always cover every row/column that is actually drawn.
heatmap.2(matrix, scale = 'none',
          col = viridis(20, option = 'D'),
          trace = 'none', density.info = 'none',
          colsep = seq_len(ncol(matrix) - 1L),
          rowsep = seq_len(nrow(matrix)),
          sepcolor = 'white', sepwidth = c(0.05, 0.05),
          hclustfun = function(x) hclust(x, method = "ward.D2"),
          labRow = row.names(matrix),
          # drop the first 15 characters of each aenm for the column label
          labCol = substr(colnames(matrix), 16, 50),
          margins = c(12, 14),
          cexRow = 1,
          cexCol = 0.8,
          # side bars: activity-type number per column, oxon colour per row
          ColSideColors = as.character(as.numeric(mea.dev.2$number)),
          RowSideColors = oxon.tbl$color,
          srtCol = 45,
          keysize = 0.7)

# Legend for the column side bar (activity type).
legend(xpd = TRUE, x = 0.8, y = 1.1,
       title = 'Activity Type',
       legend = unique(mea.dev.2$activity),
       col = unique(as.numeric(mea.dev.2$number)),
       bty = 'n',
       lty = 1,
       lwd = 5,
       cex = 0.8
)

# Legend for the row side bar (is.oxon flag).
legend(xpd = TRUE, x = 0.6, y = 1.1,
       title = 'Oxon structure',
       legend = unique(oxon.tbl$is.oxon),
       col = unique(oxon.tbl$color),
       bty = 'n',
       lty = 1,
       lwd = 5,
       cex = 0.8
)

dev.off()

## HCI heatmap for OPs

# labeling for oxon structure type 
# Structure-type flag on the HCI level-5 rows: OP-list chemicals get
# is.oxon = 1, then the eight DTXSIDs listed below are reset to 0.
hci.mc5[dsstox_substance_id %in% op.list[, DTXSID], is.oxon := 1]
hci.mc5[dsstox_substance_id %in% c('DTXSID9032329','DTXSID2020347', 'DTXSID9020407',
                                   'DTXSID7020479', 'DTXSID4020791','DTXSID4032459',
                                   'DTXSID5024261', 'DTXSID2022254'), is.oxon := 0]

# Wide table: one row per sample, one column per assay endpoint name (aenm),
# cells hold modl_ga.
hci.op <- dcast.data.table(hci.mc5[dsstox_substance_id %in% op.list[, DTXSID]],
                           chnm + casn + dsstox_substance_id + spid + is.oxon ~ aenm,
                           value.var = 'modl_ga')

# Row label used on the heatmap: chemical name + sample id.
hci.op[, names := paste0(chnm, "_", spid)]
length(unique(hci.op$names)) # 35 samples for 27 substances
head(hci.op)

# Endpoint columns are identified by name (everything that is neither an id
# column nor the row label) rather than by fixed positions.
hci.id.cols <- c('chnm', 'casn', 'dsstox_substance_id', 'spid', 'is.oxon')
hci.assay.cols <- setdiff(colnames(hci.op), c(hci.id.cols, 'names'))

# Missing modl_ga values are replaced with 6; the label column is carried
# along unchanged.
hci2.op <- hci.op[, lapply(.SD, function(x) ifelse(is.na(x), 6, x)),
                  .SDcols = c(hci.assay.cols, 'names')]
colnames(hci2.op)
matrix2 <- as.matrix(hci2.op[, hci.assay.cols, with = FALSE])
rownames(matrix2) <- hci2.op[, names]

# Column annotation: hci.tbl is keyed by aeid, so first attach the matching
# aenm from hci.mc5, then look up number/activity for each matrix column.
hci.tbl.2 <- as.data.table(colnames(matrix2))
hci.tbl$aenm <- hci.mc5$aenm[match(hci.tbl$aeid, hci.mc5$aeid)]
hci.tbl.2$number <- hci.tbl$number[match(hci.tbl.2$V1, hci.tbl$aenm)]
hci.tbl.2$activity <- hci.tbl$activity[match(hci.tbl.2$V1, hci.tbl$aenm)]

# Row annotation: recover the chemical name from the row label (text before
# the first "_"), pull is.oxon from op.list, and map it to a side-bar colour.
# NOTE(review): assumes chnm contains no "_" and equals op.list$PREFERRED_NAME.
oxon.tbl <- as.data.table(rownames(matrix2))
oxon.tbl[, PREFERRED_NAME := tstrsplit(V1, "_", fixed = TRUE, keep = 1L)][]
oxon.tbl[op.list, is.oxon := i.is.oxon, on = .(PREFERRED_NAME)]
oxon.tbl[is.oxon == 0, color := 'white']
oxon.tbl[is.oxon == 1, color := 'gray']

# Write the HCI heatmap to ./output/figures/ as a dated PNG.
file.dir <- "./output/figures/"
file.name <- paste0("/fig_hci_op_ac50_heatmap_oxon", Sys.Date(), ".png")
file.path <- paste0(file.dir, file.name)
dir.create(path = file.dir, showWarnings = FALSE, recursive = TRUE)
png(file.path, width = 10, height = 8, res = 300, units = "in")

# Clustered heatmap of the (NA-filled) modl_ga matrix.
# Separators are derived from the matrix dimensions: the previous fixed
# rowsep of 1:27 stopped short of the last rows when there is one row per
# sample rather than one per substance.
heatmap.2(matrix2, scale = 'none',
          col = viridis(20, option = 'D'),
          trace = 'none', density.info = 'none',
          colsep = seq_len(ncol(matrix2)),
          rowsep = seq_len(nrow(matrix2)),
          sepcolor = 'white', sepwidth = c(0.05, 0.05),
          hclustfun = function(x) hclust(x, method = "ward.D2"),
          labRow = row.names(matrix2),
          # drop the first 10 characters of each aenm for the column label
          labCol = substr(colnames(matrix2), 11, 50),
          margins = c(14, 18),
          cexRow = 1,
          cexCol = 0.8,
          # side bars: activity-type number per column, oxon colour per row
          ColSideColors = as.character(as.numeric(hci.tbl.2$number)),
          RowSideColors = oxon.tbl$color,
          srtCol = 45,
          keysize = 0.7)

# Legend for the column side bar (activity type).
legend(xpd = TRUE, x = 0.77, y = 1.1,
       title = 'Activity Type',
       legend = unique(hci.tbl.2$activity),
       col = unique(as.numeric(hci.tbl.2$number)),
       bty = 'n',
       lty = 1,
       lwd = 5,
       cex = 0.8
)

# Legend for the row side bar (is.oxon flag).
legend(xpd = TRUE, x = 0.65, y = 1.13,
       title = 'Oxon structure',
       legend = unique(oxon.tbl$is.oxon),
       col = unique(oxon.tbl$color),
       bty = 'n',
       lty = 1,
       lwd = 5,
       cex = 0.8
)
dev.off()

# Make OP potency tables
#-----------------------------------------------------------------------------------#

# Long-format level-5 rows for the OP chemicals, one table per platform.
hci.op <- hci.mc5[dsstox_substance_id %in% op.list[,DTXSID]]
mea.op <- mc5.mea.dev[dsstox_substance_id %in% op.list[,DTXSID]]

colnames(hci.op)
colnames(mea.op)

# Attach the activity label from the lookup tables (HCI joined on aeid, MEA
# joined on aenm), then stack both platforms into one table.
# NOTE(review): rbind() here presumably relies on both tables having the same
# columns -- confirm against the loaded data.
hci.op[ hci.tbl, activity.type :=activity, on='aeid']
mea.op[ mea.tbl, activity.type :=activity, on='aenm']
op.com <- rbind(hci.op, mea.op)
# Positive, usable curves only. This is a separate table taken before the
# per-sample columns below are added to op.com.
op.com.pos <- op.com[hitc==1 & use.me==1]

## create a summary table by op

# mea nfa
# Step 1: on the rows for aeid 2529-2530 only, minimum modl_ga per sample.
# Step 2: spread that value to every row of the same sample.
# Step 3: min() over no non-missing values yields Inf; turn those into NA.
# Step 4: samples still NA fall back to logc_max.
# NOTE(review): aeid 2529/2530 are presumably the MEA cytotoxicity endpoints
# (the column is named mea.min.cyt) -- confirm against mea.tbl.
op.com[aeid %in% c(2529:2530), mea.min.cyt:= min(modl_ga, na.rm=TRUE), by=spid]
op.com[, mea.min.cyt := min(mea.min.cyt, na.rm=TRUE), by=spid]
op.com[, mea.min.cyt := ifelse(is.infinite(mea.min.cyt), NA, mea.min.cyt)]
unique(op.com[,c('spid','chnm','mea.min.cyt','logc_max')])
op.com[is.na(mea.min.cyt), mea.min.cyt := logc_max]

## hci is by activity type
# Every pair of statements below follows the same two-step pattern as above:
# compute a per-sample minimum of modl_ga on a subset of aeids, then spread it
# to all rows of that sample. The *.cyt columns additionally require
# hitc==1 & use.me==1; the *.min columns do not.
# hN2 NOG
op.com[aeid==2792 & hitc==1 & use.me==1, hci.hn2.nog.cyt := min(modl_ga, na.rm=TRUE), by=spid]
op.com[, hci.hn2.nog.cyt := min(hci.hn2.nog.cyt, na.rm=TRUE), by=spid]

op.com[aeid %in% c(2789:2791), hci.hn2.nog.min := min(modl_ga, na.rm=TRUE), by=spid]
op.com[, hci.hn2.nog.min := min(hci.hn2.nog.min, na.rm=TRUE), by=spid]

# rat NOG
op.com[aeid==2780 & hitc==1 & use.me==1, hci.rat.nog.cyt := min(modl_ga,na.rm=TRUE), by=spid]
op.com[, hci.rat.nog.cyt := min(hci.rat.nog.cyt, na.rm=TRUE), by=spid]

op.com[aeid %in% c(2777:2779), hci.rat.nog.min := min(modl_ga, na.rm=TRUE), by=spid]
op.com[, hci.rat.nog.min := min(hci.rat.nog.min, na.rm=TRUE), by=spid]

# Synaptogenesis and neurite maturation
op.com[aeid==2787 & hitc==1 & use.me==1, hci.synap.cyt := min(modl_ga, na.rm=TRUE), by=spid]
op.com[, hci.synap.cyt := min(hci.synap.cyt,na.rm=TRUE), by=spid]

op.com[aeid %in% c(2783,2784), hci.synapnm.min := min(modl_ga, na.rm=TRUE), by=spid]
op.com[, hci.synapnm.min := min(hci.synapnm.min, na.rm=TRUE), by=spid]

op.com[aeid %in% c(2781,2782,2785,2786,2788), hci.synap.min := min(modl_ga,na.rm=TRUE), by=spid]
op.com[, hci.synap.min := min(hci.synap.min, na.rm=TRUE), by=spid]

# Proliferation
# The proliferation endpoints are looked up in hci.tbl by activity label
# rather than listed as literal aeids.
op.com[aeid==2796 & hitc==1 & use.me==1, hci.hNP1.cyt := min(modl_ga, na.rm=TRUE), by=spid]
op.com[,hci.hNP1.cyt := min(hci.hNP1.cyt, na.rm=TRUE), by=spid]
op.com[aeid %in% hci.tbl[activity %in% c('Proliferation, hNP1'),aeid],
       hci.hNP1.pro.min := min(modl_ga, na.rm=TRUE), by=spid]
op.com[,hci.hNP1.pro.min := min(hci.hNP1.pro.min, na.rm=TRUE), by=spid]



unique(op.com[,c('spid','chnm','hci.hNP1.cyt','hci.synap.cyt',
                 'hci.hn2.nog.cyt','hci.rat.nog.cyt','logc_max')])

# Replace every infinite value in every column of op.com with NA, in place
# (the min() calls above return Inf where a sample had no values).
for (j in 1:ncol(op.com)) set(op.com, which(is.infinite(op.com[[j]])), j, NA)

# make any NA cytotox assays equal to max conc tested
op.com[is.na(hci.hNP1.cyt), hci.hNP1.cyt := logc_max]
op.com[is.na(hci.synap.cyt), hci.synap.cyt := logc_max]
op.com[is.na(hci.hn2.nog.cyt), hci.hn2.nog.cyt := logc_max]
op.com[is.na(hci.rat.nog.cyt), hci.rat.nog.cyt := logc_max]

# ---- MEA NFA summary: one row per sample -----------------------------------
# Per-sample min/median/max modl_ga and hit count over the MEA endpoints; the
# identifier and cytotoxicity columns are carried through and de-duplicated
# further down with unique().
op.mea.sum <- op.com[
  aeid %in% mea.tbl$aeid,
  .(dsstox_substance_id = dsstox_substance_id,
    chnm                = chnm,
    casn                = casn,
    min.mea.nfa         = min(modl_ga, na.rm = TRUE),
    med.mea.nfa         = median(modl_ga, na.rm = TRUE),
    max.mea.nfa         = max(modl_ga, na.rm = TRUE),
    mea.sum             = sum(hitc),
    mea.min.cyt         = mea.min.cyt),
  by = spid
]

# min() over no non-missing values gives Inf; treat that as missing.
op.mea.sum[, min.mea.nfa := ifelse(is.infinite(min.mea.nfa), NA, min.mea.nfa)]
# Selectivity = cytotoxicity value minus most potent value.
op.mea.sum[, mea.sel := round(mea.min.cyt - min.mea.nfa, 2)]
op.mea.sum <- unique(op.mea.sum)

# Back-transform the log10 values (10^x); NA stays NA.
op.mea.sum[, min.mea.uM := ifelse(is.na(min.mea.nfa), NA, 10^min.mea.nfa)]
op.mea.sum[, med.mea.uM := ifelse(is.na(med.mea.nfa), NA, 10^med.mea.nfa)]

# Rounded copy used for reporting.
mea.round.cols <- c('min.mea.uM', 'med.mea.uM', 'mea.sel')
op.mea.sum2 <- copy(op.mea.sum)
op.mea.sum2[, (mea.round.cols) := lapply(.SD, round, 2), .SDcols = mea.round.cols]

# ---- HCI summary: one row per sample ---------------------------------------
# Same idea for the HCI endpoints, plus the per-activity cytotoxicity and
# minimum-potency columns that were added to op.com.
op.hci.sum <- op.com[
  aeid %in% hci.tbl$aeid,
  .(dsstox_substance_id = dsstox_substance_id,
    chnm                = chnm,
    casn                = casn,
    min.hci             = min(modl_ga, na.rm = TRUE),
    med.hci             = median(modl_ga, na.rm = TRUE),
    max.hci             = max(modl_ga, na.rm = TRUE),
    hci.sum             = sum(hitc),
    hci.hn2.nog.cyt     = min(hci.hn2.nog.cyt, na.rm = TRUE),
    hci.hn2.nog.min     = min(hci.hn2.nog.min, na.rm = TRUE),
    hci.rat.nog.cyt     = min(hci.rat.nog.cyt, na.rm = TRUE),
    hci.rat.nog.min     = min(hci.rat.nog.min, na.rm = TRUE),
    hci.synap.cyt       = min(hci.synap.cyt, na.rm = TRUE),
    hci.synapnm.min     = min(hci.synapnm.min, na.rm = TRUE),
    hci.synap.min       = min(hci.synap.min, na.rm = TRUE),
    hci.hNP1.cyt        = min(hci.hNP1.cyt, na.rm = TRUE),
    hci.hNP1.pro.min    = min(hci.hNP1.pro.min, na.rm = TRUE)),
  by = spid
]
op.hci.sum <- unique(op.hci.sum)

# Any Inf left behind by min()/max() becomes NA, column by column.
for (col.name in names(op.hci.sum)) {
  set(op.hci.sum, which(is.infinite(op.hci.sum[[col.name]])), col.name, NA)
}

# Selectivity per activity type = cytotoxicity value minus minimum value.
op.hci.sum[, hci.hN2.nog.sel  := round(hci.hn2.nog.cyt - hci.hn2.nog.min, 2)]
op.hci.sum[, hci.rat.nog.sel  := round(hci.rat.nog.cyt - hci.rat.nog.min, 2)]
op.hci.sum[, hci.synapnm.sel  := round(hci.synap.cyt - hci.synapnm.min, 2)]
op.hci.sum[, hci.synap.sel    := round(hci.synap.cyt - hci.synap.min, 2)]
op.hci.sum[, hci.hNP1.pro.sel := round(hci.hNP1.cyt - hci.hNP1.pro.min, 2)]

# Back-transform the log10 values (10^x); NA stays NA.
op.hci.sum[, min.hci.uM := ifelse(is.na(min.hci), NA, 10^min.hci)]
op.hci.sum[, med.hci.uM := ifelse(is.na(med.hci), NA, 10^med.hci)]

# Rounded copy used for reporting.
hci.round.cols <- c('min.hci.uM', 'med.hci.uM', 'hci.hN2.nog.sel',
                    'hci.rat.nog.sel', 'hci.synapnm.sel', 'hci.synap.sel',
                    'hci.hNP1.pro.sel')
op.hci.sum2 <- copy(op.hci.sum)
op.hci.sum2[, (hci.round.cols) := lapply(.SD, round, 2), .SDcols = hci.round.cols]

## now do AUC calcs
# Working table for the area-under-curve columns added below.
mc5 <- as.data.table(op.com)

# functions needed to fit

hill_curve <- function(hill_tp, hill_ga, hill_gw, lconc) {
  # Evaluate hill_tp / (1 + 10^((hill_ga - lconc) * hill_gw)).
  # Vectorised over lconc; equals hill_tp / 2 where lconc == hill_ga.
  # NOTE(review): parameters are presumably the Hill-model top, log AC50 and
  # slope, with lconc a log10 concentration -- confirm.
  exponent <- (hill_ga - lconc) * hill_gw
  hill_tp / (1 + 10^exponent)
}

gnls_curve <- function(top, ga, gw, la, lw, lconc) {
  # Product of a rising and a falling sigmoid in lconc, scaled by top:
  #   top * 1 / (1 + 10^((ga - lconc) * gw)) * 1 / (1 + 10^((lconc - la) * lw))
  # Vectorised over lconc.
  # NOTE(review): ga/gw and la/lw are presumably the gain and loss midpoint and
  # slope of a gain-loss model -- confirm.
  gain_term <- 1 / (1 + 10^((ga - lconc) * gw))
  loss_term <- 1 / (1 + 10^((lconc - la) * lw))
  top * gain_term * loss_term
}

# Area under each fitted curve between logc_min and logc_max.
#
# The curve parameters are referenced as columns inside j, so they are always
# taken from exactly the rows selected in i. Re-subsetting mc5 on the right-hand
# side with a separately written filter can select a different number of rows
# than i, which breaks the assignment.
# as.numeric() keeps the result a plain numeric vector even when no row
# matches (mapply() returns an empty list in that case).
#
# NOTE(review): the hill rows are selected on use.me only while the gnls rows
# also require hitc == 1; both filters are kept as they were -- confirm whether
# hill should require hitc == 1 as well.

mc5[use.me == 1L & modl == "hill",
    auc := as.numeric(mapply(
      function(lower, upper, tp, ga, gw) {
        integrate(hill_curve,
                  lower,
                  upper,
                  hill_tp = tp,
                  hill_ga = ga,
                  hill_gw = gw)$value
      },
      logc_min, logc_max, hill_tp, hill_ga, hill_gw))]

mc5[hitc == 1L & use.me == 1L & modl == "gnls",
    auc := as.numeric(mapply(
      function(lower, upper, tp, ga, gw, la, lw) {
        integrate(gnls_curve,
                  lower,
                  upper,
                  top = tp,
                  ga = ga,
                  gw = gw,
                  la = la,
                  lw = lw)$value
      },
      logc_min, logc_max, gnls_tp, gnls_ga, gnls_gw, gnls_la, gnls_lw))]

# Rows that were not integrated (any other model, or filtered out) count as 0.
mc5[is.na(auc), auc := 0]

# Summarise AUC per sample and platform, then scale.

# Total AUC per sample, separately over the HCI and the MEA endpoints.
mc5[aeid %in% hci.tbl$aeid, auc.hci.sum := sum(auc), by = spid]
mc5[aeid %in% mea.tbl$aeid, auc.mea.sum := sum(auc), by = spid]

# log2 of the totals; rows without a total stay NA.
mc5[, `:=`(
  log2.auc.hci.sum = ifelse(is.na(auc.hci.sum), NA, log2(auc.hci.sum)),
  log2.auc.mea.sum = ifelse(is.na(auc.mea.sum), NA, log2(auc.mea.sum))
)]

# 95th percentile of the log2 totals, used as the scaling denominator.
mea95 <- quantile(mc5$log2.auc.mea.sum, 0.95, na.rm = TRUE)
hci95 <- quantile(mc5$log2.auc.hci.sum, 0.95, na.rm = TRUE)

# Scaled value = log2 total / 95th percentile, rounded to 2 decimals.
mc5[, `:=`(
  scaled.auc.mea = ifelse(is.na(log2.auc.mea.sum), NA,
                          round(log2.auc.mea.sum / mea95, 2)),
  scaled.auc.hci = ifelse(is.na(log2.auc.hci.sum), NA,
                          round(log2.auc.hci.sum / hci95, 2))
)]

unique(mc5[, .(chnm, spid, scaled.auc.mea, scaled.auc.hci)])

# Copy the scaled values onto the per-sample potency tables, matched on spid.
mea.auc <- unique(mc5[aeid %in% mea.tbl$aeid, .(chnm, spid, scaled.auc.mea)])
op.mea.sum2[, scaled.auc.mea := mea.auc$scaled.auc.mea[match(spid, mea.auc$spid)]]

hci.auc <- unique(mc5[aeid %in% hci.tbl$aeid, .(chnm, spid, scaled.auc.hci)])
op.hci.sum2[, scaled.auc.hci := hci.auc$scaled.auc.hci[match(spid, hci.auc$spid)]]


## Export the results

# Plain data.frame copies of the four result tables, one per worksheet.
op.combined.mc5.df      <- as.data.frame(op.com)
op.mea.potency.table.df <- as.data.frame(op.mea.sum2)
op.hci.potency.table.df <- as.data.frame(op.hci.sum2)
op.mc5.auc.calcs.df     <- as.data.frame(mc5)

# List names become the worksheet names in the workbook.
list_op_summary <- list(
  op.combined.mc5    = op.combined.mc5.df,
  op.mea.potency.tbl = op.mea.potency.table.df,
  op.hci.potency.tbl = op.hci.potency.table.df,
  op.auc.calcs       = op.mc5.auc.calcs.df
)
write.xlsx(x = list_op_summary,
           file = './output/Tbl13_op_mea_hci_potency_summary_22Apr2020.xlsx',
           keepNA = TRUE)

# Same objects as an .RData file; it is re-loaded near the end of the script.
save(list = c('op.com', 'op.mea.sum2', 'op.hci.sum2', 'mc5'),
     file = './source/op_mea_hci_potency_summary.RData')

#-----------------------------------------------------------------------------------#
## Compare to the rest of ToxCast (supplemental Appendix C)
#-----------------------------------------------------------------------------------#

# Sample ids of the OP samples (op.sample is not created in this script;
# presumably it comes from one of the loaded .RData files).
spids.op <- op.sample[, spid]

# need to switch to public DB to only show comparison results from invitrodb v3.3.
library(tcpl)
# stg_invitro = invitrodb v3.3
tcplConf(user = '',
         pass = '',
         db = 'stg_invitro', #comparison is to invitrodb version 3.3 to be released August 2020
         drvr = 'MySQL',
         host = '') #insert host

# Level-5 results for the OP samples and the level-6 rows for those m4ids.
mc5.op <- tcplPrepOtpt(tcplLoadData(lvl = 5, type = 'mc', fld = 'spid', val = spids.op))
mc6 <- tcplPrepOtpt(tcplLoadData(lvl = 6, fld = 'm4id', val = mc5.op$m4id, type = 'mc'))
setDT(mc6)

# Collapse level 6 to one row per m4id: comma-separated method ids and
# semicolon-separated flag text.
mc6_mthds <- mc6[, .(mc6_mthd_id = paste(mc6_mthd_id, collapse = ",")), by = m4id]
mc6_flags <- mc6[, .(flag = paste(flag, collapse = ";")), by = m4id]

# The mc6_flags *column* on mc5.op holds the comma-separated method ids (not
# the flag text); curves without level-6 rows get NA.
mc5.op$mc6_flags <- mc6_mthds$mc6_mthd_id[match(mc5.op$m4id, mc6_mthds$m4id)]

# Number of flags per curve = number of comma-separated ids. Counting via
# strsplit() avoids opening a text connection that is never closed.
mc5.op[, flag.length := ifelse(!is.na(mc6_flags),
                               lengths(strsplit(mc6_flags, ",", fixed = TRUE)),
                               NA)]

# filter the dataset, with coarse filters
# use.me = 1: active curve with fewer than 3 flags (or no flags at all)
# use.me = 0: active curve with 3+ flags, fitc 36 or 45, or hitc == -1
mc5.op[hitc == 1 & flag.length < 3, use.me := 1]
mc5.op[hitc == 1 & is.na(flag.length), use.me := 1]
mc5.op[hitc == 1 & flag.length >= 3, use.me := 0]
mc5.op[fitc %in% c(36, 45), use.me := 0]
mc5.op[hitc == -1, use.me := 0] # make hitc interpretable as a positive sum
# Rejected curves are treated as inactive, and inactive curves carry no
# potency value.
mc5.op[use.me == 0, modl_ga := as.numeric(NA)]
mc5.op[use.me == 0, hitc := 0]
mc5.op[hitc == 0, modl_ga := as.numeric(NA)]
#mc5.op[hitc==0 & !is.na(modl_ga)]

# get the burst values for ToxCast

# The schema is passed as a quoted `dbname`; the previous `database = stg_invitro`
# referred to an undefined object under an argument name the MySQL driver does
# not have. DBI functions are namespace-qualified because DBI is not attached
# by this script.
con <- DBI::dbConnect(drv = RMySQL::MySQL(),
                      user = "", #fill user
                      password = "pass", # fill pass
                      host = "",
                      dbname = "stg_invitro")

burst <- DBI::dbGetQuery(con,"select * from stg_invitro.cytotox inner join stg_invitro.chemical
                    on cytotox.chid = chemical.chid" ) %>% data.table()

# Nothing else is read through this connection, so release it right away.
DBI::dbDisconnect(con)

# Burst rows for the OP chemicals only.
burst.op <- burst[dsstox_substance_id %in% op.list[, DTXSID]]

# Attach the lower bound of the cytotoxicity burst to both tables.
# NOTE(review): op.com.pos is matched and grouped on a DTXSID column here and
# below, while the level-5 tables elsewhere in this script are keyed on
# dsstox_substance_id -- confirm that op.com.pos really has a DTXSID column.
op.com.pos$cytotox_lower_bound_log <- burst.op$cytotox_lower_bound_log[match(op.com.pos$DTXSID,
                                                                             burst.op$dsstox_substance_id)]

mc5.op$cytotox_lower_bound_log <- burst.op$cytotox_lower_bound_log[match(mc5.op$dsstox_substance_id,
                                                                         burst.op$dsstox_substance_id)]


# set up minimum potency values
# Per chemical: minimum DNT-NAM modl_ga, minimum ToxCast modl_ga, and the 5th
# percentile of ToxCast modl_ga.
op.com.pos[, min.potency := min(modl_ga, na.rm = TRUE), by = DTXSID]
mc5.op[, min.toxcast.potency := min(modl_ga, na.rm = TRUE), by = chid]
mc5.op[, modl_ga_fifth := quantile(modl_ga, probs = c(0.05), na.rm = TRUE), by = list(chid)]

# Selectivity = burst lower bound minus the minimum DNT-NAM potency.
mc5.op$min.dnt.potency <- op.com.pos$min.potency[match(mc5.op$dsstox_substance_id, op.com.pos$DTXSID)]
mc5.op[, selectivity := cytotox_lower_bound_log - min.dnt.potency]

# Hit rates and number of assay endpoints screened, per chemical.
# lw() is not defined in this script; presumably it is tcpl's shortcut for
# length(which(x)).
hit.rates <- mc5.op[, .(
  total.assay.screened = .N,                                   # endpoints tested
  active.assay.count   = as.double(lw(hitc == 1)),             # active count
  inactive.assay.count = as.double(lw(hitc == 0)),             # inactive count
  active.percent       = round((lw(hitc == 1) / .N) * 100, 2), # active percent
  inactive.percent     = round((lw(hitc == 0) / .N) * 100, 2)  # inactive percent
), by = .(chid, chnm, casn, dsstox_substance_id)]

# Copy the per-chemical figures back onto every level-5 row, matched on casn.
casn.idx <- match(mc5.op$casn, hit.rates$casn)
mc5.op$hitrate              <- hit.rates$active.percent[casn.idx]
mc5.op$active.assay.count   <- hit.rates$active.assay.count[casn.idx]
mc5.op$total.assay.screened <- hit.rates$total.assay.screened[casn.idx]

# Long format: one row per level-5 row and comparator value, used for the
# point overlay in the figure.
comparator.id.cols <- c('chnm',
                        'casn',
                        'dsstox_substance_id',
                        'selectivity',
                        'hitrate',
                        'active.assay.count',
                        'total.assay.screened')
comparator.value.cols <- c('modl_ga_fifth',
                           'min.dnt.potency',
                           'cytotox_lower_bound_log')
mc5.op.long <- melt.data.table(mc5.op,
                               id.vars = comparator.id.cols,
                               measure.vars = comparator.value.cols,
                               variable.name = 'Comparator')

#-----------------------------------------------------------------------------------#
# Visualization 

# viridis colors manually selected

# ggplot(), the geoms, scales and themes used below live in ggplot2, which is
# not attached by the library() calls at the top of this script.
library(ggplot2)

# Boxplot of ToxCast modl_ga per chemical (ordered by the 5th-percentile
# value), overlaid with three comparator points per chemical and an
# "active / screened" count label drawn at y = 4.
mc5.dnt <- ggplot(data = mc5.op, aes(x = reorder(factor(chnm), modl_ga_fifth), y = modl_ga)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(data = mc5.op.long,
             aes(x = factor(chnm),
                 y = value, shape = factor(Comparator), color = factor(Comparator)), size = 2) +
  scale_color_manual(values = c("#481567FF", "#1F968BFF", "#95D840FF"),
                     breaks = c("modl_ga_fifth", "min.dnt.potency", "cytotox_lower_bound_log"),
                     labels = c("5th-%ile ToxCast AC50", "Min DNT-NAM AC50", "Burst")) +
  scale_shape_manual(values = c(15, 16, 17),
                     breaks = c("modl_ga_fifth", "min.dnt.potency", "cytotox_lower_bound_log"),
                     labels = c("5th-%ile ToxCast AC50", "Min DNT-NAM AC50", "Burst")) +
  xlab('Chemical') +
  ylab('log10 micromolar value') +
  theme_bw() +
  geom_text(data = mc5.op.long,
            aes(x = chnm,
                y = 4,
                label = paste(active.assay.count, "/", total.assay.screened),
                group = `chnm`), size = 3,
            position = position_dodge(1)) +
  theme(legend.position = "top",
        legend.title = element_blank()) +
  scale_y_continuous(breaks = seq(-5, 5, 1)) +
  coord_flip(ylim = c(-4, 5))

file.dir <- "./output/figures/"
file.name <- paste0("/SuppApp_Fig_toxcast_v_dntnam_potency", Sys.Date(), ".png")
file.path <- paste0(file.dir, file.name)
dir.create(path = file.dir, showWarnings = FALSE, recursive = TRUE)
png(file.path, width = 5, height = 6, units = 'in', res = 600)
# A ggplot object is only drawn when printed; auto-printing does not happen
# when the script is run through source(), so print explicitly.
print(mc5.dnt)
dev.off()

#-----------------------------------------------------------------------------------#
## Just for reference, create ToxCast curve-fits by chid
#-----------------------------------------------------------------------------------#

# create all plots by chid for the entire dataset
# Restore op.com (and the other saved tables) from the summary file written
# earlier in this script.
load(file = './source/op_mea_hci_potency_summary.RData')

# Make sure the output folder exists before switching into it.
curvefit.dir <- "./output/toxcast_curvefits_hitc1"
dir.create(path = curvefit.dir, showWarnings = FALSE, recursive = TRUE)
setwd(curvefit.dir)

# Keep named chemicals with an active call only.
graph <- op.com[!is.na(chnm)]
graph <- graph[hitc == 1]

graphics.off()
# One PDF per chemical containing the level-6 plots of all its active m4ids.
# Each device is closed as soon as its plots are written (also when plotting
# fails), instead of leaving every PDF open until the end of the loop.
for (i in unique(graph$chnm)) {
  pdf(file = paste(i,
                   '_hitc1only_',
                   format(Sys.Date(),
                          "%y%m%d.pdf"),
                   sep = "_"),
      height = 6,
      width = 10,
      pointsize = 10)
  tryCatch(
    tcplPlotM4ID(graph[chnm == i]$m4id, lvl = 6),
    finally = dev.off()
  )
}
graphics.off()
