#-----------------------------------------------------------------------------------#
# Section 2.3.4 DNT NAM: Evaluation of the assays
# 
# March 19, 2020
# updated 14 May 2020
# author: paul-friedman.katie@epa.gov
#-----------------------------------------------------------------------------------#

#-----------------------------------------------------------------------------------#
# loading libraries
#-----------------------------------------------------------------------------------#

rm(list = ls())

library(tcpl)
# use internal research version, invitrodb with new datasets
tcplConf(user='', pass='', #insert user and pass
         db='invitrodb', drvr='MySQL', 
         host='') #insert host
tcplConfList()
library(data.table)
library(dplyr)
library(ggplot2)
library(viridis)
library(magrittr)
library(openxlsx)
library(reshape2)
library(RMySQL)
library(stringr)

print(sessionInfo())

#R version 3.6.1 (2019-07-05)
#Platform: x86_64-w64-mingw32/x64 (64-bit)
#Running under: Windows >= 8 x64 (build 9200)

#attached base packages:
#  [1] stats     graphics  grDevices utils     datasets  methods   base     

#other attached packages:
#  [1] tcpl_2.0.2        cowplot_1.0.0     viridis_0.5.1     viridisLite_0.3.0 ggforce_0.3.1     ggplot2_3.2.1    
#[7] httk_2.0.1        stringr_1.4.0     reshape2_1.4.3    magrittr_1.5      dplyr_0.8.3       openxlsx_4.1.4   
#[13] data.table_1.12.8 RMySQL_0.10.17    DBI_1.1.0        

#loaded via a namespace (and not attached):
#  [1] Rcpp_1.0.4          msm_1.6.8           mvtnorm_1.1-0       lattice_0.20-38     assertthat_0.2.1    zeallot_0.1.0      
#[7] digest_0.6.23       R6_2.4.0            plyr_1.8.4          chron_2.3-54        backports_1.1.5     RSQLite_2.1.5      
#[13] survey_3.37         sqldf_0.4-11        pillar_1.4.2        rlang_0.4.1         lazyeval_0.2.2      rstudioapi_0.10    
#[19] blob_1.2.0          Matrix_1.2-17       gsubfn_0.7          proto_1.0.0         labeling_0.3        splines_3.6.1      
#[25] polyclip_1.10-0     bit_1.1-14          munsell_0.5.0       compiler_3.6.1      numDeriv_2016.8-1.1 pkgconfig_2.0.3    
#[31] mitools_2.4         tidyselect_0.2.5    tibble_2.1.3        gridExtra_2.3       expm_0.999-4        crayon_1.3.4       
#[37] withr_2.1.2         MASS_7.3-51.4       grid_3.6.1          gtable_0.3.0        lifecycle_0.1.0     scales_1.1.0       
#[43] zip_2.0.4           stringi_1.4.3       farver_2.0.1        vctrs_0.2.0         deSolve_1.27.1      RColorBrewer_1.1-2 
#[49] tools_3.6.1         bit64_0.9-7         glue_1.3.1          tweenr_1.0.1        purrr_0.3.3         parallel_3.6.1     
#[55] survival_2.44-1.1   colorspace_1.4-1    memoise_1.1.0  

setwd('./public_code') # set working directory

#-----------------------------------------------------------------------------------#
# Create data sets: requires tcpl and invitrodb.sql
#-----------------------------------------------------------------------------------#

## Make MEA DEV data package

shafer <- tcplLoadAeid(val=20, fld='asid',add.fld='acid')
mea.dev <- shafer[grepl('NHEERL_MEA_dev', aenm), aeid]
mea.dev.acid <- unique(tcplLoadAcid(val=mea.dev, fld='aeid')$acid)
mea.tbl <- tcplLoadAcid(val=mea.dev.acid, fld='acid', add.fld='aeid') 
mea.tbl$aenm <- shafer$aenm[match(mea.tbl$aeid,shafer$aeid)]

mc0.mea.dev <- tcplPrepOtpt(tcplLoadData(lvl=0,type='mc', fld='acid',val=mea.dev.acid))
mc1.mea.dev <- tcplPrepOtpt(tcplLoadData(lvl=1,type='mc', fld='acid',val=mea.dev.acid))
mc3.mea.dev <- tcplPrepOtpt(tcplLoadData(lvl=3, type='mc',fld='aeid',val=mea.dev))
mc5.mea.dev <- tcplPrepOtpt(tcplLoadData(lvl=5,type='mc', fld='aeid',val=mea.dev))
mc6 <- tcplPrepOtpt(tcplLoadData(lvl=6, fld='m4id', val=mc5.mea.dev$m4id, type='mc'))
setDT(mc6)
mc6_mthds <- mc6[ , .( mc6_mthd_id = paste(mc6_mthd_id, collapse=",")), by = m4id]
mc6_flags <- mc6[ , .( flag = paste(flag, collapse=";")), by = m4id]
mc5.mea.dev$mc6_flags <- mc6_mthds$mc6_mthd_id[match(mc5.mea.dev$m4id, mc6_mthds$m4id)]
mc5.mea.dev[, flag.length := ifelse(!is.na(mc6_flags), count.fields(textConnection(mc6_flags), sep =','), NA)]
mc5.mea.dev[hitc==1,ac50_uM := ifelse(!is.na(modl_ga), 10^modl_ga, NA)]
mc5.mea.dev[hitc==1,acc_uM := ifelse(!is.na(modl_acc), 10^modl_ga, NA)]

# filter the dataset, with coarse filters
mc5.mea.dev[hitc==1 & flag.length < 3, use.me := 1]
mc5.mea.dev[hitc==1 & is.na(flag.length), use.me := 1]
mc5.mea.dev[hitc==1 & flag.length >= 3, use.me := 0]
mc5.mea.dev[fitc %in% c(36,45), use.me := 0]
mc5.mea.dev[hitc==-1, use.me := 0] # make hitc interpretable as a positive sum
mc5.mea.dev[use.me==0, modl_ga := as.numeric(NA)]
mc5.mea.dev[use.me==0, hitc := 0]
mc5.mea.dev[hitc==0, modl_ga := as.numeric(NA)]

# label activity type
mea.tbl[acid %in% c(2471,2472,2473,2474), activity := 'General']
mea.tbl[acid %in% c(2475, 2476, 2477, 2478, 2481), activity := 'Bursting']
mea.tbl[acid %in% c(2479,2480,2482,2483,2484,2485,2486,2487), activity := 'Network Connectivity']
mea.tbl[acid %in% c(2488,2489), activity := 'Cytotoxicity']

mea.tbl[activity=='General', number :=1]
mea.tbl[activity=='Bursting', number :=2]
mea.tbl[activity=='Network Connectivity', number :=3]
mea.tbl[activity=='Cytotoxicity', number :=4]

save(mc0.mea.dev,
     mc1.mea.dev,
     mc3.mea.dev,
     mc5.mea.dev,
     shafer,
     mea.tbl,
     file='./source/NHEERL_MEA_dev_13Apr2020.RData')

## make HCI data package

hci.tbl <- tcplLoadAcid(add.fld='aeid',val=31,fld='asid')
hci.tbl

hci.mc0 <- tcplPrepOtpt(tcplLoadData(lvl=0,type='mc',fld='acid',val=hci.tbl$acid))
hci.mc3 <- tcplPrepOtpt(tcplLoadData(lvl=3,type='mc',fld='aeid',val=hci.tbl$aeid))
hci.mc5 <- tcplPrepOtpt(tcplLoadData(lvl=5,type='mc',fld='aeid',val=hci.tbl$aeid))

mc6 <- tcplPrepOtpt(tcplLoadData(lvl=6, fld='m4id', val=hci.mc5$m4id, type='mc'))
setDT(mc6)
mc6_mthds <- mc6[ , .( mc6_mthd_id = paste(mc6_mthd_id, collapse=",")), by = m4id]
mc6_flags <- mc6[ , .( flag = paste(flag, collapse=";")), by = m4id]
hci.mc5$mc6_flags <- mc6_mthds$mc6_mthd_id[match(hci.mc5$m4id, mc6_mthds$m4id)]
hci.mc5[, flag.length := ifelse(!is.na(mc6_flags), count.fields(textConnection(mc6_flags), sep =','), NA)]
hci.mc5[hitc==1,ac50_uM := ifelse(!is.na(modl_ga), 10^modl_ga, NA)]
hci.mc5[hitc==1,acc_uM := ifelse(!is.na(modl_acc), 10^modl_ga, NA)]

# filter the dataset, with coarse filters
hci.mc5[hitc==1 & flag.length < 3, use.me := 1]
hci.mc5[hitc==1 & is.na(flag.length), use.me := 1]
hci.mc5[hitc==1 & flag.length >= 3, use.me := 0]
hci.mc5[fitc %in% c(36,45), use.me := 0]
hci.mc5[hitc==-1, use.me := 0]
hci.mc5[use.me==0, modl_ga := as.numeric(NA)]
hci.mc5[use.me==0, hitc := 0]
hci.mc5[hitc==0, modl_ga := as.numeric(NA)]

# label activity types
hci.tbl[aeid %in% c(2777:2780), activity := 'NOG initiation, rat']
hci.tbl[aeid %in% c(2781:2788), activity := 'Synaptogenesis/maturation, rat']
hci.tbl[aeid %in% c(2789:2792), activity := 'NOG initiation, hN2']
hci.tbl[aeid %in% c(2793:2794), activity := 'Apoptosis/viability, hNP1']
hci.tbl[aeid %in% c(2795:2797), activity := 'Proliferation, hNP1']

hci.tbl[activity == 'NOG initiation, rat', number :=1]
hci.tbl[activity == 'Synaptogenesis/maturation, rat', number :=2]
hci.tbl[activity == 'NOG initiation, hN2', number :=3]
hci.tbl[activity == 'Apoptosis/viability, hNP1', number :=4]
hci.tbl[activity == 'Proliferation, hNP1', number :=5]

save(hci.tbl,
     hci.mc0,
     hci.mc3,
     hci.mc5,
     file='./source/HCI_13Apr2020.RData')

#-----------------------------------------------------------------------------------#
# Assay reproducibility
#-----------------------------------------------------------------------------------#

## libraries and directories
rm(list = ls())

library(data.table)
library(openxlsx)
library(dplyr)
library(magrittr)
library(reshape2)
library(stringr)

setwd('./public_code') # set code wd
load(file='./source/NHEERL_MEA_dev_13Apr2020.RData')

# Reference for Table 1
write.csv(mea.tbl, 'mea_assay_info.csv')


# MEA reference chemicals
#-----------------------------------------------------------------------------------#
ref.dtxsid <- c('DTXSID2020006',
                'DTXSID50157932',
                'DTXSID20274180',
                'DTXSID00880006',
                'DTXSID4040684',
                'DTXSID2037269'
)

pos.ref.dtxsid <- c(
                    'DTXSID50157932',
                    'DTXSID20274180',
                    'DTXSID00880006',
                    'DTXSID4040684',
                    'DTXSID2037269'
)

neg.ref.dtxsid <- c('DTXSID2020006')

mc0.mea.dev.pos <- mc0.mea.dev[dsstox_substance_id %in% pos.ref.dtxsid|wllt=='n']

da <- c('Domoic acid')
b1 <- c('Bisindolylmaleimide I')
lo <- c('Loperamide')
me <- c('Mevastatin')
so <- c('Sodium orthovandate')

mc3.mea.dev.pos <- mc3.mea.dev[dsstox_substance_id %in% pos.ref.dtxsid|wllt=='n'] # may be some wllt==n that are EtOH but these other solvents don't seem fundamentally different
mc3.mea.dev.pos[wllt=='n', chnm := spid]
mc3.mea.dev.pos <- mc3.mea.dev.pos[!(spid %in% c("DMSO/ethanol","Water","Ethanol"))]
mc3.mea.dev.pos[,max.cndx := max(cndx), by=list(spid)]
mc3.mea.dev.pos2 <- mc3.mea.dev.pos[max.cndx==cndx]

# Assay reproducibility metrics for MEA NFA (Table 8)
#-----------------------------------------------------------------------------------#
aq <- function(ac){
  dat <- mc3.mea.dev.pos2
  #dat <- dat[wllq==1]
  agg <- dat[ , list(
    spid = spid,
    chnm = chnm,
    dsstox_substance_id = dsstox_substance_id,
    nmed = median(resp[wllt=="n"], na.rm=TRUE),
    nmad = mad(resp[wllt=="n"], na.rm=TRUE),
    da.med = median(resp[dsstox_substance_id=='DTXSID20274180'], na.rm=TRUE),
    da.mad = mad(resp[dsstox_substance_id=='DTXSID20274180'], na.rm=TRUE),
    b1.med = median(resp[dsstox_substance_id=='DTXSID50157932'], na.rm=TRUE),
    b1.mad = mad(resp[dsstox_substance_id=='DTXSID50157932'], na.rm=TRUE),
    lo.med = median(resp[dsstox_substance_id=='DTXSID00880006'], na.rm=TRUE),
    lo.mad = mad(resp[dsstox_substance_id=='DTXSID00880006'], na.rm=TRUE),
    me.med = median(resp[dsstox_substance_id=='DTXSID4040684'], na.rm=TRUE),
    me.mad = mad(resp[dsstox_substance_id=='DTXSID4040684'], na.rm=TRUE),
    so.med = median(resp[dsstox_substance_id=='DTXSID2037269'], na.rm=TRUE),
    so.mad = mad(resp[dsstox_substance_id=='DTXSID2037269'], na.rm=TRUE)
  ), by = list(aeid, aenm,apid)] # need to add apid?
  
  agg[ , zprm.da := 1 - ((3 * (da.mad + nmad)) / abs(da.med - nmed))]
  agg[ , zprm.b1 := 1 - ((3 * (b1.mad + nmad)) / abs(b1.med - nmed))]
  agg[ , zprm.lo := 1 - ((3 * (lo.mad + nmad)) / abs(lo.med - nmed))]
  agg[ , zprm.me := 1 - ((3 * (me.mad + nmad)) / abs(me.med - nmed))]
  agg[ , zprm.so := 1 - ((3 * (so.mad + nmad)) / abs(so.med - nmed))]

  agg[ , ssmd.da := (da.med - nmed) / sqrt(da.mad^2 + nmad^2 )]
  agg[ , ssmd.b1 := (b1.med - nmed) / sqrt(b1.mad^2 + nmad^2 )]
  agg[ , ssmd.lo := (lo.med - nmed) / sqrt(lo.mad^2 + nmad^2 )]
  agg[ , ssmd.me := (me.med - nmed) / sqrt(me.mad^2 + nmad^2 )]
  agg[ , ssmd.so := (so.med - nmed) / sqrt(so.mad^2 + nmad^2 )]
  
 # agg[ , cv     := nmad / nmed]
  agg[ , sn.da :=  (da.med - nmed) / nmad]
  agg[ , sn.b1 :=  (b1.med - nmed) / nmad]
  agg[ , sn.lo :=  (lo.med - nmed) / nmad]
  agg[ , sn.me :=  (me.med - nmed) / nmad]
  agg[ , sn.so :=  (so.med - nmed) / nmad]
  
  agg[ , sb.da :=  da.med / nmed]
  agg[ , sb.b1 :=  b1.med / nmed]
  agg[ , sb.lo :=  lo.med / nmed]
  agg[ , sb.me :=  me.med / nmed]
  agg[ , sb.so :=  so.med / nmed]
  
  acqu <- agg[ , list( nmed   = round(median(nmed, na.rm = TRUE),2),
                       nmad   = round(median(nmad, na.rm = TRUE), 2),
                       da.med = round(median(da.med, na.rm = TRUE),2),
                       da.mad = round(median(da.mad, na.rm = TRUE),2),
                       b1.med = round(median(b1.med, na.rm = TRUE),2),
                       b1.mad = round(median(b1.mad, na.rm = TRUE),2),
                       lo.med = round(median(lo.med, na.rm = TRUE),2),
                       lo.mad = round(median(lo.mad, na.rm = TRUE),2),
                       me.med = round(median(me.med, na.rm = TRUE),2),
                       me.mad = round(median(me.mad, na.rm = TRUE),2),
                       so.med = round(median(so.med, na.rm = TRUE),2),
                       so.mad = round(median(so.mad, na.rm = TRUE),2),
                       zprm.da = round(median(zprm.da, na.rm = TRUE),2),
                       zprm.b1 = round(median(zprm.b1, na.rm = TRUE),2),
                       zprm.lo = round(median(zprm.lo, na.rm = TRUE),2),
                       zprm.me = round(median(zprm.me, na.rm = TRUE),2),
                       zprm.so = round(median(zprm.so, na.rm = TRUE),2),
                       ssmd.da = round(median(ssmd.da, na.rm = TRUE),0),
                       ssmd.b1 = round(median(ssmd.b1, na.rm = TRUE),0),
                       ssmd.lo = round(median(ssmd.lo, na.rm = TRUE),0),
                       ssmd.me = round(median(ssmd.me, na.rm = TRUE),0),
                       ssmd.so = round(median(ssmd.so, na.rm = TRUE),0),
                       sn.da = round(median(sn.da, na.rm = TRUE),2),
                       sn.b1 = round(median(sn.b1, na.rm = TRUE),2),
                       sn.lo = round(median(sn.lo, na.rm = TRUE),2),
                       sn.me = round(median(sn.me, na.rm = TRUE),2),
                       sn.so = round(median(sn.so, na.rm = TRUE),2),
                       sb.da = round(median(sb.da, na.rm = TRUE),2),
                       sb.b1 = round(median(sb.b1, na.rm = TRUE),2),
                       sb.lo = round(median(sb.lo, na.rm = TRUE),2),
                       sb.me = round(median(sb.me, na.rm = TRUE),2),
                       sb.so = round(median(sb.so, na.rm = TRUE),2)
                       ), by=list(aeid,aenm)]
  
  return(acqu)
} #per acid

aeids <- mea.tbl[,aeid]
acids <- mea.tbl[,acid]
aqList <- lapply(aeids, aq)
aqd <- rbindlist(aqList)
zprm.ssmd.mea.mc3 <-unique(aqd)
zprm.ssmd.mea.mc3.df <- as.data.frame(zprm.ssmd.mea.mc3) # for Table 8

#-----------------------------------------------------------------------------------#
# now calculate the CV at the mc0 for the DMSO by plate median (Table 5)


mc0.mea.dev.pos <- mc0.mea.dev[dsstox_substance_id %in% pos.ref.dtxsid|wllt=='n']
mc0.mea.dev.pos[wllt=='n', chnm := spid]
mc0.mea.dev.pos <- mc0.mea.dev.pos[!(spid %in% c("DMSO/ethanol","Water","Ethanol"))]

acids <- mea.tbl$acid
aeids <- mea.tbl$aeid

aq <- function(ac){
  dat <- mc0.mea.dev.pos
  dat <- dat[wllq==1]
  agg <- dat[ , list(
    nmed = median(rval[wllt=="n"], na.rm=TRUE),
    nmad = mad(rval[wllt=="n"], na.rm=TRUE)
  ), by = list(acid, acnm, apid)]
  
  agg[ , cv     := (nmad / nmed)*100]

  acqu <- agg[ , list( nmed   = signif(median(nmed, na.rm = TRUE)),
                       nmad   = signif(median(nmad, na.rm = TRUE)),
                       cv = round(median(cv, na.rm=TRUE),2)
  ), by = list(acid, acnm)]
  
  return(acqu)
} #per acid

aqList <- lapply(acids, aq)
aqd <- rbindlist(aqList)
aqd <-unique(aqd[,c(1:5)])

cv.summary.by.acid <- aqd %>% mutate_at(vars(nmed, nmad, cv), ~round(.,2)) %>% data.table()
#write.csv(cv.summary.by.acid, file='mc0_DMSO_CV_by_acid.csv')
dmso.cv.mea.mc0.df <- as.data.frame(cv.summary.by.acid)

#-----------------------------------------------------------------------------------#
# now look at mc5 and overall hit concordance

dat <- as.data.table(mc5.mea.dev)

agg <- dat[ , list(
  bmad  = max(bmad, na.rm = TRUE),    #baseline median absolute deviation (mad around the first 2 tested concentrations
  nconc = as.double(median(nconc, na.rm = TRUE)), #nominal number of concentrations tested for the assay endpoint
  coff  = max(coff, na.rm = TRUE),  #global response cutoff established for the assay (methods available within pipeline)
  test  = .N, #total number of samples tested in concentration response
  acnt  = as.double(lw(hitc==1)),  # active count
  apct  = lw(hitc==1)/.N,  # active percentage
  icnt  = as.double(lw(hitc==0)),  #inactive count
  ipct  = lw(hitc==0)/.N,  #inactive percentage
  ncnt  = as.double(lw(hitc==-1)), # could not model count (<=3 concentrations with viable data)
  npct  = lw(hitc==-1)/.N, # could not model percentage
  mmed  = max(max_med, na.rm = TRUE), # maximum response (median at any given concentration) across entire assay endpoint
  cmax  = 10^median(logc_max, na.rm = TRUE), # nominal maximum tested concentration (target concentration)
  cmin  = 10^median(logc_min, na.rm = TRUE), # nominal minimum tested concentration (target concentration)
  mtop  = max(modl_tp, na.rm = TRUE), # maximum modeled response (top of curve) across entire assay endpoint
  nrep  = as.double(median(nrep, na.rm = TRUE)), # nominal number of replicates per sample (target number of replicates)
  npts  = as.double(median(npts, na.rm = TRUE)), # nominal number of data points per sample
  cnst  = lw(modl=='cnst')/.N, # percentage of sample-assayendpoints where the constant model won (may not all be 'actives')
  hill  = lw(modl=='hill')/.N, # percentage of sample-assayendpoints where the hill model won (may not all be 'actives')
  gnls  = lw(modl=='gnls')/.N, # percentage of sample-assayendpoints where the gain-loss model won (may not all be 'actives')
  rmse  = median(modl_rmse, na.rm = TRUE) # median root mean squared error across all model winners for an assay endpoint
), by = list(aeid, aenm, resp_unit)]

setkeyv(agg,"aenm")

agg2 <- dat[hitc >= 0 ,
            list(n = .N,
                 acnt = sum(hitc)
            )
            , by = list(aeid, chid)]

agg3 <- agg2[n > 1, list(
  ocnc = lw(acnt==n | acnt==0)/.N,  # overall concordance among chemical-replicates 
  hcnc = lw(acnt==n)/lw(acnt>0)  # hit concordance among chemical-replicates
  # (may be samples from different sources)
), by = aeid]


setkey(agg3,"aeid")
setkey(agg, "aeid")
agg <- agg3[agg]

#median(agg3$ocnc) # 0.7954545 - so approximately 80% concordance in replicate testing in the MEA DEV assay endpoints
overall.mea.mc5.df <- as.data.frame(agg)

#-----------------------------------------------------------------------------------#
# potency table for the MEA NFA reference chemicals

mc5.mea.dev[use.me==0 & hitc==1]

mc5.ref <- mc5.mea.dev[dsstox_substance_id %in% ref.dtxsid]
mc3.ref <- mc3.mea.dev[dsstox_substance_id %in% ref.dtxsid]
mc5.mea.rep <- as.data.table(mc5.mea.dev)
mc5.mea.rep[,n := .N, by=list(chid,aeid)]
mc5.mea.rep2 <- mc5.mea.rep[n>1 & !spid %in% c('DMSO','Water', 'DMSO/ethanol','Ethanol') 
                            & !chnm %in% c('Glufosinate-P')]

mc5.mea.rep2[,modl.ga.mean.aeid := mean(modl_ga, na.rm=TRUE), by=list(chid,aeid)]
mc5.mea.rep2[,modl.ga.sd.aeid := sd(modl_ga, na.rm=TRUE), by=list(chid,aeid)]
mc5.mea.rep2[,avg.modl.ga.sd.chid := mean(modl.ga.sd.aeid,na.rm=TRUE),by=list(chid)]
mc5.mea.rep2[,hitcprob := sum(hitc)/n, by=list(chid,aeid)]
mc5.mea.rep2[,assay.pos := sum(hitc), by=list(spid)]
mc5.mea.rep2[,rep.assay.pos := mean(assay.pos), by=list(chid)]
mc5.mea.rep2[,min.modl.ga.spid := min(modl_ga, na.rm=TRUE), by=list(spid)]
mc5.mea.rep2[,mean.modl.ga.spid := mean(modl_ga, na.rm=TRUE), by=list(spid)]

repeated.stats <- unique(mc5.mea.rep2[order(chnm),c('dsstox_substance_id','chnm','spid','assay.pos',
                                'min.modl.ga.spid','mean.modl.ga.spid','avg.modl.ga.sd.chid')])

repeated.stats.summary <- repeated.stats %>% mutate_at(vars(min.modl.ga.spid,
                                                            mean.modl.ga.spid,
                                                            avg.modl.ga.sd.chid), ~signif(.,3)) %>% data.table()
colnames(repeated.stats.summary)
setnames(repeated.stats.summary,
         c("dsstox_substance_id","chnm","spid" ,"assay.pos","min.modl.ga.spid","mean.modl.ga.spid", "avg.modl.ga.sd.chid"),
         c("DTXSID","Chemical","Sample","Positive AEIDs","Minimum log10-AC50", "Mean log10-AC50","SD(log10-AC50), average"))

repeat.chem.summary.df <- as.data.frame(repeated.stats.summary)



#### positives
mc5.mea.dev.pos <- mc5.mea.dev[dsstox_substance_id %in% ref.dtxsid]

mc5.mea.dev.pos[ ,number.pos := sum(use.me, na.rm=TRUE), by=list(spid)]

mc5.mea.dev.pos[ , min.pot := min(modl_ga,na.rm=TRUE), by=list(spid)]
mc5.mea.dev.pos[ , max.pot := max(modl_ga,na.rm=TRUE), by=list(spid)]
mc5.mea.dev.pos[ , med.pot := median(modl_ga, na.rm=TRUE), by=list(spid)]

mc5.mea.dev.pos[, min.pot.um := ifelse(!is.na(min.pot), 10^min.pot, NA)]
mc5.mea.dev.pos[, max.pot.um := ifelse(!is.na(max.pot), 10^max.pot, NA)]
mc5.mea.dev.pos[, med.pot.um := ifelse(!is.na(med.pot), 10^med.pot, NA)]

summary <- unique(mc5.mea.dev.pos[,c('spid', 'dsstox_substance_id','chnm',
'number.pos','min.pot','med.pot','max.pot', 
'min.pot.um', 'med.pot.um', 'max.pot.um')])
summary2 <- summary %>% mutate_if(is.numeric, ~round(.,2)) %>% data.table()
#write.csv(summary2, 'nheerl_mea_dev_ref_controls_performance_20mar2020.csv')
assay.ctrl.mea.perf.df <- as.data.frame(summary2)

# suggest taking this data.table to make heatmap of hitc
mc5.mea.dev.pos <- mc5.mea.dev[dsstox_substance_id %in% ref.dtxsid]


mea.tbl[acid %in% c(2471,2472,2473,2474), activity := 'General']
mea.tbl[acid %in% c(2475, 2476, 2477, 2478, 2481), activity := 'Bursting']
mea.tbl[acid %in% c(2479,2480,2482,2483,2484,2485,2486,2487), activity := 'Network Connectivity']
mea.tbl[acid %in% c(2488,2489), activity := 'Cytotoxicity']

mea.tbl[activity=='General', number :=1]
mea.tbl[activity=='Bursting', number :=2]
mea.tbl[activity=='Network Connectivity', number :=3]
mea.tbl[activity=='Cytotoxicity', number :=4]
#mea.dev <- unique(mea.tbl[,c('aenm', 'activity')])

colnames(mc5.mea.dev)
mat.ref <- dcast.data.table(mc5.mea.dev[dsstox_substance_id %in% ref.dtxsid],
                           chnm + + casn + dsstox_substance_id + spid ~ aenm,
                           value.var = c('modl_ga')
)

#mat1 <- mat1[dnt.evid %in% c('Positive','Negative')|dsstox_substance_id %in% op.list[,DTXSID]]

mat.ref[, names := paste0(chnm)]

mat.ref$names
mat.ref[names=="4-(4-Chlorophenyl)-4-hydroxy-N,N-dimethyl-alpha,alpha-diphenylpiperidine-1-butyramide monohydrochloride", names:= 'Loperamide HCl']
length(unique(mat.ref$names)) # 35 samples for 27 substances
head(mat.ref)

mat2.ref <- mat.ref[,lapply(.SD, function(x){ifelse(is.na(x),6,x)}), .SDcol=c(5:41)]

matrix <- as.matrix(mat2.ref[,1:36])
rownames(matrix) <- mat2.ref[,names]

mea.dev.2 <- as.data.table(colnames(matrix))
mea.dev.2$number <- mea.tbl$number[match(mea.dev.2$V1, mea.tbl$aenm)]
mea.dev.2$activity <- mea.tbl$activity[match(mea.dev.2$V1, mea.tbl$aenm)]

library(gplots)
library(viridis)

file.dir <- paste("./figures/", sep="")
file.name <- paste("/fig_mea_nfa_ref_chem_perf", Sys.Date(), ".png", sep="")
file.path <- paste(file.dir, file.name, sep="")
dir.create(path=file.dir, showWarnings = FALSE, recursive = TRUE)
png(file.path, 
    width = 10, 
    height = 8, 
    units = "in",
    res = 300)

heatmap.2(matrix, scale='none', 
          col=viridis(20,option='D'), 
          trace='none', density.info = 'none',
          colsep = c(1:35), rowsep = c(1:5), sepcolor='white', sepwidth=c(0.05,0.05),
          hclustfun = function(x) hclust(x, method="ward.D2"),
          labRow = row.names(matrix),
          labCol = substr(colnames(matrix),16,50),
          #labCol = NA,
          margins = c(10,16),
          cexRow =1,
          cexCol=0.8,
          ColSideColors = as.character(as.numeric(mea.dev.2$number)),
          srtCol=45,
          keysize=0.8)

legend(
  xpd=TRUE, x=0.8, y=1.05,
       title='Activity Type',
       legend = unique(mea.dev.2$activity),
       col = unique(as.numeric(mea.dev.2$number)), 
       bty='n',
       lty= 1,             
       lwd = 5,           
       cex=0.8
)

dev.off()

list.mea.data <- list(
                      "overall.mea.mc5" = overall.mea.mc5.df,
                      "zprm.ssmd.mea.mc3" = zprm.ssmd.mea.mc3.df, 
                      "dmso.cv.mea.mc0" = dmso.cv.mea.mc0.df, # Table 5
                      "assay.ctrl.mea.perf" = assay.ctrl.mea.perf.df, # Table 8
                      "repeated.chem.perf" = repeat.chem.summary.df) # Table 10/Supp Appendix Table 1
write.xlsx(list.mea.data, file='./output/mea_assay_validation_13Apr2020.xlsx')

#-----------------------------------------------------------------------------------#
# Assay reproducibility for HCI assays
#------------------------------------------------------------------------------------#

## loading libraries

rm(list = ls())

library(data.table)
library(openxlsx)
library(dplyr)
library(magrittr)
library(reshape2)
library(stringr)

load(file='./source/HCI_13Apr2020.RData')
write.csv(hci.tbl, 'hci_assay_info.csv')
#-----------------------------------------------------------------------------------#
# identify control chemicals that can be used to look at assay quality
#-----------------------------------------------------------------------------------#

hci.mc0[spid %in% c('DMSO','Staurosporine'), chnm := spid]
positive.table <- unique(hci.mc0[wllt %in% c('c','p','m'), c('acnm','acid','wllt','dsstox_substance_id','spid','chnm', 'conc')]) # staurosporine is 'p' for cell titer and caspase assays
positive.table[, n.conc := .N, by=list(spid, acid)]
# variable number of concs, so recommend taking max(conc), by spid

positive.table[, max.p.conc := max(conc), by=list(acid,spid)]

positive.table.df <- as.data.frame(positive.table)

hci.mc0[wllt=='p', max.p.conc := max(conc), by=list(acid,spid)]

#Get assay reproducibility based on hci mc0#-----------------------------------------------------------------------------------#

acids <- hci.tbl[,acid]
aq <- function(ac){
  dat <- hci.mc0
  dat <- dat[wllt %in% c('n','p') & wllq==1]
  agg <- dat[ , list(
    nmed = median(rval[wllt=="n"], na.rm=TRUE),
    nmad = mad(rval[wllt=="n"], na.rm=TRUE),
    pmed = median(rval[wllt=="p" & conc==max.p.conc], na.rm=TRUE),
    pmad = mad(rval[wllt=="p" & conc==max.p.conc], na.rm=TRUE)
  ), by = list(acid, acnm, apid)]
  
  agg[ , zprm.p := 1 - ((3 * (pmad + nmad)) / abs(pmed - nmed))]
  agg[ , ssmd.p := (pmed - nmed) / sqrt(pmad^2 + nmad^2 )]
  agg[ , cv     := (nmad / nmed)*100]
  agg[ , sn.p :=  (pmed - nmed) / nmad]
  agg[ , sb.p :=  pmed / nmed]
  
  agg[zprm.p<0, zprm.p := 0]

  acqu <- agg[ , list( nmed   = signif(median(nmed, na.rm = TRUE)),
                       nmad   = signif(median(nmad, na.rm = TRUE)),
                       pmed   = signif(median(pmed, na.rm = TRUE)),
                       pmad   = signif(median(pmad, na.rm = TRUE)),
                       zprm.p = round(median(zprm.p, na.rm=TRUE),2),
                       ssmd.p = round(median(ssmd.p, na.rm=TRUE),0),
                       cv = round(median(cv, na.rm=TRUE),2),
                       sn.p = round(median(sn.p, na.rm=TRUE),2),
                       sb.p = round(median(sb.p, na.rm=TRUE),2)
  ), by = list(acid, acnm)]
  
  return(acqu)
} #per acid

aqList <- lapply(acids, aq)
aqd <- rbindlist(aqList)
aqd <- unique(aqd)
aqd$p_chem <- positive.table$chnm[match(aqd$acid,positive.table$acid)]
aqd$p_dtxsid <- positive.table$dsstox_substance_id[match(aqd$acid,positive.table$acid)]

aqd <- aqd %>% mutate_at(vars(nmed,nmad,cv), ~signif(.,3)) %>% data.table()


hci.mc0.quant.qual.df <- as.data.frame(aqd)

#Get assay reproducibility based on hci mc3#-----------------------------------------------------------------------------------#
aeids <- hci.tbl[,aeid]
colnames(hci.mc3)
hci.mc3[wllt=='p', max.p.conc := max(logc), by=list(aeid,spid)]
hci.mc3[spid=='Staurosporine', chnm:= spid]
mc3.aq <- function(ac){
  dat <- hci.mc3
  dat <- dat[wllt %in% c('p','n')]
  agg <- dat[ , list(
    spid = spid,
    chnm = chnm,
    dsstox_substance_id = dsstox_substance_id,
    nmed = median(resp[wllt=="n"], na.rm=TRUE),
    nmad = mad(resp[wllt=="n"], na.rm=TRUE),
    pmed = median(resp[wllt=="p" & logc==max.p.conc], na.rm=TRUE),
    pmad = mad(resp[wllt=="p" & logc==max.p.conc], na.rm=TRUE)
  ), by = list(aeid, aenm, apid)]
  
  agg[ , zprm.p := 1 - ((3 * (pmad + nmad)) / abs(pmed - nmed))]
  agg[ , ssmd.p := (pmed - nmed) / sqrt(pmad^2 + nmad^2 )]
  agg[ , cv     := nmad / nmed]
  agg[ , sn.p :=  (pmed - nmed) / nmad]
  agg[ , sb.p :=  pmed / nmed]
  
  agg[zprm.p<0, zprm.p := 0]
  
  acqu <- agg[ , list( spid = spid,
                       dsstox_substance_id = dsstox_substance_id,
                       nmed   = signif(median(nmed, na.rm = TRUE)),
                       nmad   = signif(median(nmad, na.rm = TRUE)),
                       pmed   = signif(median(pmed, na.rm = TRUE)),
                       pmad   = signif(median(pmad, na.rm = TRUE)),
                       zprm.p = round(median(zprm.p, na.rm=TRUE),2),
                       ssmd.p = round(median(ssmd.p, na.rm=TRUE),0),
                       cv = round(median(cv, na.rm=TRUE),2),
                       sn.p = round(median(sn.p, na.rm=TRUE),2),
                       sb.p = round(median(sb.p, na.rm=TRUE),2)
  ), by = list(aeid, aenm, chnm)]
  
  return(acqu)
} #per acid
colnames(dat)
aqList <- lapply(aeids, mc3.aq)
mc3.aqd <- rbindlist(aqList)
mc3.aqd <- unique(mc3.aqd)
positive.table$aeid <- hci.tbl$aeid[match(positive.table$acid, hci.tbl$acid)]
mc3.aqd$p_chem <- positive.table$chnm[match(mc3.aqd$aeid, positive.table$aeid)]
mc3.aqd$p_dtxsid <- positive.table$dsstox_substance_id[match(mc3.aqd$aeid,positive.table$aeid)]
hci.mc3.quant.qual.df <- as.data.frame(mc3.aqd)

#Get assay reproducibility based on hci mc5#-----------------------------------------------------------------------------------#

dat <- hci.mc5

agg <- dat[ , list(
  bmad  = max(bmad, na.rm = TRUE),    #baseline median absolute deviation (mad around the first 2 tested concentrations
  nconc = as.double(median(nconc, na.rm = TRUE)), #nominal number of concentrations tested for the assay endpoint
  coff  = max(coff, na.rm = TRUE),  #global response cutoff established for the assay (methods available within pipeline)
  test  = .N, #total number of samples tested in concentration response
  acnt  = as.double(lw(hitc==1)),  # active count
  apct  = lw(hitc==1)/.N,  # active percentage
  icnt  = as.double(lw(hitc==0)),  #inactive count
  ipct  = lw(hitc==0)/.N,  #inactive percentage
  ncnt  = as.double(lw(hitc==-1)), # could not model count (<=3 concentrations with viable data)
  npct  = lw(hitc==-1)/.N, # could not model percentage
  mmed  = max(max_med, na.rm = TRUE), # maximum response (median at any given concentration) across entire assay endpoint
  cmax  = 10^median(logc_max, na.rm = TRUE), # nominal maximum tested concentration (target concentration)
  cmin  = 10^median(logc_min, na.rm = TRUE), # nominal minimum tested concentration (target concentration)
  mtop  = max(modl_tp, na.rm = TRUE), # maximum modeled response (top of curve) across entire assay endpoint
  nrep  = as.double(median(nrep, na.rm = TRUE)), # nominal number of replicates per sample (target number of replicates)
  npts  = as.double(median(npts, na.rm = TRUE)), # nominal number of data points per sample
  cnst  = lw(modl=='cnst')/.N, # percentage of sample-assayendpoints where the constant model won (may not all be 'actives')
  hill  = lw(modl=='hill')/.N, # percentage of sample-assayendpoints where the hill model won (may not all be 'actives')
  gnls  = lw(modl=='gnls')/.N, # percentage of sample-assayendpoints where the gain-loss model won (may not all be 'actives')
  rmse  = median(modl_rmse, na.rm = TRUE) # median root mean squared error across all model winners for an assay endpoint
), by = list(aeid, aenm, resp_unit)]

setkeyv(agg,"aenm")

agg2 <- dat[hitc >= 0 ,
            list(n = .N,
                 acnt = sum(hitc)
            )
            , by = list(aeid, chid)]

agg3 <- agg2[n > 1, list(
  ocnc = lw(acnt==n | acnt==0)/.N,  # overall concordance among chemical-replicates 
  hcnc = lw(acnt==n)/lw(acnt>0)  # hit concordance among chemical-replicates
  # (may be samples from different sources)
), by = aeid]


setkey(agg3,"aeid")
setkey(agg, "aeid")
agg <- agg3[agg]

median(agg3$ocnc) # nothin with same chid tested twice

mc5.apd.df <- as.data.frame(agg)

#-----------------------------------------------------------------------------------#
# potency table for the reference chemicals
colnames(hci.mc5)

# filter the dataset, with coarse filters
hci.mc5[hitc==1 & flag.length < 3, use.me := 1]
hci.mc5[hitc==1 & is.na(flag.length), use.me := 1]
hci.mc5[hitc==1 & flag.length >= 3, use.me := 0]
hci.mc5[fitc %in% c(36,45), use.me := 0]
hci.mc5[hitc==-1, use.me := 0] # make hitc interpretable as a positive sum
hci.mc5[use.me==0, modl_ga := as.numeric(NA)]
hci.mc5[use.me==0, hitc := 0]

# are any spids replicated in hci.mc5?
mc5.hci.rep <- as.data.table(hci.mc5)
mc5.hci.rep[,n := .N, by=list(chid,aeid)]
mc5.hci.rep[n>1]
mc5.hci.rep2 <- mc5.hci.rep[n>1 & !spid %in% c('DMSO','Water', 'DMSO/ethanol','Ethanol') 
                            & !chnm %in% c('Glufosinate-P')]


unique(positive.table$spid)
hci.mc5.pos <- hci.mc5[spid %in% positive.table$spid]
# none of these make it to mc5 because there are not enough concentrations to curve-fit/model for the controls

hci.mc3.pos <- hci.mc3[spid %in% positive.table$spid]
unique(hci.mc3.pos[spid=='Staurosporine']$assay.num)
unique(hci.mc3.pos[spid=='Staurosporine' & med.hitc.sum > 2]$apid) #6 aeid == 2794
unique(hci.mc3.pos[spid=='Staurosporine' & med.hitc.sum < 2]$apid) #39

#hci.mc5.pos[ ,number.pos := sum(use.me, na.rm=TRUE), by=list(spid)]
colnames(hci.mc3.pos)
hci.mc3.pos[wllt=='p', max.p.conc := max(logc), by=list(aeid,spid)]
hci.mc3.pos <- hci.mc3.pos[max.p.conc == logc]
hci.mc3.pos[agg, coff := coff, on='aeid'] # get the cutoff for the aeid
hci.mc3.pos[ , med.resp := median(resp, na.rm=TRUE), by=list(spid,aeid)] # median response per spid + aeid combination
hci.mc3.pos[ , med.hitc := 0] # median hitcall default
hci.mc3.pos[ med.resp > coff , med.hitc := 1] # median hitcall==1 if the median response > cutoff
hci.mc3.pos[ , apid.sum := .N, by=list(spid, aeid)] # number of plates per spid + aeid combination
hci.mc3.pos[ , med.hitc.sum := (sum(med.hitc)/apid.sum), by=list(spid)] # median number of hitc==1 per spid

hci.mc3.pos[ , assay.num := length(unique(aeid)), by=list(spid)] # assays screened in the set

summary4 <- unique(hci.mc3.pos[,c('spid','chnm','dsstox_substance_id', 'casn', 'aeid','aenm','max.p.conc','logc','coff','med.resp','med.hitc', 'med.hitc.sum', 'assay.num')])


summary5 <- summary4 %>% mutate_at(vars(max.p.conc, logc, coff, med.resp, med.hitc.sum), ~round(.,2)) %>% data.table()
summary5[,conc.um := ifelse(!is.na(logc), 10^logc, NA)]

mc3.aqd
mc3.aqd[,key:= paste0(aeid,'_',p_dtxsid)]
colnames(summary5)
summary5[,key:= paste0(aeid,'_',dsstox_substance_id)]

summary6 <- merge(mc3.aqd,
      summary5,
      by=c('key'))
summary6 <- summary6[!spid.x=='DMSO']
colnames(summary6)
summary7 <- summary6[,c('aeid.x','aenm.x','coff', 'med.resp','med.hitc',
                        'chnm.x','dsstox_substance_id.x','conc.um',
                        'zprm.p', 'ssmd.p','sn.p')]
setnames(summary7, c('aeid.x','aenm.x','coff', 'med.resp','med.hitc',
                     'chnm.x','dsstox_substance_id.x','conc.um',
                     'zprm.p', 'ssmd.p','sn.p'),
         c('aeid', 'AENM', 'COFF', 'MED.RESP', 'MED.HITC', 'CHEM', 'DTXSID','CONC.UM','Z', 'SSMD', 'SN'))

summary7[aeid %in% c(2793:2794), index := 1]
summary7[aeid %in% c(2795:2797), index := 2]
summary7[aeid %in% c(2789:2792), index := 3]
summary7[aeid %in% c(2777:2780), index := 4]
summary7[aeid %in% c(2781:2788), index := 5]
summary7 <- summary7[order(index, CHEM)]


hci.mc5.ctrls.df <- as.data.frame(summary7)

hci.ref.chem <- as.data.table(summary5)
head(hci.ref.chem)
hci.ref.hm <- dcast.data.table(hci.ref.chem,
                               spid + chnm + casn + dsstox_substance_id ~ aenm,
                               value.var = c('med.hitc'))
hci.ref.hm[spid=='Staurosporine', chnm := "Staurosporine"]
hci.ref.hm[, names := paste0(chnm)]

colnames(hci.ref.hm)
setcolorder(hci.ref.hm,
            c(1:4,
              21:22,
              23:25,
              17:20,
              5:8,
              9:16,
              26))

hci.ref.hm[chnm=='Staurosporine', index :=1]
hci.ref.hm[chnm=='Aphidicolin', index :=2]
hci.ref.hm[chnm=='NSC 23766 trihydrochloride', index := 3]
hci.ref.hm[chnm=='Lithium chloride', index := 4]
hci.ref.hm[chnm=='Bisindolylmaleimide I', index := 5]
hci.ref.hm[chnm=='Sodium orthovanadate', index := 6]

hci.ref.hm <- hci.ref.hm[order(index)]
hci.ref.hm.2 <- hci.ref.hm[,lapply(.SD, function(x){ifelse(is.na(x),2,x)}), .SDcol=c(5:26)]

matrix <- as.matrix(hci.ref.hm.2[,1:21])
rownames(matrix) <- hci.ref.hm.2[,names]

# now make the information for activity type
#hci.tbl <- tcplLoadAeid(val=31, fld='asid', add.fld = 'acid')
hci.tbl[aeid %in% c(2777:2780), activity := 'NOG initiation, rat']
hci.tbl[aeid %in% c(2781:2788), activity := 'Synaptogenesis/maturation, rat']
hci.tbl[aeid %in% c(2789:2792), activity := 'NOG initiation, hN2']
hci.tbl[aeid %in% c(2793:2794), activity := 'Apoptosis/viability, hNP1']
hci.tbl[aeid %in% c(2795:2797), activity := 'Proliferation, hNP1']

hci.tbl[activity == 'NOG initiation, rat', number :=1]
hci.tbl[activity == 'Synaptogenesis/maturation, rat', number :=2]
hci.tbl[activity == 'NOG initiation, hN2', number :=3]
hci.tbl[activity == 'Apoptosis/viability, hNP1', number :=4]
hci.tbl[activity == 'Proliferation, hNP1', number :=5]

hci.tbl.2 <- as.data.table(colnames(matrix))
hci.tbl.2$number <- hci.tbl$number[match(hci.tbl.2$V1, hci.tbl$aenm)]
hci.tbl.2$activity <- hci.tbl$activity[match(hci.tbl.2$V1, hci.tbl$aenm)]


library(gplots)
library(viridis)

colors <- c("#FDE725FF", "#21908CFF", "gray")

file.dir <- paste("./figures/", sep="")
file.name <- paste("/fig_hci_ref_chem_perf", Sys.Date(), ".png", sep="")
file.path <- paste(file.dir, file.name, sep="")
dir.create(path=file.dir, showWarnings = FALSE, recursive = TRUE)
png(file.path, 
    width = 10, 
    height = 8, 
    units = "in",
    res = 450)

heatmap.2(matrix, scale='none', 
          col=colors, 
          trace='none', density.info = 'none',
          #dendogram='none', 
          Rowv=FALSE, 
          Colv=FALSE, #no clustering
          colsep = c(1:20), rowsep = c(1:5), sepcolor='white', sepwidth=c(0.05,0.05),
          #hclustfun = function(x) hclust(x, method="ward.D2"),
          labRow = row.names(matrix),
          labCol = substr(colnames(matrix),11,50),
          #labCol = NA,
          margins = c(15,15),
          cexRow =1,
          cexCol=0.8,
          ColSideColors = as.character(as.numeric(hci.tbl.2$number)),
          srtCol=45,
          keysize=0.7,
          lwid = c(1,4),
          lhei = c(0.7,4,1))
#xpd=TRUE, x=0.8, y=1.1,
legend(
xpd=TRUE, x=0.75, y=1.13,
  title='Activity Type',
  legend = unique(hci.tbl.2$activity),
  col = unique(as.numeric(hci.tbl.2$number)), 
  bty='n',
  lty= 1,             
  lwd = 5,           
  cex=0.8
)

dev.off()


list.hci.data <- list("assay.ctrl.hci" = positive.table.df,
                      "overall.hci.mc5" = mc5.apd.df,
                      "zprm.ssmd.hci.mc3" = hci.mc3.quant.qual.df, #Table 9
                      "dmso.cv.hci.mc0" = hci.mc0.quant.qual.df, # Table 6
                      "assay.ctrl.hci.perf" = hci.mc5.ctrls.df)
write.xlsx(list.hci.data, file='./output/hci_assay_validation_13Apr2020.xlsx')

