Klasyfikacja komórek nowotworowych z pakietem RStemnessScorer

Dane

PCBC
TCGA

1. Wilcoxon-Mann-Whitney Test

# Run the WMCT comparison of `pcbc` against the `tcga` rows selected by
# TCGA$normal, for group "SC", using 3 jobs and the "fdr" adjustment.
wmct <- WMCT(
  pcbc,
  tcga[TCGA$normal, ],
  G = "SC",
  njob = 3,
  adjust = "fdr"
)

# Derive the signature from the test result with n = 7000.
signature <- signatureWMCT(wmct, n = 7000)

# Show the stored WMCT plot with a custom three-colour fill palette.
rss$wmct_plot +
  scale_fill_manual(values = c("#999999", "salmon", "#56B4E9"))

2. Klasyfikacja

# Build the learning sets from the same inputs as the WMCT step, restricted to
# the signature computed above.
# NOTE(review): `cutoff = 0.7` is presumably the train/test split share —
# confirm against setupLearningSets().
learn <- setupLearningSets(
  pcbc,
  tcga[TCGA$normal, ],
  G = "SC",
  signature = signature,
  cutoff = 0.7
)

# Print the class balance stored in the results object.
rss$balance

## [1] "healthy:SC 168:30" "healthy:SC 73:14"

2a. Regresja Logistyczna

# Fit one logistic-regression ("LR") scorer per cross-validation measure and
# key the resulting list by the measure's display name.
objectiveFun <- c("Class", "AUC", "Deviance")
models <- setNames(
  lapply(objectiveFun, function(measure) {
    buildScorer(
      learn$train$X,
      learn$train$Y,
      model = "LR",
      # buildScorer receives the lower-case form of the measure name.
      cv.measure = tolower(measure),
      intercept = TRUE,
      standardize = FALSE,
      # NOTE(review): `nthreads` is not defined in this excerpt — confirm it
      # is in scope where this snippet runs.
      njob = nthreads
    )
  }),
  objectiveFun
)

# Lay out the three stored glmnet plots in one row; the y-axis title is
# blanked on the second and third panels.
grid.arrange(
  rss$glmnet_gg$Class,
  rss$glmnet_gg$AUC + ylab(""),
  rss$glmnet_gg$Deviance + ylab(""),
  ncol = 3
)

# Print the stored glmnet coefficients.
rss$glmnet_coefs

# Draw the stored glmnet feature plot.
grid.arrange(rss$glmnet_features)

2b. Lasy losowe

# Fit the random-forest ("RF") scorer on the training set with 5000 trees.
rf <- buildScorer(
  learn$train$X,
  learn$train$Y,
  model = "RF",
  ntree = 5000,
  njob = 3
)

# List the models held in the results object.
names(rss$models)

## [1] "Class"         "AUC"           "Deviance"      "Random Forest"

# Print the fitted random-forest model stored in the results object.
rss$models[["Random Forest"]]

## 
## Call:
##  randomForest(x = xdata, y = ydata, ntree = ntree, nodesize = 1) 
##                Type of random forest: classification
##                      Number of trees: 5000
## No. of variables tried at each split: 83

3. Walidacja

# Score the test-set samples with the model stored under "AUC".
scores <- scorer(learn$test$X, models$AUC)

# Show the stored `hist` and `roc` validation plots side by side, each with
# its legend moved below the panel.
grid.arrange(
  rss$validation$hist + theme(legend.position = "bottom"),
  rss$validation$roc + theme(legend.position = "bottom"),
  ncol = 2
)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Dodatkowe analizy

library(ggplot2)

# Scatter the random-forest values against the logistic-regression values
# taken from the data behind the validation histogram.
dane <- rss$validation$hist$data
df <- data.frame(
  dane,
  model.type = ifelse(dane$f == "Random Forest", "RF", "LR")
)
x <- df[df$model.type == "LR", c("value", "f")]
y <- df[df$model.type == "RF", c("value", "f")]

# The RF values are repeated once per LR model so both axes have equal length
# (this was a hard-coded 3 for the Class/AUC/Deviance models).
# NOTE(review): this assumes the LR rows are grouped by model with samples in
# the same order as the RF rows — confirm against how `hist$data` is built.
n_lr <- length(unique(x$f))

# labs() expects aesthetic names (`x`, `y`); the previous `xlab =`/`ylab =`
# arguments matched no aesthetic, so the axis titles were never set.
ggplot() +
  geom_point(aes(x = rep(y$value, n_lr), y = x$value, color = x$f)) +
  labs(title = "predictions", x = "RF", y = "LR")

4. Test: klasyfikacja obserwacji nowotworowych ze zbiorów RTCGA.

# Apply the fitted models to the `tcga` rows selected by TCGA$tumor, with
# verbose output switched off.
test <- testTumor(
  tcga[TCGA$tumor, ],
  models,
  verbose = FALSE
)

# Show the stored test results.
rss$test

Wyniki

# Run perform() and keep its result in `rss`.
# NOTE(review): the `...` entries appear to stand for further values omitted
# from this listing — replace them with real values before running.
rss <- perform(
  data = c("mRNA", "methylation", ...),
  group = c("meso", "ecto", "endo"),
  G = c("SC", "EB", "ENDO", ...),
  # Transformation: ranks (NAs kept as NA) divided by the vector length.
  FUN = function(x) rank(x, na.last = "keep") / length(x),
  pcbc.dir = "~/RStemnessScorer_results/DATA/",
  meta_class = "Diffname_short",
  meta_id = "UID",
  nthreads = 3,
  ntrees = 5000
)

# Components available in the results object.
names(rss)

## [1] "wmct_signature"  "wmct_plot"       "balance"         "glmnet_gg"      
## [5] "glmnet_features" "glmnet_coefs"    "models"          "validation"     
## [9] "test"

MESO EB

## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name            grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (2-2,1-1) arrange gtable[arrange]

ENDO SC

## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name            grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (2-2,1-1) arrange gtable[arrange]

ENDO EB

## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name            grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (2-2,1-1) arrange gtable[arrange]

mRNA - MESO - SC

##            used  (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  1672679  89.4    3886542  207.6   3886542  207.6
## Vcells 94740697 722.9  371018113 2830.7 285600182 2179.0

## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name            grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (2-2,1-1) arrange gtable[arrange]