1. Wilcoxon-Mann-Whitney Test
# Wilcoxon-Mann-Whitney step: run WMCT() on `pcbc` against the rows of `tcga`
# selected by `TCGA$normal`, for group "SC".
# NOTE(review): `adjust = "fdr"` presumably requests FDR-adjusted p-values;
# confirm against the WMCT() documentation.
wmct <- WMCT(
  pcbc,
  tcga[TCGA$normal, ],
  G = "SC",
  njob = 3,
  adjust = "fdr"
)

# Derive the signature from the test result, with n = 7000.
signature <- signatureWMCT(wmct, n = 7000)

# Display the stored WMCT plot with a manual three-colour fill scale.
rss$wmct_plot +
  scale_fill_manual(values = c("#999999", "salmon", "#56B4E9"))
2. Klasyfikacja
# Classification step: build the learning sets from `pcbc` and the rows of
# `tcga` selected by `TCGA$normal`, restricted to the WMCT signature.
# NOTE(review): the meaning of `cutoff = 0.7` is defined by
# setupLearningSets(); confirm there before changing it.
learn <- setupLearningSets(
  pcbc,
  tcga[TCGA$normal, ],
  G = "SC",
  signature = signature,
  cutoff = 0.7
)

# Show the stored class-balance summary.
rss$balance
## [1] "healthy:SC 168:30" "healthy:SC 73:14"
2a. Regresja Logistyczna
# Logistic-regression step: fit one "LR" scorer per cross-validation measure.
objectiveFun <- c("Class", "AUC", "Deviance")

# Each scorer is trained on the training split; the measure name is passed
# lower-cased as `cv.measure`. The resulting list is named after the measures.
# NOTE(review): `nthreads` is not defined in this snippet; it must already
# exist in the calling scope.
models <- setNames(
  lapply(objectiveFun, function(measure) {
    buildScorer(
      learn$train$X,
      learn$train$Y,
      model = "LR",
      cv.measure = tolower(measure),
      intercept = TRUE,
      standardize = FALSE,
      njob = nthreads
    )
  }),
  objectiveFun
)

# Stored glmnet plots for the three measures, side by side; the y-axis title
# is blanked on the second and third panels.
grid.arrange(
  rss$glmnet_gg$Class,
  rss$glmnet_gg$AUC + ylab(""),
  rss$glmnet_gg$Deviance + ylab(""),
  ncol = 3
)

# Stored coefficient summary, followed by the stored feature plot.
rss$glmnet_coefs
grid.arrange(rss$glmnet_features)
2b. Lasy losowe
# Random-forest step: fit an "RF" scorer with 5000 trees on the training split.
rf <- buildScorer(
  learn$train$X,
  learn$train$Y,
  model = "RF",
  ntree = 5000,
  njob = 3
)

# List the models stored in the results object.
names(rss$models)
## [1] "Class" "AUC" "Deviance" "Random Forest"
# Print the stored random-forest model.
rss$models[["Random Forest"]]
##
## Call:
## randomForest(x = xdata, y = ydata, ntree = ntree, nodesize = 1)
## Type of random forest: classification
## Number of trees: 5000
## No. of variables tried at each split: 83
3. Walidacja
# Validation step: score the held-out test split with the AUC-tuned model.
scores <- scorer(learn$test$X, models[["AUC"]])

# Stored validation histogram and ROC plot, side by side, each with its
# legend moved below the panel.
grid.arrange(
  rss$validation$hist + theme(legend.position = "bottom"),
  rss$validation$roc + theme(legend.position = "bottom"),
  ncol = 2
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Dodatkowe analizy
library(ggplot2)

# Additional analysis: scatter plot of the logistic-regression predictions
# against the random-forest predictions.

# Data behind the validation histogram; the code below reads its `value`
# and `f` columns.
dane <- rss$validation$hist$data

# Tag each row as random forest ("RF") or logistic regression ("LR"),
# based on the model label stored in `f`.
df <- data.frame(
  dane,
  model.type = ifelse(dane$f == "Random Forest", "RF", "LR")
)
x <- df[df$model.type == "LR", c("value", "f")]
y <- df[df$model.type == "RF", c("value", "f")]

# The RF predictions are repeated once per LR model so that they line up
# with the stacked LR predictions (previously a hard-coded 3).
# NOTE(review): this assumes the rows are grouped by model and that every
# model scores the same observations in the same order -- confirm.
# The plotting data is built as a data frame so aes() can refer to columns
# instead of using `$` extraction.
ggplot(
  data.frame(
    rf = rep(y$value, length(unique(x$f))),
    lr = x$value,
    f = x$f
  ),
  aes(x = rf, y = lr, color = f)
) +
  geom_point() +
  # labs() sets axis titles through the aesthetic names `x` and `y`;
  # `xlab =` / `ylab =` arguments are silently ignored.
  labs(title = "predictions", x = "RF", y = "LR")
4. Test: klasyfikacja obserwacji nowotworowych ze zbiorów RTCGA.
# Tumor test step: apply the fitted models to the rows of `tcga` selected by
# `TCGA$tumor`, with verbose output switched off.
test <- testTumor(
  tcga[TCGA$tumor, ],
  models,
  verbose = FALSE
)

# Show the stored test result.
rss$test
Wyniki
# Results: run the whole pipeline through perform() and keep its output in
# `rss`. The `...` entries inside c() are placeholders for further values,
# kept exactly as written.
rss <- perform(
  data = c("mRNA", "methylation", ...),
  group = c("meso", "ecto", "endo"),
  G = c("SC", "EB", "ENDO", ...),
  # Rank of each value divided by the vector length; NAs stay NA.
  FUN = function(x) rank(x, na.last = "keep") / length(x),
  pcbc.dir = "~/RStemnessScorer_results/DATA/",
  meta_class = "Diffname_short",
  meta_id = "UID",
  nthreads = 3,
  ntrees = 5000
)

# List the components of the results object.
names(rss)
## [1] "wmct_signature" "wmct_plot" "balance" "glmnet_gg"
## [5] "glmnet_features" "glmnet_coefs" "models" "validation"
## [9] "test"
MESO EB
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (2-2,1-1) arrange gtable[arrange]
ENDO SC
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (2-2,1-1) arrange gtable[arrange]
ENDO EB
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (2-2,1-1) arrange gtable[arrange]
mRNA - MESO - SC
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1672679 89.4 3886542 207.6 3886542 207.6
## Vcells 94740697 722.9 371018113 2830.7 285600182 2179.0
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (2-2,1-1) arrange gtable[arrange]