rm(list = ls())
#setwd(F:/C1 20170725/lou/monocle)
getwd()
##package intall
install.packages(c(VGAM, irlba, matrixStats, igraph,
combinat, fastICA, grid, ggplot2,
reshape2, plyr, parallel, methods))
source(http://bioconductor.org/biocLite.R)
biocLite()
biocLite(monocle)
library(Biobase)
library(knitr)
library(reshape2)
library(ggplot2)
library(HSMMSingleCell)
library(monocle)
###read data
all2<-read.csv(zhouxia_cd19.csv,head=T,row.names = 1)
all2<-all2[,-26]
colnames(all2)
##phenomenon data and feather data
sample_sheet <- data.frame(row.names=colnames(all2), source = colnames(all2),state = c(rep(B1,9), rep(B2,16),rep(R,82)))
gene_annotation <- data.frame(row.names=rownames(all2), gene_short_name = rownames(all2),biotype=protein_coding)
pd <- new(AnnotatedDataFrame, data = sample_sheet)
fd <- new(AnnotatedDataFrame, data = gene_annotation)
##create HSMM
HSMM <- newCellDataSet(as.matrix(all2), phenoData = pd, featureData = fd)
HSMM <- detectGenes(HSMM, min_expr = 1)
print(head(fData(HSMM)))
# gene_short_name biotype num_cells_expressed
# RP11-34P13.7 RP11-34P13.7 protein_coding 5
# FO538757.1 FO538757.1 protein_coding 15
# AP006222.2 AP006222.2 protein_coding 6
# RP5-857K21.4 RP5-857K21.4 protein_coding 4
# LINC01128 LINC01128 protein_coding 13
# RP11-54O7.16 RP11-54O7.16 protein_coding 2
###secleting genes accroding expression levels at cells
expressed_genes <- row.names(subset(fData(HSMM), num_cells_expressed >= 40))
length(expressed_genes)
# [1] 3656
print(head(pData(HSMM)))
# source state Size_Factor num_genes_expressed
# Sb_01_ROW04 Sb_01_ROW04 befor NA 301
# Sb_01_ROW12 Sb_01_ROW12 befor NA 4417
# Sb_01_ROW17 Sb_01_ROW17 befor NA 2645
# Sb_01_ROW21 Sb_01_ROW21 befor NA 3261
# Sb_01_ROW25 Sb_01_ROW25 befor NA 772
# Sb_01_ROW26 Sb_01_ROW26 befor NA 768
HSMM
L <- log(exprs(HSMM[expressed_genes,]))
head(L[,1:5])
# Sb_01_ROW04 Sb_01_ROW12 Sb_01_ROW17 Sb_01_ROW21 Sb_01_ROW25
# ISG15 -Inf 3.734656 -Inf -Inf -Inf
# UBE2J2 -Inf 1.256210 -Inf -Inf -Inf
# AURKAIP1 -Inf -Inf -Inf -Inf -Inf
# CCNL2 -Inf -Inf -Inf -Inf -Inf
# SSU72 -Inf -Inf -Inf -Inf 0.5760293
# CDK11B -Inf -Inf -Inf 5.766534 -Inf
L[is.infinite(L)] <- log(10^-3) ###change -inf into log(10^-3)
head(L[,1:5])
# Sb_01_ROW04 Sb_01_ROW12 Sb_01_ROW17 Sb_01_ROW21 Sb_01_ROW25
# ISG15 -6.907755 3.734656 -6.907755 -6.907755 -6.9077553
# UBE2J2 -6.907755 1.256210 -6.907755 -6.907755 -6.9077553
# AURKAIP1 -6.907755 -6.907755 -6.907755 -6.907755 -6.9077553
# CCNL2 -6.907755 -6.907755 -6.907755 -6.907755 -6.9077553
# SSU72 -6.907755 -6.907755 -6.907755 -6.907755 0.5760293
##data melt
library(reshape)
melted_dens_df <- melt(t(scale(t(L))))
head(melted_dens_df)
# Var1 Var2 value
# 1 ISG15 Sb_01_ROW04 -0.5429312
# 2 UBE2J2 Sb_01_ROW04 -0.4120066
# 3 AURKAIP1 Sb_01_ROW04 -0.7943053
# 4 CCNL2 Sb_01_ROW04 -0.3876273
qplot(value, geom=density, data=melted_dens_df)
qplot(value, geom=density, data=melted_dens_df) + stat_function(fun = dnorm, size=0.5, color='red')
HSMM <- estimateSizeFactors(HSMM)
HSMM <- setOrderingFilter(HSMM, expressed_genes)
HSMM <- reduceDimension(HSMM,use_irlba=FALSE)
HSMM <- orderCells(HSMM, num_paths=3, reverse=TRUE)
plot_spanning_tree(HSMM,cell_size = 3)
diff_test_res <- differentialGeneTest(HSMM[expressed_genes,],fullModelFormulaStr=expression~Media)
head(diff_test_res)
# status family pval qval gene_short_name biotype num_cells_expressed
# A2MP1 FAIL negbinomial.size 1 1 A2MP1 protein_coding 165
# AAED1 FAIL negbinomial.size 1 1 AAED1 protein_coding 40
# AAK1 FAIL negbinomial.size 1 1 AAK1 protein_coding 61
# AAMP FAIL negbinomial.size 1 1 AAMP protein_coding 44
# AAR2 FAIL negbinomial.size 1 1 AAR2 protein_coding 54
# AARS FAIL negbinomial.size 1 1 AARS protein_coding 42
HSMM <- setOrderingFilter(HSMM, expressed_genes)
HSMM <- reduceDimension(HSMM, use_irlba=FALSE)
HSMM <- orderCells(HSMM, num_paths=2, reverse=TRUE)
plot_spanning_tree(HSMM,color_by=pData(HSMM)$state,cell_size = 4)