[11]:
data("ifnb")
# use seurat for variable gene selection
ifnb <- NormalizeData(ifnb, normalization.method = "LogNormalize", scale.factor = 10000)
ifnb[["percent.mt"]] <- PercentageFeatureSet(ifnb, pattern = "^MT-")
ifnb <- subset(ifnb, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
ifnb <- FindVariableFeatures(ifnb, selection.method = "vst", nfeatures = 2000)
top2000 <- head(VariableFeatures(ifnb), 2000)
ifnb <- ifnb[top2000]
adata <- sc$AnnData(
X = t(as.matrix(GetAssayData(ifnb, slot='counts'))),
obs = ifnb[[]]
print(adata)
# run seteup_anndata, use column stim for batch
scvi$data$setup_anndata(adata, batch_key = 'stim')
# create the model
model = scvi$model$SCVI(adata)
# train the model
model$train()
# to specify the number of epochs when training:
# model$train(n_epochs = as.integer(400))
latent = model$get_latent_representation()
# put it back in our original Seurat object
latent <- as.matrix(latent)
rownames(latent) = colnames(ifnb)
ifnb[['scvi']] <- CreateDimReducObject(embeddings = latent, key = "scvi_", assay = DefaultAssay(ifnb))
library(cowplot)
# for jupyter notebook
options(repr.plot.width=10, repr.plot.height=8)
ifnb <- RunUMAP(ifnb, dims = 1:10, reduction = 'scvi', n.components = 2)
p1 <- DimPlot(ifnb, reduction = "umap", group.by = "stim", pt.size=2)
plot_grid(p1)
10:10:30 UMAP embedding parameters a = 0.9922 b = 1.112
10:10:30 Read 13997 rows and found 10 numeric columns
10:10:30 Using Annoy for neighbor search, n_neighbors = 30
10:10:30 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
10:10:34 Writing NN index file to temp file /tmp/RtmpIX2mNg/file4cf831829dad
10:10:34 Searching Annoy index using 1 thread, search_k = 3000
10:10:43 Annoy recall = 100%
10:10:44 Commencing smooth kNN distance calibration using 1 thread
10:10:45 Initializing from normalized Laplacian + noise
10:10:46 Commencing optimization for 200 epochs, with 517208 positive edges
10:10:57 Optimization finished
options(repr.plot.width=12, repr.plot.height=10)
FeaturePlot(ifnb, features = c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A",
"CCL2", "PPBP"), min.cutoff = "q9")
FeaturePlot(ifnb, features = c("GNLY", "IFI6"), split.by = "stim", max.cutoff = 3,
cols = c("grey", "red"))