-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprojectionOfInsitu.Rmd
More file actions
119 lines (106 loc) · 3.67 KB
/
Copy pathprojectionOfInsitu.Rmd
File metadata and controls
119 lines (106 loc) · 3.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
---
title: "projectionOfInsitu"
author: "Gaurav"
date: "12/12/2019"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(CoGAPS)
library(pheatmap)
library(projectR)
raw.data = read.csv("dge_raw.txt",sep = "\t",header = F)
raw.data.genes = raw.data$V1
raw.data$V1 = NULL
print(grep("'",raw.data.genes,value = T,fixed = T))
raw.data.genes = gsub("'","",raw.data.genes,fixed = T)
raw.data = as.matrix(raw.data)
rownames(raw.data) = raw.data.genes
normalized.data = read.csv("dge_normalized.txt", sep = "\t")
print(grep("'",rownames(normalized.data),value = T,fixed = T))
normalized.data.genes<-gsub("'","",rownames(normalized.data),fixed = T)
normalized.data <- as.matrix(normalized.data)
insitu.matrix = read.csv("binarized_bdtnp.csv",check.names=F)
insitu.genes_orig <- colnames(insitu.matrix)
missingGenes = insitu.genes_orig[which(!insitu.genes_orig %in% normalized.data.genes)]
print(missingGenes)
insitu.genes = gsub(".","-",insitu.genes_orig,fixed = T)
insitu.genes = gsub("-spl-","(spl)",insitu.genes,fixed = T)
stopifnot(all(insitu.genes %in% raw.data.genes))
stopifnot(all(insitu.genes %in% normalized.data.genes))
insitu.matrix = as.matrix(insitu.matrix)
colnames(insitu.matrix) = insitu.genes
geometry <- read.csv("geometry.txt",sep = " ")
```
Load the cogaps result on the insitu matrix
```{r}
files <- list.files('insituCgps/')
files <- files[-1]
files <- paste0('insituCgps/',files)
insituCgps <- lapply(files,readRDS)
```
Do projections
```{r}
projections <- lapply(insituCgps,function(icgp) projectR(normalized.data,loadings = icgp,full = T))
```
```{r}
saveRDS(projections,'projectionsOfInsitu.rds')
```
Look at the projection
```{r}
pheatmap(projections[[1]]$projection)
```
### Test if patterns capture spatially close cells
Since these patterns were learnt from the insitu matrix which is the spatial distribution of the gene expression of 84 marker genes. These patterns can be thought of as spatial patterns. Thus, a quick test to check if the patterns are meaningful is to compare the mean distance of top quartile of cells with randomly chosen quarter of cells. If the patterns are spatially meaningful than the mean distance of top quartile will be significantly less than randomly chosen cells.
### Get the location of the cells
```{r}
library(DistMap)
dm = new("DistMap",
raw.data=raw.data,
data=normalized.data,
insitu.matrix=insitu.matrix,
geometry=as.matrix(geometry))
dm <- binarizeSingleCellData(dm, seq(0.15, 0.5, 0.01))
dm <- mapCells(dm)
# Assign location using highest mcc.scores
location <- sapply(1:ncol(dm@mcc.scores),function(j){
which.max(dm@mcc.scores[,j])
})
```
```{r}
distances <- dist(geometry[1:3039,])
dv <- as.vector(distances)
meanDistance <- mean(dv)
```
For nPatterns = 10 get the distances for top quartile cells
```{r}
meanDistPat <- sapply(1:10,function(i){
tp <- projections[[1]]$projection[i,]
tp <- unname(tp)
topQuanTp <- which(tp>quartile(tp,0.75))
distTp <- dist(geometry[location[topQuanTp],])
distTp <- as.vector(distTp)
mean(distTp)
})
```
Generate a sampling distribution of distances between top quartile cells
```{r warning=FALSE,message=F,error=FALSE}
randMeanDists <- sapply(1:1e4,function(i){
dr <- dist(geometry[sample(3039,324),])
mean(dr)
})
hist(randMeanDists,breaks = 100,main="Distribution of distances between random 1/4 of total cells")
meanRD <- mean(randMeanDists)
sdRD <- sd(randMeanDists)
```
With $H_0$ = Mean distance >= meanRD, and <br/>
$H_a$ = Mean distance < meanRD, and <br/>
$\alpha$ = 0.05
```{r}
pVals <- sapply(meanDistPat,function(md){
pnorm(md,mean = meanRD,sd=sdRD)
})
print(pVals)
sum(pVals*10<0.05)
```
8 out of 10 patterns are significant.