A collection of main functions useful in R
Higher category: 【RStudio】 R Studio Index, 【Bioinformatics】 Bioinformatics Analysis Index
1. Overview
2. Basics
3. File I/O
5. gsub
a. GitHub (executable .R file)
1. Overview
⑴ The functions defined below can be called as follows.
source("https://github.com/JB243/nate9389/blob/main/RStudio/A_Collection_of_Useful_Functions_in_RStudio.R?raw=true")
① However, the following problems may occur due to the above code.
② Problem 1. Error in sum (dim(scRNaseqData):argument “b” is missing, with no default
③ Problem 2. “do.call” r argument “b” is missing, with no default
⑵ .R file can be used as follows:
Rscript /numbat/inst/bin/pileup_and_phase.R \
--label possorted_genome_bam \
--samples possorted_genome_bam \
--bams possorted_genome_bam.bam \
--barcodes possorted_genome_bam_barcodes.tsv \
--outdir possorted_genome_bam \
--gmap /Eagle_v2.4.1/tables/genetic_map_hg38_withX.txt.gz \
--snpvcf /data/genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf \
--paneldir /data/1000G_hg38 \
--ncores 1
⑶ The functions defined below can be used as follows:
SUM <- function(a, b){
return (a + b)
}
A <- 1
B <- 2
print(SUM(A, B))
# 3
2. Basics
⑴ Check data type
# x is a variable
typeof(x)
⑵ A function that fills in the number of zeros before a natural number to give a total of five digits.
n = 55
sprintf("%05d", n)
⑶ A function waiting for enter to hit. (reference)
readline(prompt="Press [enter] to continue")
⑷ Multiple processing
# Reference : https://cran.r-project.org/web/packages/future/vignettes/future-3-topologies.html
plan("multisession", workers = 10)
⑸ %in% operator
x <- c('A','B','C','D','E','F','G')
x %in% c('F','G')
# FALSE FALSE FALSE FALSE FALSE TRUE TRUE
3. File I/O
⑴ Check whether destination folder exists
if (!file.exists(destination.folder)) {
stop(.wrap("The destination folder could not be found. Please change",
"the path specified in", sQuote(destination.folder)))
}
⑵ Check for write permissions in the destination folder
if (file.access(destination.folder, 2) == -1) {
stop(.wrap("You do not have write permission in the destination",
"folder."))
⑶ Create a folder of ‘Filtered_bams’ under ‘destination.folder’
destination.folder <- file.path(destination.folder, "Filtered_bams")
tryCatch({
if (!file.exists(file.path(destination.folder))) {
dir.create(file.path(destination.folder),
recursive = TRUE)
} else {
stop(.wrap("The folder",
sQuote(file.path(destination.folder, "Filtered_bams")),
"already exists. Please remove it, or (in case you",
"still need it), rename it to prevent files from being",
"overwritten."))
}
}, warning = function(e) {
stop(.wrap("You do not have write permissions in the destination",
"folder. Stopping execution of the remaining part of the",
"script..."))
})
⑷ Code to read a .gz
file and decompress it
tmp = readMM('matrix.mtx.gz')
writeMM(tmp, 'matrix.mtx')
4. Vectors and Matrices
⑴ Sort the given vectors in the order of a, b, and c.
my_vec <- c('c', 'b', 'a')
print( my_vec[order(my_vec)] )
# "a" "b" "c"
⑵ Whether a given string (given_str) contains a particular string (partial_str): grepl (reference)
grepl(partial_str, given_str, fixed = TRUE)
⑶ An index output containing a particular string “str” from a given string vector.
has_string_arr <- function(str, arr){
flag <- 0
ar <- array()
for(i in 1:length(arr)){
if( grepl(str, arr[i], fixed=TRUE) ){
flag <- flag + 1
ar[flag] <- i
}
}
return(ar)
}
⑷ Intersection of three vectors
tri_intersect <- function(A, B, C){
a <- intersect(A, B)
b <- intersect(a, C)
return(b)
}
⑸ A function that converts the NA portion of a given vector into zero
trim_na <- function(vector){
for(i in 1: length(vector)){
if(is.na(vector[i])){
vector[i] = 0
}
}
return (vector)
}
⑹ A function that excludes the NA portion of a given vector
ignore_na <- function(vector){
flag = 0
ar = array()
for(i in 1: length(vector)){
if(! is.na(vector[i])){
flag = flag + 1
ar[flag] = vector[i]
}
}
return (ar)
}
⑺ A function that replaces a particular element with another element in a given vector
replace_in_vector <- function(v, from_element, to_element){
ar <- array(dim = length(v))
for(i in 1 : length(v)){
if(v[i] == from_element){
ar[i] = to_element
} else{
ar[i] = v[i]
}
}
return(ar)
}
⑻ A function that returns the left corner of a matrix or data frame
corner <- function(x, num = 10){
return(x[1:min( num, dim(x)[1] ),
1:min( num, dim(x)[2] )])
}
⑼ A function that outputs cbind when given any two matrices
my.cbind <- function(data1, data2){
a <- intersect(rownames(data1), rownames(data2))
return (cbind(data1[a, ], data2[a, ]))
}
⑽ A function that outputs rbind when given any two matrices
my.rbind <- function(data1, data2){
a <- intersect(colnames(data1), colnames(data2))
return (rbind(data1[, a], data2[, a]))
}
⑾ A function that outputs lines A and B in a given matrix or data frame
switch_A_B_row <- function(mat0, A, B){
mat <- mat0
mat[A, ] = mat0[B, ]
mat[B, ] = mat0[A, ]
rownames(mat)[A] = rownames(mat0)[B]
rownames(mat)[B] = rownames(mat0)[A]
return(mat)
}
⑿ A function that switches columns A and B in a given matrix or data frame
switch_A_B_col <- function(mat0, A, B){
mat <- mat0
mat[, A] = mat0[, B]
mat[, B] = mat0[, A]
colnames(mat)[A] = colnames(mat0)[B]
colnames(mat)[B] = colnames(mat0)[A]
return(mat)
}
5. gsub
⑴ A function that removes the hyphen (-) from a given string
eliminate_hyphene <- function(str){
return(gsub("-", "", str))
}
⑵ A function of removing special symbols such as “(“) from a given string (reference)
eliminate_symbol <- function(str){
return(gsub("(", "", str, fixed = TRUE))
}
⑶ A function that removes all characters after x and x from a given string (reference)
eliminate_backward <- function(str){
return(gsub('x.*$', '', str))
}
⑷ A function that removes all numbers following . and . from a given string, for “ABCD.123”
eliminate_backward <- function(str){
return(gsub('*.[0-9]*$', '', str))
}
⑸ A function that removes all characters before x and x from a given string
eliminate_forward <- function(str){
return(gsub('.*x', '', str))
}
⑹ A function that fills the characters from start to end with “x” in a given string-type variable (‘given_str’) and returns them
str_substitute <- function(given_str, start, end){
library(stringr)
library(stringi)
result = paste0(
str_sub(given_str, 1, start-1),
stri_dup("x", (end-start+1) ),
str_sub(given_str, end+1, str_length(given_str))
)
return(result)
}
⑺ Code for collecting numbers following a particular pattern within a given string (‘given_str’)
number_after_pattern_in_str <- function(given_str, pattern){
# reference : https://cran.r-project.org/web/packages/stringr/vignettes/stringr.html
library(stringr)
phone1 <- paste0(pattern, '([1-9]{1})')
phone2 <- paste0(pattern, '([1-9][0-9]{1})')
phone3 <- paste0(pattern, '([1-9][0-9]{2})')
phone4 <- paste0(pattern, '([1-9][0-9]{3})')
arr <- array()
flag = 0
for(i in 1 : dim(str_locate_all(given_str, phone4)[[1]])[1] ){
if (dim(str_locate_all(given_str, phone4)[[1]])[1] > 0){
flag <- flag + 1
arr[flag] = str_sub(
given_str,
str_locate_all(given_str, phone4)[[1]][1, 1], #start
str_locate_all(given_str, phone4)[[1]][1, 2] #end
)
given_str = str_substitute(
given_str,
str_locate_all(given_str, phone4)[[1]][1, 1], #start
str_locate_all(given_str, phone4)[[1]][1, 2] #end
)
}
}
for(i in 1 : dim(str_locate_all(given_str, phone3)[[1]])[1] ){
if (dim(str_locate_all(given_str, phone3)[[1]])[1] > 0){
flag <- flag + 1
arr[flag] = str_sub(
given_str,
str_locate_all(given_str, phone3)[[1]][1, 1], #start
str_locate_all(given_str, phone3)[[1]][1, 2] #end
)
given_str = str_substitute(
given_str,
str_locate_all(given_str, phone3)[[1]][1, 1], #start
str_locate_all(given_str, phone3)[[1]][1, 2] #end
)
}
}
for(i in 1 : dim(str_locate_all(given_str, phone2)[[1]])[1] ){
if (dim(str_locate_all(given_str, phone2)[[1]])[1] > 0){
flag <- flag + 1
arr[flag] = str_sub(
given_str,
str_locate_all(given_str, phone2)[[1]][1, 1], #start
str_locate_all(given_str, phone2)[[1]][1, 2] #end
)
given_str = str_substitute(
given_str,
str_locate_all(given_str, phone2)[[1]][1, 1], #start
str_locate_all(given_str, phone2)[[1]][1, 2] #end
)
}
}
for(i in 1 : dim(str_locate_all(given_str, phone1)[[1]])[1] ){
if (dim(str_locate_all(given_str, phone1)[[1]])[1] > 0){
flag <- flag + 1
arr[flag] = str_sub(
given_str,
str_locate_all(given_str, phone1)[[1]][1, 1], #start
str_locate_all(given_str, phone1)[[1]][1, 2] #end
)
given_str = str_substitute(
given_str,
str_locate_all(given_str, phone1)[[1]][1, 1], #start
str_locate_all(given_str, phone1)[[1]][1, 2] #end
)
}
}
arr <- str_sub(arr, str_length(pattern)+1, str_length(arr))
arr <- as.numeric(arr)
return(arr)
}
6. Statistical Analysis
⑴ Code for percentile quantiles from 10% to 90%
# reference: https://www.statology.org/percentiles-in-r/
quantile(data, probs = seq(.1, .9, by = .1))
⑵ A value obtained by taking log10 for n!
log10_factorial <- function(n){
if(n == 0){
return(0)
}
out <- 0
for(i in 1 : n){
out <- out + log(i) / log(10)
}
return(out)
}
⑶ A function to obtain the binomial coefficient nCk, which is the number of cases where k out of n is selected: Improves code because simply using factorial can create inf
my.combination <- function(n, k){
# return nCk = n! / ((n-k)! k!)
if (n == k || n == 0 || k == 0){
return(1)
}
A = log10_factorial(n)
B = log10_factorial(n-k)
C = log10_factorial(k)
log10_nCk = A - B - C
return(10^(log10_nCk))
}
⑷ t test between two groups
t.test(v1, v2, paired = FALSE)
# maybe you can activate 'paired' in a special condition
⑸ ANOVA test between two groups
one_way_2_factor_anova <- function(v1, v2){
dat <- matrix(0, nrow = ( length(v1) + length(v2) ), ncol = 2 )
for(i in 1 : length(v1) ){
dat[i, 1] <- v1[i]
dat[i, 2] <- 'v1'
}
for(i in 1 : length(v2) ){
dat[i + length(v1), 1] <- v2[i]
dat[i + length(v1), 2] <- 'v2'
}
dat <- as.data.frame(dat)
colnames(dat) <- c('val', 'factor')
anova_IS <- aov(val ~ factor, data = dat)
print(summary(anova_IS))
anova_residuals <- anova_IS$residuals
print(summary(anova_residuals))
}
⑹ ANOVA test among three groups
one_way_3_factor_anova <- function(v1, v2, v3){
dat <- matrix(0, nrow = ( length(v1) + length(v2) + length(v3) ), ncol = 2 )
for(i in 1 : length(v1) ){
dat[i, 1] <- v1[i]
dat[i, 2] <- 'v1'
}
for(i in 1 : length(v2) ){
dat[i + length(v1), 1] <- v2[i]
dat[i + length(v1), 2] <- 'v2'
}
for(i in 1 : length(v3) ){
dat[i + length(v1) + length(v2), 1] <- v3[i]
dat[i + length(v1) + length(v2), 2] <- 'v3'
}
dat <- as.data.frame(dat)
colnames(dat) <- c('val', 'factor')
anova_IS <- aov(val ~ factor, data = dat)
print(summary(anova_IS))
anova_residuals <- anova_IS$residuals
print(summary(anova_residuals))
}
⑺ ANOVA test among four groups
one_way_4_factor_anova <- function(v1, v2, v3, v4){
dat <- matrix(0, nrow = ( length(v1) + length(v2) + length(v3) + length(v4) ), ncol = 2 )
for(i in 1 : length(v1) ){
dat[i, 1] <- v1[i]
dat[i, 2] <- 'v1'
}
for(i in 1 : length(v2) ){
dat[i + length(v1), 1] <- v2[i]
dat[i + length(v1), 2] <- 'v2'
}
for(i in 1 : length(v3) ){
dat[i + length(v1) + length(v2), 1] <- v3[i]
dat[i + length(v1) + length(v2), 2] <- 'v3'
}
for(i in 1 : length(v4) ){
dat[i + length(v1) + length(v2) + length(v3), 1] <- v4[i]
dat[i + length(v1) + length(v2) + length(v3), 2] <- 'v4'
}
dat <- as.data.frame(dat)
colnames(dat) <- c('val', 'factor')
anova_IS <- aov(val ~ factor, data = dat)
print(summary(anova_IS))
anova_residuals <- anova_IS$residuals
print(summary(anova_residuals))
}
⑻ ANOVA test among five groups
one_way_5_factor_anova <- function(v1, v2, v3, v4, v5){
dat <- matrix(0, nrow = ( length(v1) + length(v2) + length(v3) + length(v4) + length(v5) ), ncol = 2 )
for(i in 1 : length(v1) ){
dat[i, 1] <- v1[i]
dat[i, 2] <- 'v1'
}
for(i in 1 : length(v2) ){
dat[i + length(v1), 1] <- v2[i]
dat[i + length(v1), 2] <- 'v2'
}
for(i in 1 : length(v3) ){
dat[i + length(v1) + length(v2), 1] <- v3[i]
dat[i + length(v1) + length(v2), 2] <- 'v3'
}
for(i in 1 : length(v4) ){
dat[i + length(v1) + length(v2) + length(v3), 1] <- v4[i]
dat[i + length(v1) + length(v2) + length(v3), 2] <- 'v4'
}
for(i in 1 : length(v5) ){
dat[i + length(v1) + length(v2) + length(v3) + length(v4), 1] <- v5[i]
dat[i + length(v1) + length(v2) + length(v3) + length(v4), 2] <- 'v5'
}
dat <- as.data.frame(dat)
colnames(dat) <- c('val', 'factor')
anova_IS <- aov(val ~ factor, data = dat)
print(summary(anova_IS))
anova_residuals <- anova_IS$residuals
print(summary(anova_residuals))
}
⑼ ANOVA test among six groups
one_way_6_factor_anova <- function(v1, v2, v3, v4, v5, v6){
dat <- matrix(0, nrow = ( length(v1) + length(v2) + length(v3) + length(v4) + length(v5) + length(v6) ), ncol = 2 )
for(i in 1 : length(v1) ){
dat[i, 1] <- v1[i]
dat[i, 2] <- 'v1'
}
for(i in 1 : length(v2) ){
dat[i + length(v1), 1] <- v2[i]
dat[i + length(v1), 2] <- 'v2'
}
for(i in 1 : length(v3) ){
dat[i + length(v1) + length(v2), 1] <- v3[i]
dat[i + length(v1) + length(v2), 2] <- 'v3'
}
for(i in 1 : length(v4) ){
dat[i + length(v1) + length(v2) + length(v3), 1] <- v4[i]
dat[i + length(v1) + length(v2) + length(v3), 2] <- 'v4'
}
for(i in 1 : length(v5) ){
dat[i + length(v1) + length(v2) + length(v3) + length(v4), 1] <- v5[i]
dat[i + length(v1) + length(v2) + length(v3) + length(v4), 2] <- 'v5'
}
for(i in 1 : length(v6) ){
dat[i + length(v1) + length(v2) + length(v3) + length(v4) + length(v5), 1] <- v6[i]
dat[i + length(v1) + length(v2) + length(v3) + length(v4) + length(v5), 2] <- 'v6'
}
dat <- as.data.frame(dat)
colnames(dat) <- c('val', 'factor')
anova_IS <- aov(val ~ factor, data = dat)
print(summary(anova_IS))
anova_residuals <- anova_IS$residuals
print(summary(anova_residuals))
}
⑽ Investigate FC, p value of two vectors
comparison_of_two_vectors <- function(v1, v2, paired = FALSE){
p.val = t.test(v1, v2, paired = paired)
print(p.val)
log2FC = log( mean(v1 + 0.000000000001)/mean(v2 + 0.000000000001) ) / log(2)
print(log2FC)
}
⑾ How to test the statistical equivalence of two sets using Fisher’s effect test (ver. 1)
total.gene <- 32285
ST <- 31
scRNAseq <- 14
cross <- 5
a <- cross
b <- scRNAseq - a
c <- ST - a
d <- total.gene - a - b - c
A <- a + b
B <- c + d
C <- a + c
D <- b + d
group<-c("A","A","B","B")
cancer<-c("1.Yes","2.No","1.Yes","2.No")
count<-c(a,b,c,d)
dat<-data.frame(group,cancer,count)
tab<-xtabs(count~group+cancer,data=dat)
tab
chisq.test(tab)$observed
chisq.test(tab)$expected
fisher.test(tab)
-log(fisher.test(tab)$p.value, 10)
if(cross > ST * scRNAseq / total.gene){
print("Enrichment")
} else if(cross < ST * scRNAseq / total.gene){
print("Depletion")
}
⑿ How to test the statistical equivalence of two sets using Fisher’s effect test (ver. 2)
my.Fisher.exact.test <- function(total, A, B, cross){
a1 <- log10_factorial(A)
a2 <- log10_factorial(total - A)
a3 <- log10_factorial(B)
a4 <- log10_factorial(total - B)
b1 <- log10_factorial(cross)
b2 <- log10_factorial(A - cross)
b3 <- log10_factorial(B - cross)
b4 <- log10_factorial(total - cross - (A - cross) - (B - cross))
b5 <- log10_factorial(total)
out = a1 + a2 + a3 + a4 - b1 - b2 - b3 - b4 - b5
return(10^out)
}
⒀ MIA assay (enrichment)
my.MIA.assay.enrichment <- function(total, A, B, cross){
out <- 0
for(i in cross:min(A, B)){
out = out + my.Fisher.exact.test(total, A, B, i)
}
return(out)
}
⒁ MIA assay (depletion)
my.MIA.assay.depletion <- function(total, A, B, cross){
out <- 0
for(i in 0:cross-1){
out = out + my.Fisher.exact.test(total, A, B, i)
}
return(out)
}
7. Bioinformatics
⑴ Explore genes that begin with a specific keyword
gene_starting_with <- function(keyword){
human = read.csv("https://blog.kakaocdn.net/dn/29YTj/btrS5iG9QOH/Di6RQKxHOPDii7EjkdHN30/human_genes_36601.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)
mouse = read.csv("https://blog.kakaocdn.net/dn/wkjwJ/btrS1QSgrpD/VS8ELANCQyeZAA3vL8JQP0/mouse_genes_32285.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)
ar = array()
flag = 0
if(keyword == toupper(keyword)){ # human genes
for(i in 1:dim(human)[1]){
if(grepl(keyword, human[i, 2], fixed = TRUE)){
flag = flag + 1
ar[flag] = human[i, 2]
}
}
}
else{ # mouse genes
for(i in 1:dim(mouse)[1]){
if(grepl(keyword, mouse[i, 2], fixed = TRUE)){
flag = flag + 1
ar[flag] = mouse[i, 2]
}
}
}
return(ar)
}
⑵ Conversion between enembl_gene_id and gene_symbol
① convert from ensembl.gene to gene.symbol
library(EnsDb.Hsapiens.v79)
ensembl.genes <- c("ENSG00000150676", "ENSG00000099308", "ENSG00000142676", "ENSG00000180776", "ENSG00000108848", "ENSG00000277370", "ENSG00000103811", "ENSG00000101473")
geneIDs1 <- ensembldb::select(EnsDb.Hsapiens.v79, keys= ensembl.genes, keytype = "GENEID", columns = c("SYMBOL","GENEID"))
② Convert from enembl.gene to gene.symbol (using the table)
ensembl_to_gene <- function(ensembl_list){
ar = array(dim = length(ensembl_list))
human = read.csv("https://blog.kakaocdn.net/dn/29YTj/btrS5iG9QOH/Di6RQKxHOPDii7EjkdHN30/human_genes_36601.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)
mouse = read.csv("https://blog.kakaocdn.net/dn/wkjwJ/btrS1QSgrpD/VS8ELANCQyeZAA3vL8JQP0/mouse_genes_32285.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)
for(i in 1:length(ensembl_list)){
if(grepl('ENSG', ensembl_list[i], fixed = TRUE)){ # human gene
index = match(ensembl_list[i], human[, 1])
ar[i] = human[index, 2]
}
else if(grepl('ENSMUSG', ensembl_list[i], fixed = TRUE)){ # mouse gene
index = match(ensembl_list[i], mouse[, 1])
ar[i] = mouse[index, 2]
}
}
return(ar)
}
③ Convert from gene.symbol to ensembl.gene
library(EnsDb.Hsapiens.v79)
geneSymbols <- c('DDX26B','CCDC83', 'MAST3', 'RPL11', 'ZDHHC20', 'LUC7L3', 'SNORD49A', 'CTSH', 'ACOT8')
geneIDs2 <- ensembldb::select(EnsDb.Hsapiens.v79, keys= geneSymbols, keytype = "SYMBOL", columns = c("SYMBOL","GENEID"))
④ Convert from gen.symbol to enembl.gen (using the table)
gene_to_ensembl <- function(gene_list){
ar = array(dim = length(gene_list))
human = read.csv("https://blog.kakaocdn.net/dn/29YTj/btrS5iG9QOH/Di6RQKxHOPDii7EjkdHN30/human_genes_36601.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)
mouse = read.csv("https://blog.kakaocdn.net/dn/wkjwJ/btrS1QSgrpD/VS8ELANCQyeZAA3vL8JQP0/mouse_genes_32285.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)
for(i in 1:length(gene_list)){
if(gene_list[i] == toupper(gene_list[i])){ # human gene
index = match(gene_list[i], human[, 2])
ar[i] = human[index, 1]
}
else{ # mouse gene
index = match(gene_list[i], mouse[, 2])
ar[i] = mouse[index, 1]
}
}
# return(ignore_na(ar))
## if possible, ignore_na should be used
return(ar)
}
⑤ human ensembl transcript to gene name (ref.)
library(biomaRt)
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
ensembl_transcript_to_gene <- function(transcript_ids){
# reference : https://support.bioconductor.org/p/106253/#106256
res <- getBM(attributes = c('ensembl_transcript_id_version',
'ensembl_gene_id',
'external_transcript_name',
'external_gene_name'),
filters = 'ensembl_transcript_id_version',
values = transcript_ids,
mart = mart)
if(dim(res)[1] == 0){
return("")
}
return(res[, 'external_gene_name'])
}
⑥ mouse gene to MGI symbol (using the table)
mouse_gene_to_MGI <- function(mouse_gene_list){
ar = array(dim = length(mouse_gene_list))
dat <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")
for(i in 1:length(mouse_gene_list)){
index = match(mouse_gene_list[i], dat[,'Symbol'])
ar[i] = dat[index, 'Mouse.MGI.ID']
}
return(ar)
}
⑦ MGI symbol to mouse gene (using the table)
MGI_to_mouse_gene <- function(MGI_list){
ar = array(dim = length(MGI_list))
dat <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")
for(i in 1:length(MGI_list)){
index = match(MGI_list[i], dat[,'Mouse.MGI.ID'])
ar[i] = dat[index, 'Symbol']
}
return(ar)
}
⑧ human gene to HGNC symbol (using the table)
human_gene_to_HGNC <- function(human_gene_list){
ar = array(dim = length(human_gene_list))
dat <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")
for(i in 1:length(human_gene_list)){
index = match(human_gene_list[i], dat[,'Symbol'])
ar[i] = dat[index, 'HGNC.ID']
}
return(ar)
}
⑨ HGNC symbol to human gene (using the table)
HGNC_to_human_gene <- function(HGNC_list){
ar = array(dim = length(HGNC_list))
dat <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")
for(i in 1:length(HGNC_list)){
index = match(HGNC_list[i], dat[,'HGNC.ID'])
ar[i] = dat[index, 'Symbol']
}
return(ar)
}
⑩ Transformation table for animals other than humans and mice
⑶ Affymetrix probe ID, enembl_gen_id, gain_name
① convert Affymetrix probe ID to ensembl_gene_id, gene_name
#Convert Affymetrix probe ID to ensembl_gene_id, gene_name
## https://www.biostars.org/p/328065/#328328
## https://www.biostars.org/p/332461/#332474
BiocManager::install("biomaRt", force=TRUE)
library(biomaRt)
dat<-c('1007_s_at', '1053_at', '117_at', '121_at', '1255_g_at', '1294_at')
require("biomaRt")
mart <- useMart("ENSEMBL_MART_ENSEMBL")
mart <- useDataset("hsapiens_gene_ensembl", mart)
annotLookup <- getBM(
mart = mart,
attributes = c(
"affy_hg_u133_plus_2",
"ensembl_gene_id",
"gene_biotype",
"external_gene_name"),
filter = "affy_hg_u133_plus_2",
values = dat,
uniqueRows=TRUE)
⑷ Gene homology between human and mouse
① Overview
○ Human genes are 36601
○ There are 32285 genes in the mouse.
○ Human and mouse genes are almost the same, but there are many differences.
○ Therefore, it is necessary to know how to convert human genes into mouse genes or vice versa.
○ In many cases, the mouse gene is not a human gene that simply capitalizes the mouse gene.
② Method 1. BiomaRt
install.packages("BiocManager")
BiocManager::install("biomaRt")
library(biomaRt)
genes <- c("Xkr4", "Gm1992", "Gm37381")
human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")
hh <- getLDS(attributes = c("mgi_symbol", "ensembl_gene_id", "entrezgene_id"), filters = "mgi_symbol", values = genes, mart = mouse, attributesL =
c("hgnc_symbol", "ensembl_gene_id", "entrezgene_id"), martL = human, uniqueRows = T)
○ attribute 1. Determine the correspondence between MGI symbol and HGNC symbol.
○ attribute 2. Determine the corresponding relationship between ENSMUSG code and ENSG code.
○ attribute 3. Determine the correspondence between NCBI gain ID and NCBI gain ID.
○ For each attribute, information about the mouse on the left and information about the person on the right.
○ currently inoperative: Currently, the following error message has been found and is not working.
#1. Error in getLDS(attributes = c("mgi_symbol", "ensembl_gene_id", "entrezgene_id"), : Query ERROR: caught BioMart::Exception::Database: Could not connect to mysql database ensembl_mart_106: DBI connect ('database =ensembl_mart_106;host=127.0.0.1;port=5316','ensro',...) failed: Can't connect to MySQL server on '127.0.0.1' (111) at /nfs/public/ro/ensweb/live/mart/www_106/biomart-perl/lib/BioMart/Configuration/DBLocation.pm line 98.
#2. Error in textConnection(attrfilt) : invalid 'text' argument
#3. Ensembl site unresponsive, trying useast mirror. Ensembl site unresponsive, trying asia mirror
#4. Error: biomaRt has encountered an unexpected server error. Consider trying one of the Ensembl mirrors (for more details look at ?useEnsembl)
③ Method 2. Use of the human-mouse conversion table provided on the MGI website
human_to_mouse <- function(human_gene){
hom <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")
mouse_gene = array()
flag = 0
for(i in 1 : length(human_gene)){
index = match(human_gene[i], hom[hom$Common.Organism.Name == 'human', 'Symbol'])
key = hom[hom$Common.Organism.Name == 'human', 'DB.Class.Key'][index]
flag = flag + 1
mouse_gene[flag] = hom[hom$DB.Class.Key == key
& hom$Common.Organism.Name == 'mouse, laboratory'
, 'Symbol'][1] # duplicate mouse genes can be found
}
return(mouse_gene)
}
mouse_to_human <- function(mouse_gene){
hom <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")
human_gene = array()
flag = 0
for(i in 1 : length(mouse_gene)){
index = match(mouse_gene[i], hom[hom$Common.Organism.Name == 'mouse, laboratory', 'Symbol'])
key = hom[hom$Common.Organism.Name == 'mouse, laboratory', 'DB.Class.Key'][index]
flag = flag + 1
human_gene[flag] = hom[hom$DB.Class.Key == key
& hom$Common.Organism.Name == 'human'
, 'Symbol'][1] # duplicate human genes can be found
}
return(human_gene)
}
⑸ chromosome position to hgnc_symbol
ChromosomePosition_to_hgnc_symbol <- function(chromosome, start, end){
# reference : https://support.bioconductor.org/p/127035/
library(biomaRt)
positions <- data.frame(chromosome = chromosome,
start = start,
end = end)
ensembl = useEnsembl(biomart='ensembl',
dataset="hsapiens_gene_ensembl")
results <- getBM(attributes = c("hgnc_symbol", "chromosome_name", "start_position", "end_position"),
filters = c("chromosome_name", "start", "end"),
values = list(positions[,1], positions[,2], positions[,3]),
mart = ensembl)
print(results)
postions_combined <- apply(as.matrix(positions), 1, paste, collapse = ":")
results2 <- getBM(attributes = c("hgnc_symbol", "chromosome_name", "start_position", "end_position"),
filters = c("chromosomal_region"),
values = postions_combined,
mart = ensembl)
print(results2)
}
⑹ gene name to chromosome position: Use of the conversion table(human, mouse)
gene_to_chromosome_position <- function(gene_list){
# gene_list : list of genes
human <- read.csv("https://blog.kakaocdn.net/dn/lTbKq/btrWjlmGho8/eWwWHbfLOlVGKAVeuDMKt1/human%20gene%20annotation.csv?attach=1&knm=tfile.csv")
mouse = read.csv("https://blog.kakaocdn.net/dn/clSwT7/btrWcrWmS41/mNLCUuBlQxfJFhG1U2JQNk/mouse%20gene%20annotation.csv?attach=1&knm=tfile.csv")
result = array()
for(i in 1:length(gene_list)){
if(gene_list[i] == toupper(gene_list[i])){ # human gene
idx = match(gene_list[i], human[, 1])
note = paste('Gene.ID: ', gene_list[i],
', chromosome: ', human[idx, 'chromosome'],
', start: ', human[idx, 'start'],
', end: ', human[idx, 'end'])
result[i] = note
}
else{ # mouse gene
idx = match(gene_list[i], mouse[, 1])
note = paste('Gene.ID: ', gene_list[i],
', chromosome: ', mouse[idx, 'chromosome'],
', start: ', mouse[idx, 'start'],
', end: ', mouse[idx, 'end'])
result[i] = note
}
}
return(result)
}
⑺ gene name to description
gene_to_description <- function(gene_list){
# gene_list : list of genes
human <- read.csv("https://blog.kakaocdn.net/dn/lTbKq/btrWjlmGho8/eWwWHbfLOlVGKAVeuDMKt1/human%20gene%20annotation.csv?attach=1&knm=tfile.csv")
mouse = read.csv("https://blog.kakaocdn.net/dn/clSwT7/btrWcrWmS41/mNLCUuBlQxfJFhG1U2JQNk/mouse%20gene%20annotation.csv?attach=1&knm=tfile.csv")
result = array()
for(i in 1:length(gene_list)){
if(gene_list[i] == toupper(gene_list[i])){ #human
idx = match(gene_list[i], human[, 1])
result[i] = human[idx, 'Description']
}
else{ #mouse
idx = match(gene_list[i], mouse[, 1])
result[i] = mouse[idx, 'Description']
}
}
return(result)
}
⑻ gene name to bioType
gene_to_bioType <- function(gene_list){
# gene_list : list of mouse genes
human <- read.csv("https://blog.kakaocdn.net/dn/lTbKq/btrWjlmGho8/eWwWHbfLOlVGKAVeuDMKt1/human%20gene%20annotation.csv?attach=1&knm=tfile.csv")
mouse = read.csv("https://blog.kakaocdn.net/dn/clSwT7/btrWcrWmS41/mNLCUuBlQxfJFhG1U2JQNk/mouse%20gene%20annotation.csv?attach=1&knm=tfile.csv")
result = array()
for(i in 1:length(gene_list)){
if(gene_list[i] == toupper(gene_list[i])){ # human
idx = match(gene_list[i], human[, 1])
result[i] = human[idx, 'bioType']
}
else { # mouse
idx = match(gene_list[i], mouse[, 1])
result[i] = mouse[idx, 'bioType']
}
}
return(result)
}
⑼ Change the name of the Seurat object object gene: Need to create a new object (reference)
# RenameGenesSeurat ------------------------------------------------------------------------------------
RenameGenesSeurat <- function(obj = ls.Seurat[[i]], newnames = HGNC.updated[[i]]$Suggested.Symbol) { # Replace gene names in different slots of a Seurat object. Run this before integration. Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.
print("Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.")
RNA <- obj@assays$RNA
if (nrow(RNA) == length(newnames)) {
if (length(RNA@counts)) RNA@counts@Dimnames[[1]] <- newnames
if (length(RNA@data)) RNA@data@Dimnames[[1]] <- newnames
if (length(RNA@scale.data)) RNA@scale.data@Dimnames[[1]] <- newnames
} else {"Unequal gene sets: nrow(RNA) != nrow(newnames)"}
obj@assays$RNA <- RNA
return(obj)
}
# RenameGenesSeurat(obj = SeuratObj, newnames = HGNC.updated.genes)
⑽ Change cluster information
update_cluster_in_seurat_obj <- function(seurat_obj, barcode, cluster){
# dim(seurat_obj)[1] = length(barcode) = length(cluster)
mat <- matrix(0, nrow = length(barcode), ncol = 2)
mat[, 1] = barcode
mat[, 2] = cluster
mat = as.data.frame(mat)
rownames(mat) = barcode
seurat_obj@meta.data$orig.ident = mat[rownames(seurat_obj@meta.data), 2]
# you may need to modify the above code
seurat_obj@active.ident = as.factor(seurat_obj@meta.data$orig.ident)
return (seurat_obj)
}
⑾ ‘Idents’ allows you to write FindAllMarkers for the metadata you want (reference 1, reference 2)
Idents(pbmc) <- pbmc$celltype
markers = FindAllMarkers(object = pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
⑿ Reorder the place within the plot of the ‘orig.ident’ of the Seurat object
# Reference : https://github.com/satijalab/seurat/issues/2471
object$orig.ident <- factor(x = object$orig.ident, levels = c("A", "B", "C"))
⒀ Save Seurat object pbmc3k as h5ad (reference)
library(Seurat)
library(SeuratData)
library(SeuratDisk)
SaveH5Seurat(pbmc3k, filename = "~/Downloads/pbmc3k.h5Seurat")
Convert("~/Downloads/pbmc3k.h5Seurat", dest = "h5ad")
① The generated .h5ad file can be open with the following code in Python
import scanpy as sc
pbmc = sc.read_h5ad("pbmc3k.h5ad")
⒁ When there are matrix.mtx, barcodes.tsv, features.tsv files and a spatial folder within the tissue_dir directory, provide the code for reading Visium data in R.
library(Seurat)
b_data <-ReadMtx('./matrix.mtx.gz',
'./barcodes.tsv.gz',
'./features.tsv.gz',
feature.column=1)
b_data = CreateSeuratObject(b_data, assay='Spatial')
b_image = Read10X_Image(paste('./spatial/',sep=''))
b_image_ = b_image[Cells(b_data)]
DefaultAssay(object = b_image_) <- 'Spatial'
b_data[["slice1"]]=b_image_
# you can change "slice1"
b_data <- SCTransform(b_data, assay = "Spatial", verbose = FALSE, variable.features.n = 1000)
# Some errors might occur when running SpatialFeaturePlot without SCTransform
SpatialFeaturePlot(b_data, rownames(b_data)[1])
# check whether it works properly
⒂ Provide the code for reading Visium data saved in .h5ad format in R.
library(Seurat)
library(anndata)
library(png)
library(jsonlite)
adata = read_h5ad("adata.h5ad")
hires = adata$uns$spatial$SAMPLE$images$hires
# you must change "SAMPLE"
png_file_path_hires = "~/Downloads/data/spatial/tissue_hires_image.png"
png(filename = png_file_path_hires, width = dim(hires)[1], height = dim(hires)[2])
par(mar = c(0, 0, 0, 0)) # Remove margins
plot.new()
rasterImage(hires, 0, 0, 1, 1)
dev.off()
lowres = adata$uns$spatial$SAMPLE$images$lowres
# you must change "SAMPLE"
png_file_path_lowres = "~/Downloads/data/spatial/tissue_lowres_image.png"
png(filename = png_file_path_lowres, width = dim(lowres)[1], height = dim(lowres)[2])
par(mar = c(0, 0, 0, 0)) # Remove margins
plot.new()
rasterImage(lowres, 0, 0, 1, 1)
dev.off()
scale_factors <- adata$uns$spatial$SAMPLE$scalefactors
# you must change "SAMPLE"
json_file_path <- "~/Downloads/data/spatial/scalefactors_json.json"
write_json(scale_factors, json_file_path)
df = adata$obs[,1:3]
df[,4] = adata$obsm$spatial[,2]
df[,5] = adata$obsm$spatial[,1]
write.csv(df, "~/Downloads/data/spatial/tissue_positions_list.csv")
b_data = CreateSeuratObject(t(as.matrix(adata$X)), assay='Spatial')
b_image = Read10X_Image(paste('~/Downloads/data/spatial/',sep=''))
b_image_ = b_image[Cells(b_data)]
DefaultAssay(object = b_image_) <- 'Spatial'
b_data[["slice1"]]=b_image_
# you can change "slice1"
b_data <- SCTransform(b_data, assay = "Spatial", verbose = FALSE, variable.features.n = 1000)
# Some errors might occur when running SpatialFeaturePlot without SCTransform
SpatialFeaturePlot(b_data, rownames(b_data)[1])
# An error occurs...
⒃ Code for acquiring highly variable genes
library(dplyr)
library(Seurat)
pbmc.data <- Read10X(data.dir = "./outs")
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
pbmc[["percent.mt"]] <- PercentageFeatureSet(object = pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 15)
pbmc <- NormalizeData(object = pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(object = pbmc, selection.method = "vst", nfeatures = 2000)
gene_list = rownames(pbmc)[pbmc@assays$RNA@meta.features$vst.variable]
⒄ PCA
# reference : https://kkokkilkon.tistory.com/144
#install_github("devtools")
library(devtools)
#install_github("ggbiplot", "vqv")
library(ggbiplot)
PCA <- function(dt, dt_group, scale=T){
# rownames(dt) : our interest
# colnames(dt) : the dimensional space of each sample
pca_dt <- prcomp(dt,
center = T,
scale. = scale)
ggbiplot(pca_dt,
choices = c(1, 2),
obs.scale = 1,
var.scale = 1,
groups = dt_group,
circle = TRUE,
varname.size=0,
var.axes = F)
}
# Example
dt <- iris[, -5]
dt_group <- iris[, 5]
scale = TRUE
PCA(dt, dt_group, scale)
⒅ Kaplan-Meier Survival Analysis
# Install and Load Required Packages
install.packages("survival")
install.packages("survminer")
library(survival)
library(survminer)
# Create modified data frames
# Accurately reflect all deaths and sensored cases
surv_data <- data.frame(
time = c(6, 12, 21, 27, 32, 39, 43, 43, 43, 89, 89, 89, 89, 89, 89, 261, 263, 270, 270, 311),
status = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1) # 사망은 1, censoring은 0
)
# Create a Surv object
surv_obj <- Surv(time = surv_data$time, event = surv_data$status)
# Estimate a Kaplan-Meier survival curve
fit <- survfit(surv_obj ~ 1, data = surv_data)
# Draw a survival curve
ggsurvplot(
fit,
data = surv_data,
xlab = "Time",
ylab = "Survival probability",
title = "Kaplan-Meier Survival Curve",
surv.median.line = "hv", # Add a median line for the survival curve
ggtheme = theme_minimal(), # Set a theme
risk.table = TRUE, # Add a risk table
palette = "Dark2" # Set a color palette
)
8. Plot Generation
⑴ Example of storing images in png
png(file = "my plot.png", width = 1500, height = 300)
DimPlot(pbmc, reduction = "umap", label = TRUE, repel = TRUE)
dev.off()
⑵ A function that draws the confidence interval between the scatter plot and the slope given x and y
scatter_plot <- function(x, y, xlab = "x", ylab = "y", point_size = 2, lab_size = 4, png=TRUE){
library(ggplot2)
# the lenth(x) must be same with the length(y)
mat <- matrix(0, nrow = length(x), ncol = 2)
mat[, 1] = x
mat[, 2] = y
colnames(mat) = c(xlab, ylab)
mat <- as.data.frame(mat)
if(png){
png("./scatter_plot.png",width=2000,height=2000,res=500)
ggplot(mat, aes(x=x, y=y)) + geom_point(shape=19, size=point_size, color="blue") + theme(plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(size =1)) + stat_smooth(method = lm, level=.95, color="grey") + labs(x=xlab, y=ylab, size=lab_size)
dev.off()
} else{
ggplot(mat, aes(x=x, y=y)) + geom_point(shape=19, size=point_size, color="blue") + theme(plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(size =1)) + stat_smooth(method = lm, level=.95, color="grey") + labs(x=xlab, y=ylab, size=lab_size)
}
}
⑶ A function that draws a Spatial FeaturePlot when x, y, and color are given
my.plot <- function(x, y, col){
# assume that length(x) = length(y) = length(col)
plot(x, y, t="n")
colfunc <- colorRampPalette(c("#000000", "#EB4600", "#FFF800"))
coll = array(dim = length(col))
for(i in 1 : length(col)){
coll[i] <- colfunc(100) [as.integer( col[i] / max(col) * 99 + 1)]
}
text(x, y, labels = "●", col = coll, cex = 1)
}
⑷ spatial feature plot
# tissue_dir : the directory that contains a filtered_feature_bc_matrix.h5
tissue_dir <- './outs/'
# Tgenes : genes of interest
Tgenes <- c('Slc2a1', 'Slc2a3')
conv_spatial_feature_plot <- function(tissue_dir, Tgenes, quality.control = FALSE){
library(Seurat)
library(SeuratData)
library(ggplot2)
library(cowplot)
library(dplyr)
# reference : https://nbisweden.github.io/workshop-scRNAseq/labs/compiled/seurat/seurat_07_spatial.html
br.sp = Load10X_Spatial(tissue_dir, slice= 'slice1')
br.sp <- SCTransform(br.sp, assay = "Spatial", verbose = FALSE, variable.features.n = 1000)
if(quality.control){
br.sp <- PercentageFeatureSet(br.sp, "^mt-", col.name = "percent_mito")
br.sp <- PercentageFeatureSet(br.sp, "^Hb.*-", col.name = "percent_hb")
br.sp <- br.sp[, br.sp$nFeature_Spatial > 500 & br.sp$percent_mito < 25 & br.sp$percent_hb < 20]
}
SpatialFeaturePlot(br.sp, features = Tgenes)
}
conv_spatial_feature_plot(tissue_dir, Tgenes)
⑸ enhanced volcano plot given gene.list, log FC value vector, and adjusted p value value vector
my.EnhancedVolcano <- function(gene.name, logFC, adj.P.Val,
pCutoff = 0.05, FCcutoff = 0.3,
xlim = c(-0.5, 0.5), ylim = c(-0.5, 0.5)){
# install.packages("BiocManager")
# BiocManager::install("EnhancedVolcano")
library(EnhancedVolcano)
tested <- matrix(0, nrow = length(gene.name), ncol = 2)
tested <- as.data.frame(tested)
for(i in 1:length(gene.name)){
tested[i, 1] <- logFC[i]
tested[i, 2] <- adj.P.Val[i]
}
rownames(tested) <- gene.name
colnames(tested) <- c('logFC', 'adj.P.Val')
EnhancedVolcano(tested, lab = rownames(tested),
x='logFC', y='adj.P.Val', xlim = xlim, ylim = ylim,
pCutoff = pCutoff, FCcutoff = FCcutoff)
}
⑹ How to find the gene ontology (GO) from the gene list
GO <- function(gene){
library(EnhancedVolcano)
library(clusterProfiler)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(enrichplot)
# ont = "ALL", "BP", "CC", "MF"
# showCategory is not mandatory
gene <- gsub('.*-', '', gene) # -와 그 앞에 있는 것들을 제거 (예 : "GRCh38-")
gene <- gsub('\\ .*$', '', gene) # 'KLK2 ENSG00000167751' 같은 것을 해결
if (gene[1] == toupper(gene[1])){ ## Human gene
gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
gene.df <- as.vector(gene.df[[2]])
GO <- enrichGO(gene.df, OrgDb = 'org.Hs.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
return(GO)
} else{ ## Mouse gene?
gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
gene.df <- as.vector(gene.df[[2]])
GO <- enrichGO(gene.df, OrgDb = 'org.Mm.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
return(GO)
}
}
⑺ How to draw a gene onology (GO) plot from the gene list (▶ How to Interpret GO Plot)
GO.plot <- function(gene){
library(EnhancedVolcano)
library(clusterProfiler)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(enrichplot)
# ont = "ALL", "BP", "CC", "MF"
# showCategory is not mandatory
gene <- gsub('.*-', '', gene) # -와 그 앞에 있는 것들을 제거 (예 : "GRCh38-")
gene <- gsub('\\ .*$', '', gene) # 'KLK2 ENSG00000167751' 같은 것을 해결
if (gene[1] == toupper(gene[1])){ ## Human gene
gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
gene.df <- as.vector(gene.df[[2]])
GO <- enrichGO(gene.df, OrgDb = 'org.Hs.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
dotplot(GO,split="ONTOLOGY", showCategory = 5)+facet_grid(ONTOLOGY~., scale="free")
} else{ ## Mouse gene?
gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
gene.df <- as.vector(gene.df[[2]])
GO <- enrichGO(gene.df, OrgDb = 'org.Mm.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
dotplot(GO,split="ONTOLOGY", showCategory = 5)+facet_grid(ONTOLOGY~., scale="free")
}
}
### Example
GO.plot(c("COL1A1", "COL1A2", "COL3A1", "COL6A3"))
⑻ How to draw a cnetplot from the gene list
my.cnetplot <- function(gene.list){
GO <- function(gene){
library(EnhancedVolcano)
library(clusterProfiler)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(enrichplot)
# ont = "ALL", "BP", "CC", "MF"
# showCategory is not mandatory
gene <- gsub('.*-', '', gene) # -와 그 앞에 있는 것들을 제거 (예 : "GRCh38-")
gene <- gsub('\\ .*$', '', gene) # 'KLK2 ENSG00000167751' 같은 것을 해결
if (gene[1] == toupper(gene[1])){ ## Human gene
gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
gene.df <- as.vector(gene.df[[2]])
GO <- enrichGO(gene.df, OrgDb = 'org.Hs.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
return(GO)
} else{ ## Mouse gene?
gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
gene.df <- as.vector(gene.df[[2]])
GO <- enrichGO(gene.df, OrgDb = 'org.Mm.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
return(GO)
}
}
go = GO(gene.list)
ego_ <- 0
if (gene.list[1] == toupper(gene.list[1])){ ## Human gene
ego_ <- setReadable(go, org.Hs.eg.db, keyType='ENTREZID')
} else { ## Mouse gene?
ego_ <- setReadable(go, org.Mm.eg.db, keyType='ENTREZID')
}
cnetplot(ego_, categorySize="pvalue", foldChange= gene.list )
}
my.cnetplot(c("COL1A1", "COL1A2", "COL3A1", "COL6A3"))
⑼ Violin plot with statistical analysis
VlnPlot(object = br.sp, features = c('Col1a1'),
group.by = 'orig.ident', pt.size = 0.1) +
facet_grid(.~tnbc.merge@active.ident)+
fill_palette(palette='npg')+
stat_compare_means(method = "anova", label='p')+
theme(axis.text.x = element_text(angle = 90, hjust = 1),
strip.text.x = element_text(size = rel(0.7)))
Input: 2022.05.03 00:13