Korean, Edit

A collection of main functions useful in R

Higher category : 【RStudio】 R Studio Index, 【Bioinformatics】 Bioinformatics Analysis Index


1. Overview

2. Basics

3. File I/O

4. Vectors and Matrices

5. gsub

6. Statistical Analysis

7. Bioinformatics

8. Plot Generation


a. GitHub (executable .R file)

b. Useful Functions in Python



1. Overview

⑴ The functions defined below can be called as follows.


source("https://github.com/JB243/nate9389/blob/main/RStudio/A_Collection_of_Useful_Functions_in_RStudio.R?raw=true")


① However, the following problems may occur due to the above code.

Problem 1. Error in sum (dim(scRNaseqData):argument “b” is missing, with no default

Problem 2. “do.call” r argument “b” is missing, with no default

⑵ The functions defined below can be used as follows.


SUM <- function(a, b){
    return (a + b)
}

A <- 1
B <- 2
print(SUM(A, B))
# 3



2. Basics

⑴ Check data type



# x is a variable
typeof(x)


⑵ A function that fills in the number of zeros before a natural number to give a total of five digits.


n = 55
sprintf("%05d", n)


⑶ A function waiting for enter to hit. (reference)


readline(prompt="Press [enter] to continue")


⑷ Multiple processing


# Reference : https://cran.r-project.org/web/packages/future/vignettes/future-3-topologies.html

plan("multisession", workers = 10)


⑸ %in% operator


x <- c('A','B','C','D','E','F','G')
x %in% c('F','G')
# FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE



3. File I/O

⑴ Check whether destination folder exists


if (!file.exists(destination.folder)) {
    stop(.wrap("The destination folder could not be found. Please change",
               "the path specified in", sQuote(destination.folder)))
}


⑵ Check for write permissions in the destination folder


if (file.access(destination.folder, 2) == -1) {
    stop(.wrap("You do not have write permission in the destination",
               "folder."))


⑶ Create a folder of ‘Filtered_bams’ under ‘destination.folder’


destination.folder <- file.path(destination.folder, "Filtered_bams")
tryCatch({
    if (!file.exists(file.path(destination.folder))) {
        dir.create(file.path(destination.folder),
                   recursive = TRUE)
    } else {
        stop(.wrap("The folder",
                   sQuote(file.path(destination.folder, "Filtered_bams")),
                   "already exists. Please remove it, or (in case you",
                   "still need it), rename it to prevent files from being",
                   "overwritten."))
    }
}, warning = function(e) {
    stop(.wrap("You do not have write permissions in the destination",
               "folder. Stopping execution of the remaining part of the",
               "script..."))
})


⑷ Code to read a .gz file and decompress it


tmp = readMM('matrix.mtx.gz')
writeMM(tmp, 'matrix.mtx')



4. Vectors and Matrices

⑴ Sort the given vectors in the order of a, b, and c.


my_vec <- c('c', 'b', 'a')

print( my_vec[order(my_vec)] )
# "a" "b" "c"


⑵ Whether a given string (given_str) contains a particular string (partial_str) : grepl (reference)


grepl(partial_str, given_str, fixed = TRUE)


⑶ An index output containing a particular string “str” from a given string vector.


has_string_arr <- function(str, arr){
    flag <- 0
    ar <- array()
    for(i in 1:length(arr)){
        if( grepl(str, arr[i], fixed=TRUE) ){
            flag <- flag + 1
            ar[flag] <- i
        }
    }
    return(ar)
}


⑷ Intersection of three vectors


tri_intersect <- function(A, B, C){
    a <- intersect(A, B)
    b <- intersect(a, C)
    return(b)
}


⑸ A function that converts the NA portion of a given vector into zero


trim_na <- function(vector){
  for(i in 1: length(vector)){
    if(is.na(vector[i])){
      vector[i] = 0
    }
  }
  return (vector)
}


⑹ A function that excludes the NA portion of a given vector


ignore_na <- function(vector){
  flag = 0
  ar = array()
  for(i in 1: length(vector)){
    if(! is.na(vector[i])){
      flag = flag + 1
      ar[flag] = vector[i]
    }
  }
  return (ar)
}


⑺ A function that replaces a particular element with another element in a given vector


replace_in_vector <- function(v, from_element, to_element){
  ar <- array(dim = length(v))

  for(i in 1 : length(v)){
    if(v[i] == from_element){
      ar[i] = to_element
    } else{
      ar[i] = v[i]
    }
  }

  return(ar)
}


⑻ A function that returns the left corner of a matrix or data frame


corner <- function(x, num = 10){
  return(x[1:min(  num, dim(x)[1]  ), 
           1:min(  num, dim(x)[2]  )])
}


⑼ A function that outputs cbind when given any two matrices


my.cbind <- function(data1, data2){
  a <- intersect(rownames(data1), rownames(data2))
  return (cbind(data1[a, ], data2[a, ]))
}


⑽ A function that outputs rbind when given any two matrices


my.rbind <- function(data1, data2){
  a <- intersect(colnames(data1), colnames(data2))
  return (rbind(data1[, a], data2[, a]))
}


⑾ A function that outputs lines A and B in a given matrix or data frame


switch_A_B_row <- function(mat0, A, B){
  mat <- mat0
  mat[A, ] = mat0[B, ]
  mat[B, ] = mat0[A, ] 
  rownames(mat)[A] = rownames(mat0)[B]
  rownames(mat)[B] = rownames(mat0)[A]
  return(mat)
}


⑿ A function that switches columns A and B in a given matrix or data frame


switch_A_B_col <- function(mat0, A, B){
  mat <- mat0
  mat[, A] = mat0[, B]
  mat[, B] = mat0[, A] 
  colnames(mat)[A] = colnames(mat0)[B]
  colnames(mat)[B] = colnames(mat0)[A]
  return(mat)
}



5. gsub

⑴ A function that removes the hyphen (-) from a given string


eliminate_hyphene <- function(str){
    return(gsub("-", "", str))
}


⑵ A function of removing special symbols such as “(“) from a given string (reference)


eliminate_symbol <- function(str){
    return(gsub("(", "", str, fixed = TRUE))
}


⑶ A function that removes all characters after x and x from a given string (reference)


eliminate_backward <- function(str){
    return(gsub('x.*$', '', str))
}


⑷ A function that removes all numbers following . and . from a given string, for “ABCD.123”


eliminate_backward <- function(str){
    return(gsub('*.[0-9]*$', '', str))
}


⑸ A function that removes all characters before x and x from a given string


eliminate_forward <- function(str){
    return(gsub('.*x', '', str))
}


⑹ A function that fills the characters from start to end with “x” in a given string-type variable (‘given_str’) and returns them


str_substitute <- function(given_str, start, end){
    library(stringr)
    library(stringi)
  
    result = paste0(
        str_sub(given_str, 1, start-1),    
        stri_dup("x", (end-start+1) ),
        str_sub(given_str, end+1, str_length(given_str))
    )

    return(result)
}


⑺ Code for collecting numbers following a particular pattern within a given string (‘given_str’)


number_after_pattern_in_str <- function(given_str, pattern){
    # reference : https://cran.r-project.org/web/packages/stringr/vignettes/stringr.html

    library(stringr)

    phone1 <- paste0(pattern, '([1-9]{1})')
    phone2 <- paste0(pattern, '([1-9][0-9]{1})')
    phone3 <- paste0(pattern, '([1-9][0-9]{2})')
    phone4 <- paste0(pattern, '([1-9][0-9]{3})')

    arr <- array()
    flag = 0

    for(i in 1 : dim(str_locate_all(given_str, phone4)[[1]])[1] ){
        if (dim(str_locate_all(given_str, phone4)[[1]])[1] > 0){
            flag <- flag + 1
            arr[flag] = str_sub(
                given_str, 
                str_locate_all(given_str, phone4)[[1]][1, 1], #start
                str_locate_all(given_str, phone4)[[1]][1, 2]  #end
            )
            given_str = str_substitute(
                given_str, 
                str_locate_all(given_str, phone4)[[1]][1, 1], #start
                str_locate_all(given_str, phone4)[[1]][1, 2]  #end
            )
        }
    }

    for(i in 1 : dim(str_locate_all(given_str, phone3)[[1]])[1] ){
        if (dim(str_locate_all(given_str, phone3)[[1]])[1] > 0){
            flag <- flag + 1
            arr[flag] = str_sub(
                given_str, 
                str_locate_all(given_str, phone3)[[1]][1, 1], #start
                str_locate_all(given_str, phone3)[[1]][1, 2]  #end
            )
            given_str = str_substitute(
                given_str, 
                str_locate_all(given_str, phone3)[[1]][1, 1], #start
                str_locate_all(given_str, phone3)[[1]][1, 2]  #end
            )
        }
    }

    for(i in 1 : dim(str_locate_all(given_str, phone2)[[1]])[1] ){
        if (dim(str_locate_all(given_str, phone2)[[1]])[1] > 0){
            flag <- flag + 1
            arr[flag] = str_sub(
                given_str, 
                str_locate_all(given_str, phone2)[[1]][1, 1], #start
                str_locate_all(given_str, phone2)[[1]][1, 2]  #end
            )
            given_str = str_substitute(
                given_str, 
                str_locate_all(given_str, phone2)[[1]][1, 1], #start
                str_locate_all(given_str, phone2)[[1]][1, 2]  #end
            )
        }
    }

    for(i in 1 : dim(str_locate_all(given_str, phone1)[[1]])[1] ){
        if (dim(str_locate_all(given_str, phone1)[[1]])[1] > 0){
            flag <- flag + 1
            arr[flag] = str_sub(
                given_str, 
                str_locate_all(given_str, phone1)[[1]][1, 1], #start
                str_locate_all(given_str, phone1)[[1]][1, 2]  #end
            )
            given_str = str_substitute(
                given_str, 
                str_locate_all(given_str, phone1)[[1]][1, 1], #start
                str_locate_all(given_str, phone1)[[1]][1, 2]  #end
            )
        }
    }	

    arr <- str_sub(arr, str_length(pattern)+1, str_length(arr))
    arr <- as.numeric(arr)
    return(arr)
}



6. Statistical Analysis

⑴ Code for percentile quantiles from 10% to 90%


# reference: https://www.statology.org/percentiles-in-r/

quantile(data, probs = seq(.1, .9, by = .1))


⑵ A value obtained by taking log10 for n!


log10_factorial <- function(n){
  if(n == 0){
    return(0)
  }

  out <- 0
  for(i in 1 : n){
    out <- out + log(i) / log(10)
  }
  return(out)
}


⑶ A function to obtain the binomial coefficient nCk, which is the number of cases where k out of n is selected : Improves code because simply using factorial can create inf


my.combination <- function(n, k){
  # return nCk = n! / ((n-k)! k!)
  
  if (n == k || n == 0 || k == 0){
    return(1)
  }

  A = log10_factorial(n)
  B = log10_factorial(n-k)
  C = log10_factorial(k)
  
  log10_nCk = A - B - C
  return(10^(log10_nCk))
}


⑷ t test between two groups


t.test(v1, v2, paired = FALSE)
# maybe you can activate 'paired' in a special condition


⑸ ANOVA test between two groups


one_way_2_factor_anova <- function(v1, v2){
  dat <- matrix(0, nrow = ( length(v1) + length(v2) ), ncol = 2 )
  for(i in 1 : length(v1) ){
    dat[i, 1] <- v1[i]
    dat[i, 2] <- 'v1'
  }
  for(i in 1 : length(v2) ){
    dat[i + length(v1), 1] <- v2[i]
    dat[i + length(v1), 2] <- 'v2'
  }
  dat <- as.data.frame(dat)
  
  colnames(dat) <- c('val', 'factor')

  anova_IS <- aov(val ~ factor, data = dat)
  print(summary(anova_IS))

  anova_residuals <- anova_IS$residuals
  print(summary(anova_residuals))
}


⑹ ANOVA test among three groups


one_way_3_factor_anova <- function(v1, v2, v3){
  dat <- matrix(0, nrow = ( length(v1) + length(v2) + length(v3) ), ncol = 2 )
  for(i in 1 : length(v1) ){
    dat[i, 1] <- v1[i]
    dat[i, 2] <- 'v1'
  }
  for(i in 1 : length(v2) ){
    dat[i + length(v1), 1] <- v2[i]
    dat[i + length(v1), 2] <- 'v2'
  }
  for(i in 1 : length(v3) ){
    dat[i + length(v1) + length(v2), 1] <- v3[i]
    dat[i + length(v1) + length(v2), 2] <- 'v3'
  }
  dat <- as.data.frame(dat)
  
  colnames(dat) <- c('val', 'factor')

  anova_IS <- aov(val ~ factor, data = dat)
  print(summary(anova_IS))

  anova_residuals <- anova_IS$residuals
  print(summary(anova_residuals))
}


⑺ ANOVA test among four groups


one_way_4_factor_anova <- function(v1, v2, v3, v4){
  dat <- matrix(0, nrow = ( length(v1) + length(v2) + length(v3) + length(v4) ), ncol = 2 )
  for(i in 1 : length(v1) ){
    dat[i, 1] <- v1[i]
    dat[i, 2] <- 'v1'
  }
  for(i in 1 : length(v2) ){
    dat[i + length(v1), 1] <- v2[i]
    dat[i + length(v1), 2] <- 'v2'
  }
  for(i in 1 : length(v3) ){
    dat[i + length(v1) + length(v2), 1] <- v3[i]
    dat[i + length(v1) + length(v2), 2] <- 'v3'
  }
  for(i in 1 : length(v4) ){
    dat[i + length(v1) + length(v2) + length(v3), 1] <- v4[i]
    dat[i + length(v1) + length(v2) + length(v3), 2] <- 'v4'
  }
  dat <- as.data.frame(dat)
  
  colnames(dat) <- c('val', 'factor')

  anova_IS <- aov(val ~ factor, data = dat)
  print(summary(anova_IS))

  anova_residuals <- anova_IS$residuals
  print(summary(anova_residuals))
}


⑻ ANOVA test among five groups


one_way_5_factor_anova <- function(v1, v2, v3, v4, v5){
  dat <- matrix(0, nrow = ( length(v1) + length(v2) + length(v3) + length(v4) + length(v5) ), ncol = 2 )
  for(i in 1 : length(v1) ){
    dat[i, 1] <- v1[i]
    dat[i, 2] <- 'v1'
  }
  for(i in 1 : length(v2) ){
    dat[i + length(v1), 1] <- v2[i]
    dat[i + length(v1), 2] <- 'v2'
  }
  for(i in 1 : length(v3) ){
    dat[i + length(v1) + length(v2), 1] <- v3[i]
    dat[i + length(v1) + length(v2), 2] <- 'v3'
  }
  for(i in 1 : length(v4) ){
    dat[i + length(v1) + length(v2) + length(v3), 1] <- v4[i]
    dat[i + length(v1) + length(v2) + length(v3), 2] <- 'v4'
  }
  for(i in 1 : length(v5) ){
    dat[i + length(v1) + length(v2) + length(v3) + length(v4), 1] <- v5[i]
    dat[i + length(v1) + length(v2) + length(v3) + length(v4), 2] <- 'v5'
  }
  dat <- as.data.frame(dat)
  
  colnames(dat) <- c('val', 'factor')

  anova_IS <- aov(val ~ factor, data = dat)
  print(summary(anova_IS))

  anova_residuals <- anova_IS$residuals
  print(summary(anova_residuals))
}


⑼ ANOVA test among six groups


one_way_6_factor_anova <- function(v1, v2, v3, v4, v5, v6){
  dat <- matrix(0, nrow = ( length(v1) + length(v2) + length(v3) + length(v4) + length(v5) + length(v6) ), ncol = 2 )
  for(i in 1 : length(v1) ){
    dat[i, 1] <- v1[i]
    dat[i, 2] <- 'v1'
  }
  for(i in 1 : length(v2) ){
    dat[i + length(v1), 1] <- v2[i]
    dat[i + length(v1), 2] <- 'v2'
  }
  for(i in 1 : length(v3) ){
    dat[i + length(v1) + length(v2), 1] <- v3[i]
    dat[i + length(v1) + length(v2), 2] <- 'v3'
  }
  for(i in 1 : length(v4) ){
    dat[i + length(v1) + length(v2) + length(v3), 1] <- v4[i]
    dat[i + length(v1) + length(v2) + length(v3), 2] <- 'v4'
  }
  for(i in 1 : length(v5) ){
    dat[i + length(v1) + length(v2) + length(v3) + length(v4), 1] <- v5[i]
    dat[i + length(v1) + length(v2) + length(v3) + length(v4), 2] <- 'v5'
  }
  for(i in 1 : length(v6) ){
    dat[i + length(v1) + length(v2) + length(v3) + length(v4) + length(v5), 1] <- v6[i]
    dat[i + length(v1) + length(v2) + length(v3) + length(v4) + length(v5), 2] <- 'v6'
  }
  dat <- as.data.frame(dat)
  
  colnames(dat) <- c('val', 'factor')

  anova_IS <- aov(val ~ factor, data = dat)
  print(summary(anova_IS))

  anova_residuals <- anova_IS$residuals
  print(summary(anova_residuals))
}


⑽ Investigate FC, p value of two vectors


comparison_of_two_vectors <- function(v1, v2, paired = FALSE){
  p.val = t.test(v1, v2, paired = paired)
  print(p.val)
  
  log2FC = log( mean(v1 + 0.000000000001)/mean(v2 + 0.000000000001) ) / log(2)
  print(log2FC)
}


⑾ How to test the statistical equivalence of two sets using Fisher’s effect test (ver. 1)


total.gene <- 32285
ST <- 31
scRNAseq <- 14
cross <- 5

a <- cross
b <- scRNAseq - a
c <- ST - a
d <- total.gene - a - b - c
A <- a + b
B <- c + d
C <- a + c
D <- b + d

group<-c("A","A","B","B")
cancer<-c("1.Yes","2.No","1.Yes","2.No")
count<-c(a,b,c,d)
dat<-data.frame(group,cancer,count)
tab<-xtabs(count~group+cancer,data=dat)
tab

chisq.test(tab)$observed
chisq.test(tab)$expected
fisher.test(tab)
-log(fisher.test(tab)$p.value, 10)

if(cross > ST * scRNAseq / total.gene){
  print("Enrichment")
} else if(cross < ST * scRNAseq / total.gene){
  print("Depletion")
}


⑿ How to test the statistical equivalence of two sets using Fisher’s effect test (ver. 2)


my.Fisher.exact.test <- function(total, A, B, cross){
  a1 <- log10_factorial(A)
  a2 <- log10_factorial(total - A)
  a3 <- log10_factorial(B)
  a4 <- log10_factorial(total - B)

  b1 <- log10_factorial(cross)
  b2 <- log10_factorial(A - cross)
  b3 <- log10_factorial(B - cross)
  b4 <- log10_factorial(total - cross - (A - cross) - (B - cross))
  b5 <- log10_factorial(total)

  out = a1 + a2 + a3 + a4 - b1 - b2 - b3 - b4 - b5
  return(10^out)
}


⒀ MIA assay (enrichment)


my.MIA.assay.enrichment <- function(total, A, B, cross){
  out <- 0
  for(i in cross:min(A, B)){
    out = out + my.Fisher.exact.test(total, A, B, i)
  }
  return(out)
}


⒁ MIA assay (depletion)


my.MIA.assay.depletion <- function(total, A, B, cross){
  out <- 0
  for(i in 0:cross-1){
    out = out + my.Fisher.exact.test(total, A, B, i)
  }
  return(out)
}



7. Bioinformatics

⑴ Explore genes that begin with a specific keyword


gene_starting_with <- function(keyword){
  human = read.csv("https://blog.kakaocdn.net/dn/29YTj/btrS5iG9QOH/Di6RQKxHOPDii7EjkdHN30/human_genes_36601.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)
  mouse = read.csv("https://blog.kakaocdn.net/dn/wkjwJ/btrS1QSgrpD/VS8ELANCQyeZAA3vL8JQP0/mouse_genes_32285.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)
  ar = array()
  flag = 0

  if(keyword == toupper(keyword)){ # human genes
    for(i in 1:dim(human)[1]){
      if(grepl(keyword, human[i, 2], fixed = TRUE)){
        flag = flag + 1
        ar[flag] = human[i, 2]
      }
    }   
  }   
  else{ # mouse genes
    for(i in 1:dim(mouse)[1]){
      if(grepl(keyword, mouse[i, 2], fixed = TRUE)){
        flag = flag + 1
        ar[flag] = mouse[i, 2]
      }
    }   
  }
  
  return(ar)
}


⑵ Conversion between enembl_gene_id and gene_symbol

① convert from ensembl.gene to gene.symbol


library(EnsDb.Hsapiens.v79)
ensembl.genes <- c("ENSG00000150676", "ENSG00000099308", "ENSG00000142676", "ENSG00000180776", "ENSG00000108848", "ENSG00000277370", "ENSG00000103811", "ENSG00000101473")
geneIDs1 <- ensembldb::select(EnsDb.Hsapiens.v79, keys= ensembl.genes, keytype = "GENEID", columns = c("SYMBOL","GENEID"))


② Convert from enembl.gene to gene.symbol (using the table)

human_genes_36601.tsv

mouse_genes_32285.tsv


ensembl_to_gene <- function(ensembl_list){
  ar = array(dim = length(ensembl_list))

  human = read.csv("https://blog.kakaocdn.net/dn/29YTj/btrS5iG9QOH/Di6RQKxHOPDii7EjkdHN30/human_genes_36601.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)
  mouse = read.csv("https://blog.kakaocdn.net/dn/wkjwJ/btrS1QSgrpD/VS8ELANCQyeZAA3vL8JQP0/mouse_genes_32285.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)

  for(i in 1:length(ensembl_list)){
    if(grepl('ENSG', ensembl_list[i], fixed = TRUE)){ # human gene
      index = match(ensembl_list[i], human[, 1])
      ar[i] = human[index, 2]
    } 
    else if(grepl('ENSMUSG', ensembl_list[i], fixed = TRUE)){ # mouse gene
      index = match(ensembl_list[i], mouse[, 1])
      ar[i] = mouse[index, 2]
    } 
  }
  return(ar)
}


③ Convert from gene.symbol to ensembl.gene


library(EnsDb.Hsapiens.v79)
geneSymbols <-  c('DDX26B','CCDC83',  'MAST3', 'RPL11', 'ZDHHC20',  'LUC7L3',  'SNORD49A',  'CTSH', 'ACOT8')
geneIDs2 <- ensembldb::select(EnsDb.Hsapiens.v79, keys= geneSymbols, keytype = "SYMBOL", columns = c("SYMBOL","GENEID"))


④ Convert from gen.symbol to enembl.gen (using the table)


gene_to_ensembl <- function(gene_list){
  ar = array(dim = length(gene_list))

  human = read.csv("https://blog.kakaocdn.net/dn/29YTj/btrS5iG9QOH/Di6RQKxHOPDii7EjkdHN30/human_genes_36601.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)
  mouse = read.csv("https://blog.kakaocdn.net/dn/wkjwJ/btrS1QSgrpD/VS8ELANCQyeZAA3vL8JQP0/mouse_genes_32285.tsv?attach=1&knm=tfile.tsv", sep = '\t', header = F)

  for(i in 1:length(gene_list)){
    if(gene_list[i] == toupper(gene_list[i])){ # human gene
      index = match(gene_list[i], human[, 2])
      ar[i] = human[index, 1]
    } 
    else{ # mouse gene
      index = match(gene_list[i], mouse[, 2])
      ar[i] = mouse[index, 1]
    } 
  }

  # return(ignore_na(ar)) 
  ## if possible, ignore_na should be used
  
  return(ar)
}


⑤ human ensembl transcript to gene name (ref.)


library(biomaRt)
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")  
ensembl_transcript_to_gene <- function(transcript_ids){
  # reference : https://support.bioconductor.org/p/106253/#106256

  res <- getBM(attributes = c('ensembl_transcript_id_version', 
                              'ensembl_gene_id', 
                              'external_transcript_name',
                              'external_gene_name'),
               filters = 'ensembl_transcript_id_version', 
               values = transcript_ids,
               mart = mart)

  if(dim(res)[1] == 0){
    return("")
  }	

  return(res[, 'external_gene_name'])
}


⑥ mouse gene to MGI symbol (using the table)


mouse_gene_to_MGI <- function(mouse_gene_list){
  ar = array(dim = length(mouse_gene_list))

	dat <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")

  for(i in 1:length(mouse_gene_list)){
	  index = match(mouse_gene_list[i], dat[,'Symbol'])
    ar[i] = dat[index, 'Mouse.MGI.ID']
  }

  return(ar)
}


⑦ MGI symbol to mouse gene (using the table)


MGI_to_mouse_gene <- function(MGI_list){
  ar = array(dim = length(MGI_list))

	dat <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")

  for(i in 1:length(MGI_list)){
	  index = match(MGI_list[i], dat[,'Mouse.MGI.ID'])
    ar[i] = dat[index, 'Symbol']
  }

  return(ar)
}


⑧ human gene to HGNC symbol (using the table)


human_gene_to_HGNC <- function(human_gene_list){
  ar = array(dim = length(human_gene_list))

	dat <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")

  for(i in 1:length(human_gene_list)){
	  index = match(human_gene_list[i], dat[,'Symbol'])
    ar[i] = dat[index, 'HGNC.ID']
  }

  return(ar)
}


⑨ HGNC symbol to human gene (using the table)


HGNC_to_human_gene <- function(HGNC_list){
  ar = array(dim = length(HGNC_list))

	dat <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")

  for(i in 1:length(HGNC_list)){
	  index = match(HGNC_list[i], dat[,'HGNC.ID'])
    ar[i] = dat[index, 'Symbol']
  }

  return(ar)
}


Transformation table for animals other than humans and mice

⑶ Affymetrix probe ID, enembl_gen_id, gain_name

① convert Affymetrix probe ID to ensembl_gene_id, gene_name


#Convert Affymetrix probe ID to ensembl_gene_id, gene_name
## https://www.biostars.org/p/328065/#328328
## https://www.biostars.org/p/332461/#332474
BiocManager::install("biomaRt", force=TRUE)
library(biomaRt)

dat<-c('1007_s_at', '1053_at', '117_at', '121_at', '1255_g_at', '1294_at')
require("biomaRt")
mart <- useMart("ENSEMBL_MART_ENSEMBL")
mart <- useDataset("hsapiens_gene_ensembl", mart)
annotLookup <- getBM(
  mart = mart,
  attributes = c(
    "affy_hg_u133_plus_2",
    "ensembl_gene_id",
    "gene_biotype",
    "external_gene_name"),
  filter = "affy_hg_u133_plus_2",
  values = dat,
  uniqueRows=TRUE)


⑷ Gene homology between human and mouse

① Overview

○ Human genes are 36601

○ There are 32285 genes in the mouse.

○ Human and mouse genes are almost the same, but there are many differences.

○ Therefore, it is necessary to know how to convert human genes into mouse genes or vice versa.

○ In many cases, the mouse gene is not a human gene that simply capitalizes the mouse gene.

Method 1. BiomaRt


install.packages("BiocManager")
BiocManager::install("biomaRt")
library(biomaRt)

genes <- c("Xkr4", "Gm1992", "Gm37381")

human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")
hh <- getLDS(attributes = c("mgi_symbol", "ensembl_gene_id", "entrezgene_id"), filters = "mgi_symbol", values = genes, mart = mouse, attributesL =
c("hgnc_symbol", "ensembl_gene_id", "entrezgene_id"), martL = human, uniqueRows = T)


attribute 1. Determine the correspondence between MGI symbol and HGNC symbol.

attribute 2. Determine the corresponding relationship between ENSMUSG code and ENSG code.

attribute 3. Determine the correspondence between NCBI gain ID and NCBI gain ID.

○ For each attribute, information about the mouse on the left and information about the person on the right.

○ currently inoperative: Currently, the following error message has been found and is not working.


#1. Error in getLDS(attributes = c("mgi_symbol", "ensembl_gene_id", "entrezgene_id"), : Query ERROR: caught BioMart::Exception::Database: Could not connect to mysql database ensembl_mart_106: DBI connect ('database =ensembl_mart_106;host=127.0.0.1;port=5316','ensro',...) failed: Can't connect to MySQL server on '127.0.0.1' (111) at /nfs/public/ro/ensweb/live/mart/www_106/biomart-perl/lib/BioMart/Configuration/DBLocation.pm line 98.
#2. Error in textConnection(attrfilt) : invalid 'text' argument
#3. Ensembl site unresponsive, trying useast mirror. Ensembl site unresponsive, trying asia mirror
#4. Error: biomaRt has encountered an unexpected server error. Consider trying one of the Ensembl mirrors (for more details look at ?useEnsembl)


Method 2. Use of the human-mouse conversion table provided on the MGI website

HOM_MouseHumanSequence.csv


human_to_mouse <- function(human_gene){
  hom <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")

  mouse_gene = array()
  flag = 0

  for(i in 1 : length(human_gene)){
    index = match(human_gene[i], hom[hom$Common.Organism.Name == 'human', 'Symbol'])
    key = hom[hom$Common.Organism.Name == 'human', 'DB.Class.Key'][index]
    flag = flag + 1
    mouse_gene[flag] = hom[hom$DB.Class.Key == key 
                           & hom$Common.Organism.Name == 'mouse, laboratory'
                           , 'Symbol'][1] # duplicate mouse genes can be found
  }

  return(mouse_gene)
}

mouse_to_human <- function(mouse_gene){
  hom <- read.csv("https://blog.kakaocdn.net/dn/cVeqsA/btrS1JMnxyX/HtVhPmqtxdgt7LQlGkeql0/HOM_MouseHumanSequence.csv?attach=1&knm=tfile.csv")

  human_gene = array()
  flag = 0

  for(i in 1 : length(mouse_gene)){
    index = match(mouse_gene[i], hom[hom$Common.Organism.Name == 'mouse, laboratory', 'Symbol'])
    key = hom[hom$Common.Organism.Name == 'mouse, laboratory', 'DB.Class.Key'][index]
    flag = flag + 1
    human_gene[flag] = hom[hom$DB.Class.Key ==  key
                           & hom$Common.Organism.Name == 'human'
                           , 'Symbol'][1] # duplicate human genes can be found
  }

  return(human_gene)
}


⑸ chromosome position to hgnc_symbol


ChromosomePosition_to_hgnc_symbol <- function(chromosome, start, end){
  # reference : https://support.bioconductor.org/p/127035/

  library(biomaRt)
  positions <- data.frame(chromosome = chromosome,
                          start = start,
                          end = end)

  ensembl = useEnsembl(biomart='ensembl', 
                       dataset="hsapiens_gene_ensembl") 

  results <- getBM(attributes = c("hgnc_symbol", "chromosome_name", "start_position", "end_position"), 
                   filters = c("chromosome_name", "start", "end"),
                   values = list(positions[,1], positions[,2], positions[,3]),
                   mart = ensembl)

  print(results)

  postions_combined <- apply(as.matrix(positions), 1, paste, collapse = ":")

  results2 <- getBM(attributes = c("hgnc_symbol", "chromosome_name", "start_position", "end_position"), 
                   filters = c("chromosomal_region"),
                   values = postions_combined,
                   mart = ensembl)

  print(results2)
}


⑹ gene name to chromosome position : Use of the conversion table(human, mouse)


gene_to_chromosome_position <- function(gene_list){
  # gene_list : list of genes

  human <- read.csv("https://blog.kakaocdn.net/dn/lTbKq/btrWjlmGho8/eWwWHbfLOlVGKAVeuDMKt1/human%20gene%20annotation.csv?attach=1&knm=tfile.csv")
  mouse = read.csv("https://blog.kakaocdn.net/dn/clSwT7/btrWcrWmS41/mNLCUuBlQxfJFhG1U2JQNk/mouse%20gene%20annotation.csv?attach=1&knm=tfile.csv")

  result = array()
  
  for(i in 1:length(gene_list)){
    if(gene_list[i] == toupper(gene_list[i])){ # human gene
      idx = match(gene_list[i], human[, 1])
      note = paste('Gene.ID: ', gene_list[i],
                   ', chromosome: ', human[idx, 'chromosome'],
                   ', start: ', human[idx, 'start'],
                   ', end: ', human[idx, 'end'])
      result[i] = note
    }
    else{ # mouse gene
      idx = match(gene_list[i], mouse[, 1])
      note = paste('Gene.ID: ', gene_list[i],
                   ', chromosome: ', mouse[idx, 'chromosome'],
                   ', start: ', mouse[idx, 'start'],
                   ', end: ', mouse[idx, 'end'])
      result[i] = note
    }
  }
  
  return(result)
}


⑺ gene name to description


gene_to_description <- function(gene_list){
  # gene_list : list of genes

  human <- read.csv("https://blog.kakaocdn.net/dn/lTbKq/btrWjlmGho8/eWwWHbfLOlVGKAVeuDMKt1/human%20gene%20annotation.csv?attach=1&knm=tfile.csv")
  mouse = read.csv("https://blog.kakaocdn.net/dn/clSwT7/btrWcrWmS41/mNLCUuBlQxfJFhG1U2JQNk/mouse%20gene%20annotation.csv?attach=1&knm=tfile.csv")

  result = array()
  
  for(i in 1:length(gene_list)){
    if(gene_list[i] == toupper(gene_list[i])){ #human
      idx = match(gene_list[i], human[, 1])
      result[i] = human[idx, 'Description']
    }
    else{ #mouse 
      idx = match(gene_list[i], mouse[, 1])
      result[i] = mouse[idx, 'Description']
    }    
  }
  
  return(result)
}


⑻ gene name to bioType


gene_to_bioType <- function(gene_list){
  # gene_list : list of mouse genes

  human <- read.csv("https://blog.kakaocdn.net/dn/lTbKq/btrWjlmGho8/eWwWHbfLOlVGKAVeuDMKt1/human%20gene%20annotation.csv?attach=1&knm=tfile.csv")
  mouse = read.csv("https://blog.kakaocdn.net/dn/clSwT7/btrWcrWmS41/mNLCUuBlQxfJFhG1U2JQNk/mouse%20gene%20annotation.csv?attach=1&knm=tfile.csv")

  result = array()
  
  for(i in 1:length(gene_list)){
    if(gene_list[i] == toupper(gene_list[i])){ # human
      idx = match(gene_list[i], human[, 1])
      result[i] = human[idx, 'bioType']
    }
    else { # mouse
      idx = match(gene_list[i], mouse[, 1])
      result[i] = mouse[idx, 'bioType']
    }
  }
  
  return(result)
}


⑼ Change the name of the Seurat object object gene : Need to create a new object (reference)


# RenameGenesSeurat  ------------------------------------------------------------------------------------
RenameGenesSeurat <- function(obj = ls.Seurat[[i]], newnames = HGNC.updated[[i]]$Suggested.Symbol) { # Replace gene names in different slots of a Seurat object. Run this before integration. Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.
  print("Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.")
  RNA <- obj@assays$RNA

  if (nrow(RNA) == length(newnames)) {
    if (length(RNA@counts)) RNA@counts@Dimnames[[1]]            <- newnames
    if (length(RNA@data)) RNA@data@Dimnames[[1]]                <- newnames
    if (length(RNA@scale.data)) RNA@scale.data@Dimnames[[1]]    <- newnames
  } else {"Unequal gene sets: nrow(RNA) != nrow(newnames)"}
  obj@assays$RNA <- RNA
  return(obj)
}
# RenameGenesSeurat(obj = SeuratObj, newnames = HGNC.updated.genes)


⑽ Change cluster information


update_cluster_in_seurat_obj <- function(seurat_obj, barcode, cluster){

  # dim(seurat_obj)[1] = length(barcode) = length(cluster)
  mat <- matrix(0, nrow = length(barcode), ncol = 2)
  mat[, 1] = barcode
  mat[, 2] = cluster
  mat = as.data.frame(mat)
  rownames(mat) = barcode

  seurat_obj@meta.data$orig.ident = mat[rownames(seurat_obj@meta.data), 2]
  # you may need to modify the above code
  seurat_obj@active.ident = as.factor(seurat_obj@meta.data$orig.ident)
  
  return (seurat_obj)
}


⑾ ‘Idents’ allows you to write FindAllMarkers for the metadata you want (reference 1, reference 2)


Idents(pbmc) <- pbmc$celltype
markers = FindAllMarkers(object = pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)


⑿ Reorder the place within the plot of the ‘orig.ident’ of the Seurat object


# Reference : https://github.com/satijalab/seurat/issues/2471

object$orig.ident <- factor(x = object$orig.ident, levels = c("A", "B", "C"))


⒀ Save Seurat object pbmc3k as h5ad (reference)


library(Seurat)
library(SeuratData)
library(SeuratDisk)

SaveH5Seurat(pbmc3k, filename = "~/Downloads/pbmc3k.h5Seurat")
Convert("~/Downloads/pbmc3k.h5Seurat", dest = "h5ad")


① The generated .h5ad file can be open with the following code in Python


import scanpy as sc

pbmc = sc.read_h5ad("pbmc3k.h5ad")


⒁ When there are matrix.mtx, barcodes.tsv, features.tsv files and a spatial folder within the tissue_dir directory, provide the code for reading Visium data in R.


library(Seurat)

b_data <-ReadMtx('./matrix.mtx.gz',
               './barcodes.tsv.gz',
               './features.tsv.gz',
               feature.column=1)
b_data = CreateSeuratObject(b_data, assay='Spatial')
b_image = Read10X_Image(paste('./spatial/',sep=''))
b_image_ = b_image[Cells(b_data)]
DefaultAssay(object = b_image_) <- 'Spatial'
b_data[["slice1"]]=b_image_
# you can change "slice1"

b_data <- SCTransform(b_data, assay = "Spatial", verbose = FALSE, variable.features.n = 1000)
# Some errors might occur when running SpatialFeaturePlot without SCTransform

SpatialFeaturePlot(b_data, rownames(b_data)[1])
# check whether it works properly


⒂ Provide the code for reading Visium data saved in .h5ad format in R.


library(Seurat)
library(anndata)
library(png)
library(jsonlite)

adata = read_h5ad("adata.h5ad")

hires = adata$uns$spatial$SAMPLE$images$hires
# you must change "SAMPLE"
png_file_path_hires = "~/Downloads/data/spatial/tissue_hires_image.png"
png(filename = png_file_path_hires, width = dim(hires)[1], height = dim(hires)[2])
par(mar = c(0, 0, 0, 0))  # Remove margins
plot.new()
rasterImage(hires, 0, 0, 1, 1)
dev.off()

lowres = adata$uns$spatial$SAMPLE$images$lowres
# you must change "SAMPLE"
png_file_path_lowres = "~/Downloads/data/spatial/tissue_lowres_image.png"
png(filename = png_file_path_lowres, width = dim(lowres)[1], height = dim(lowres)[2])
par(mar = c(0, 0, 0, 0))  # Remove margins
plot.new()
rasterImage(lowres, 0, 0, 1, 1)
dev.off()

scale_factors <- adata$uns$spatial$SAMPLE$scalefactors
# you must change "SAMPLE"
json_file_path <- "~/Downloads/data/spatial/scalefactors_json.json"
write_json(scale_factors, json_file_path)

df = adata$obs[,1:3]
df[,4] = adata$obsm$spatial[,2]
df[,5] = adata$obsm$spatial[,1]
write.csv(df, "~/Downloads/data/spatial/tissue_positions_list.csv")

b_data = CreateSeuratObject(t(as.matrix(adata$X)), assay='Spatial')
b_image = Read10X_Image(paste('~/Downloads/data/spatial/',sep=''))
b_image_ = b_image[Cells(b_data)]
DefaultAssay(object = b_image_) <- 'Spatial'
b_data[["slice1"]]=b_image_
# you can change "slice1"

b_data <- SCTransform(b_data, assay = "Spatial", verbose = FALSE, variable.features.n = 1000)
# Some errors might occur when running SpatialFeaturePlot without SCTransform

SpatialFeaturePlot(b_data, rownames(b_data)[1])
# An error occurs...


⒃ Code for acquiring highly variable genes


library(dplyr)
library(Seurat)

pbmc.data <- Read10X(data.dir = "./outs")
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
pbmc[["percent.mt"]] <- PercentageFeatureSet(object = pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 15)
pbmc <- NormalizeData(object = pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(object = pbmc, selection.method = "vst", nfeatures = 2000)

gene_list = rownames(pbmc)[pbmc@assays$RNA@meta.features$vst.variable]


⒄ PCA


# reference : https://kkokkilkon.tistory.com/144

#install_github("devtools")
library(devtools)
#install_github("ggbiplot", "vqv")
library(ggbiplot)

PCA <- function(dt, dt_group, scale=T){
  # rownames(dt) : our interest
  # colnames(dt) : the dimensional space of each sample
  
  pca_dt <- prcomp(dt,
                   center = T,
                   scale. = scale)
  
  ggbiplot(pca_dt,
                choices = c(1, 2),
                obs.scale = 1,
                var.scale = 1,
                groups = dt_group,
                circle = TRUE,
					  		varname.size=0,
					  		var.axes = F)
  
}

# Example
dt <- iris[, -5]
dt_group <- iris[, 5]
scale = TRUE
PCA(dt, dt_group, scale)


Kaplan-Meier Survival Analysis


# Install and Load Required Packages
install.packages("survival")
install.packages("survminer")
library(survival)
library(survminer)

# Create modified data frames
# Accurately reflect all deaths and sensored cases
surv_data <- data.frame(
  time = c(6, 12, 21, 27, 32, 39, 43, 43, 43, 89, 89, 89, 89, 89, 89, 261, 263, 270, 270, 311),
  status = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1) # 사망은 1, censoring은 0
)

# Create a Surv object
surv_obj <- Surv(time = surv_data$time, event = surv_data$status)

# Estimate a Kaplan-Meier survival curve
fit <- survfit(surv_obj ~ 1, data = surv_data)

# Draw a survival curve
ggsurvplot(
  fit, 
  data = surv_data, 
  xlab = "Time", 
  ylab = "Survival probability", 
  title = "Kaplan-Meier Survival Curve",
  surv.median.line = "hv", # Add a median line for the survival curve
  ggtheme = theme_minimal(), # Set a theme 
  risk.table = TRUE, # Add a risk table
  palette = "Dark2" # Set a color palette 
)



8. Plot Generation

⑴ Example of storing images in png


png(file = "my plot.png", width = 1500, height = 300)
DimPlot(pbmc, reduction = "umap", label = TRUE, repel = TRUE)
dev.off()


⑵ A function that draws the confidence interval between the scatter plot and the slope given x and y


scatter_plot <- function(x, y, xlab = "x", ylab = "y", point_size = 2, lab_size = 4, png=TRUE){
  library(ggplot2)

  # the lenth(x) must be same with the length(y)
  mat <- matrix(0, nrow = length(x), ncol = 2)
  mat[, 1] = x
  mat[, 2] = y
  colnames(mat) = c(xlab, ylab)
  mat <- as.data.frame(mat)

  if(png){
    png("./scatter_plot.png",width=2000,height=2000,res=500)
    ggplot(mat, aes(x=x, y=y)) + geom_point(shape=19, size=point_size, color="blue") + theme(plot.background = element_blank(),   panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(size =1)) +   stat_smooth(method = lm, level=.95, color="grey") + labs(x=xlab, y=ylab, size=lab_size)
    dev.off()
  } else{
    ggplot(mat, aes(x=x, y=y)) + geom_point(shape=19, size=point_size, color="blue") + theme(plot.background = element_blank(),   panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(size =1)) +   stat_smooth(method = lm, level=.95, color="grey") + labs(x=xlab, y=ylab, size=lab_size)
  }
}


⑶ A function that draws a Spatial FeaturePlot when x, y, and color are given


my.plot <- function(x, y, col){
	# assume that length(x) = length(y) = length(col)

  plot(x, y, t="n")
  colfunc <- colorRampPalette(c("#000000", "#EB4600", "#FFF800"))
  
  coll = array(dim = length(col))
  for(i in 1 : length(col)){
    coll[i] <- colfunc(100) [as.integer( col[i] / max(col) * 99 + 1)] 
  }
  
  text(x, y, labels = "●", col = coll, cex = 1)
}


⑷ spatial feature plot


# tissue_dir : the directory that contains a filtered_feature_bc_matrix.h5
tissue_dir <- './outs/' 

# Tgenes : genes of interest
Tgenes <- c('Slc2a1', 'Slc2a3')

conv_spatial_feature_plot <- function(tissue_dir, Tgenes, quality.control = FALSE){
  library(Seurat)
  library(SeuratData)
  library(ggplot2)
  library(cowplot)
  library(dplyr)

  # reference : https://nbisweden.github.io/workshop-scRNAseq/labs/compiled/seurat/seurat_07_spatial.html
  
  br.sp = Load10X_Spatial(tissue_dir, slice= 'slice1')
  br.sp <- SCTransform(br.sp, assay = "Spatial", verbose = FALSE, variable.features.n = 1000)

  if(quality.control){
    br.sp <- PercentageFeatureSet(br.sp, "^mt-", col.name = "percent_mito")
    br.sp <- PercentageFeatureSet(br.sp, "^Hb.*-", col.name = "percent_hb")
    br.sp <- br.sp[, br.sp$nFeature_Spatial > 500 & br.sp$percent_mito < 25 & br.sp$percent_hb < 20]
  }

  SpatialFeaturePlot(br.sp, features = Tgenes)
}

conv_spatial_feature_plot(tissue_dir, Tgenes)


⑸ enhanced volcano plot given gene.list, log FC value vector, and adjusted p value value vector


my.EnhancedVolcano <- function(gene.name, logFC, adj.P.Val, 
                               pCutoff = 0.05, FCcutoff = 0.3,
                               xlim = c(-0.5, 0.5), ylim = c(-0.5, 0.5)){
  # install.packages("BiocManager")
  # BiocManager::install("EnhancedVolcano")
  library(EnhancedVolcano)


  tested <- matrix(0, nrow = length(gene.name), ncol = 2)
  tested <- as.data.frame(tested)
  for(i in 1:length(gene.name)){
    tested[i, 1] <- logFC[i]
    tested[i, 2] <- adj.P.Val[i]
  }
  rownames(tested) <- gene.name
  colnames(tested) <- c('logFC', 'adj.P.Val')

  EnhancedVolcano(tested, lab = rownames(tested), 
                  x='logFC', y='adj.P.Val', xlim = xlim, ylim = ylim, 
                  pCutoff = pCutoff, FCcutoff = FCcutoff) 
}


⑹ How to find the gene ontology (GO) from the gene list


GO <- function(gene){
  library(EnhancedVolcano)
  library(clusterProfiler)
  library(org.Hs.eg.db)
  library(org.Mm.eg.db)
  library(enrichplot)

  # ont = "ALL", "BP", "CC", "MF"
  # showCategory is not mandatory

  gene <- gsub('.*-', '', gene) # -와 그 앞에 있는 것들을 제거 (예 : "GRCh38-")
  gene <- gsub('\\ .*$', '', gene) # 'KLK2 ENSG00000167751' 같은 것을 해결 
    
  if (gene[1] == toupper(gene[1])){ ## Human gene
      gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
      gene.df <- as.vector(gene.df[[2]])
      GO <- enrichGO(gene.df, OrgDb = 'org.Hs.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
      return(GO)
  } else{ ## Mouse gene?
      gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
      gene.df <- as.vector(gene.df[[2]])
      GO <- enrichGO(gene.df, OrgDb = 'org.Mm.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
      return(GO)
  }
}


⑺ How to draw a gene onology (GO) plot from the gene list


GO.plot <- function(gene){
  library(EnhancedVolcano)
  library(clusterProfiler)
  library(org.Hs.eg.db)
  library(org.Mm.eg.db)
  library(enrichplot)

  # ont = "ALL", "BP", "CC", "MF"
  # showCategory is not mandatory

  gene <- gsub('.*-', '', gene) # -와 그 앞에 있는 것들을 제거 (예 : "GRCh38-")
  gene <- gsub('\\ .*$', '', gene) # 'KLK2 ENSG00000167751' 같은 것을 해결 
    
  if (gene[1] == toupper(gene[1])){ ## Human gene
      gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
      gene.df <- as.vector(gene.df[[2]])
      GO <- enrichGO(gene.df, OrgDb = 'org.Hs.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
      dotplot(GO,split="ONTOLOGY", showCategory = 5)+facet_grid(ONTOLOGY~., scale="free")
  } else{ ## Mouse gene?
      gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
      gene.df <- as.vector(gene.df[[2]])
      GO <- enrichGO(gene.df, OrgDb = 'org.Mm.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
      dotplot(GO,split="ONTOLOGY", showCategory = 5)+facet_grid(ONTOLOGY~., scale="free")
  }
}

### Example
GO.plot(c("COL1A1", "COL1A2", "COL3A1", "COL6A3"))


⑻ How to draw a cnetplot from the gene list


my.cnetplot <- function(gene.list){
  GO <- function(gene){
    library(EnhancedVolcano)
    library(clusterProfiler)
    library(org.Hs.eg.db)
    library(org.Mm.eg.db)
    library(enrichplot)
  
    # ont = "ALL", "BP", "CC", "MF"
    # showCategory is not mandatory
  
    gene <- gsub('.*-', '', gene) # -와 그 앞에 있는 것들을 제거 (예 : "GRCh38-")
    gene <- gsub('\\ .*$', '', gene) # 'KLK2 ENSG00000167751' 같은 것을 해결 
      
    if (gene[1] == toupper(gene[1])){ ## Human gene
        gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
        gene.df <- as.vector(gene.df[[2]])
        GO <- enrichGO(gene.df, OrgDb = 'org.Hs.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
        return(GO)
    } else{ ## Mouse gene?
        gene.df <- bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)
        gene.df <- as.vector(gene.df[[2]])
        GO <- enrichGO(gene.df, OrgDb = 'org.Mm.eg.db',keyType = "ENTREZID", ont = "ALL", pvalueCutoff = 0.05, pAdjustMethod = "BH")
        return(GO)
    }
  }
  
  go = GO(gene.list)
  ego_ <- 0
  if (gene.list[1] == toupper(gene.list[1])){ ## Human gene
    ego_ <- setReadable(go, org.Hs.eg.db, keyType='ENTREZID')
  } else { ## Mouse gene?
    ego_ <- setReadable(go, org.Mm.eg.db, keyType='ENTREZID')
  }
  
  cnetplot(ego_, categorySize="pvalue", foldChange= gene.list )
}

my.cnetplot(c("COL1A1", "COL1A2", "COL3A1", "COL6A3"))


⑼ Violin plot with statistical analysis


VlnPlot(object = br.sp, features = c('Col1a1'),
        group.by = 'orig.ident', pt.size = 0.1) + 
    	facet_grid(.~tnbc.merge@active.ident)+
    	fill_palette(palette='npg')+
    	stat_compare_means(method = "anova", label='p')+
    	theme(axis.text.x = element_text(angle = 90, hjust = 1),
    	strip.text.x = element_text(size = rel(0.7)))



Input : 2022.05.03 00:13

results matching ""

    No results matching ""