Now you have completed the transcript-level quantification using Salmon. The next step is to use txiimport to aggregate the data to gene-level for downstream analysis, e.g. differential expression analysis by DESeq2.

Setwd and load necessary packages

You may need to install some of the packages.

# setwd("~/Documents/RNAseq_Bootcamp")
# getwd()
library("devtools")

## Loading required package: usethis

library("BiocStyle")
library("knitr")
library("rmarkdown")

## 
## Attaching package: 'rmarkdown'

## The following objects are masked from 'package:BiocStyle':
## 
##     html_document, md_document, pdf_document

library("dplyr")

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library("ggplot2")
library("pheatmap")
library("RColorBrewer")

Install txiimport

# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("tximport")
# To view documentation
browseVignettes("tximport")

## starting httpd help server ... done

# Load txiimport
library(tximport)

Prepare coldata table that contains the sample information

# Create a csv file containing the sample information and locate the directory containing the file
dir = "/Users/hanruizhang/Bioinformatics/Zhang_RNAseq_Bootcamp_20190710"
list.files(dir)

##  [1] "gencode.v30_salmon_0.14.0"        "gencode.v30.annotation.gtf.gz"   
##  [3] "gencode.v30.annotation.sqlite"    "gencode.v30.transcripts.fa.gz"   
##  [5] "GSE55536_DESeq2.csv"              "GSE55536_fastq"                  
##  [7] "GSE55536_quants"                  "GSE55536_sample.csv"             
##  [9] "GSE55536.R"                       "multiqc_data"                    
## [11] "multiqc_report.html"              "RNAseq_analysis_workflow.md"     
## [13] "RNAseq_Bootcamp_GSE55536.html"    "RNAseq_Bootcamp_GSE55536.nb.html"
## [15] "RNAseq_Bootcamp_GSE55536.Rmd"

# Read the file and check everything looks correct
coldata <- read.csv("GSE55536_sample.csv", header = TRUE)
head(coldata)

list.files(file.path(dir,"GSE55536_quants"))

## [1] "M0_HMDM_1" "M0_HMDM_2" "M0_HMDM_3" "M1_HMDM_1" "M1_HMDM_2" "M1_HMDM_3"

colnames(coldata)

## [1] "X"         "id"        "subject"   "treatment"

idx <- c("id","subject","treatment")
coldata[,idx]

levels(coldata$id)

## [1] "M0_HMDM_1" "M0_HMDM_2" "M0_HMDM_3" "M1_HMDM_1" "M1_HMDM_2" "M1_HMDM_3"

coldata$subject <- as.factor(coldata$subject) # change numeric number of subject id to factor
levels(coldata$subject)

## [1] "1" "2" "3"

levels(coldata$treatment)

## [1] "M0" "M1"

Define files for tximport to read by create a named vector pointing to the quantification files

files <- file.path(dir,"GSE55536_quants",coldata$id,"quant.sf")
names(files) <- coldata$id
all(file.exists(files)) # the output should be "TRUE"

## [1] TRUE

files

##                                                                                             M0_HMDM_1 
## "/Users/hanruizhang/Bioinformatics/Zhang_RNAseq_Bootcamp_20190710/GSE55536_quants/M0_HMDM_1/quant.sf" 
##                                                                                             M0_HMDM_2 
## "/Users/hanruizhang/Bioinformatics/Zhang_RNAseq_Bootcamp_20190710/GSE55536_quants/M0_HMDM_2/quant.sf" 
##                                                                                             M0_HMDM_3 
## "/Users/hanruizhang/Bioinformatics/Zhang_RNAseq_Bootcamp_20190710/GSE55536_quants/M0_HMDM_3/quant.sf" 
##                                                                                             M1_HMDM_1 
## "/Users/hanruizhang/Bioinformatics/Zhang_RNAseq_Bootcamp_20190710/GSE55536_quants/M1_HMDM_1/quant.sf" 
##                                                                                             M1_HMDM_2 
## "/Users/hanruizhang/Bioinformatics/Zhang_RNAseq_Bootcamp_20190710/GSE55536_quants/M1_HMDM_2/quant.sf" 
##                                                                                             M1_HMDM_3 
## "/Users/hanruizhang/Bioinformatics/Zhang_RNAseq_Bootcamp_20190710/GSE55536_quants/M1_HMDM_3/quant.sf"

Create a data.frame to associate transcript ID with gene ID

This data.frame is required because transcripts IDs in salmon need to be associated with gene IDs for gene-level summarization

## Make TxDb - saveDb to make it quicker next time
# BiocManager::install("rtracklayer")
# BiocManager::install("GenomicFeatures")
library(rtracklayer)

## Loading required package: GenomicRanges

## Loading required package: stats4

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply, Map,
##     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unsplit, which,
##     which.max, which.min

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following objects are masked from 'package:dplyr':
## 
##     first, rename

## The following object is masked from 'package:base':
## 
##     expand.grid

## Loading required package: IRanges

## 
## Attaching package: 'IRanges'

## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice

## Loading required package: GenomeInfoDb

library(GenomicFeatures)

## Loading required package: AnnotationDbi

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## 
## Attaching package: 'AnnotationDbi'

## The following object is masked from 'package:dplyr':
## 
##     select

gtf <- "gencode.v30.annotation.gtf.gz"
txdb.filename <- "gencode.v30.annotation.sqlite"
txdb <- makeTxDbFromGFF(gtf) # This step takes some time.

## Import genomic features from the file as a GRanges object ...

## OK

## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ...

## Warning in .get_cds_IDX(mcols0$type, mcols0$phase): The "phase" metadata column contains non-NA values for features of
##   type stop_codon. This information was ignored.

## OK

saveDb(txdb, txdb.filename)

## TxDb object:
## # Db type: TxDb
## # Supporting package: GenomicFeatures
## # Data source: gencode.v30.annotation.gtf.gz
## # Organism: NA
## # Taxonomy ID: NA
## # miRBase build ID: NA
## # Genome: NA
## # transcript_nrow: 208621
## # exon_nrow: 710152
## # cds_nrow: 274588
## # Db created by: GenomicFeatures package from Bioconductor
## # Creation time: 2019-10-15 15:05:12 -0400 (Tue, 15 Oct 2019)
## # GenomicFeatures version at creation time: 1.36.4
## # RSQLite version at creation time: 2.1.2
## # DBSCHEMAVERSION: 1.2

k <- keys(txdb, keytype = "TXNAME")
tx2gene <- select(txdb, k, "GENEID", "TXNAME")

## 'select()' returned 1:1 mapping between keys and columns

head(tx2gene)

Import data table from Salmon output using txiimport

library("tximport")
library("jsonlite")
library("readr") # the "readr" makes tximport quicker
txi <- tximport(files, type="salmon", tx2gene=tx2gene)

## reading in files with read_tsv

## 1 2 3 4 5 6 
## summarizing abundance
## summarizing counts
## summarizing length

Now check your txi - the SummarizedExperiment object

names(txi)

## [1] "abundance"           "counts"              "length"             
## [4] "countsFromAbundance"

txi$counts[1:3,1:3]

##                    M0_HMDM_1 M0_HMDM_2 M0_HMDM_3
## ENSG00000000003.14         0         0         0
## ENSG00000000005.6          0         0         0
## ENSG00000000419.12        47        31        20

txi$length[1:3,1:3]

##                    M0_HMDM_1 M0_HMDM_2 M0_HMDM_3
## ENSG00000000003.14 3662.2410  3662.241  3662.241
## ENSG00000000005.6   684.7500   684.750   684.750
## ENSG00000000419.12  878.6459   886.474   850.887

txi$abundance[1:3,1:3]

##                    M0_HMDM_1 M0_HMDM_2 M0_HMDM_3
## ENSG00000000003.14   0.00000   0.00000   0.00000
## ENSG00000000005.6    0.00000   0.00000   0.00000
## ENSG00000000419.12  79.98988  49.94774  36.35912

txi$countsFromAbundance

## [1] "no"

Create a DESeqDataSet object

# Construct a DESeqDataSet (dds) using txi
library("DESeq2")

## Loading required package: SummarizedExperiment

## Loading required package: DelayedArray

## Loading required package: matrixStats

## 
## Attaching package: 'matrixStats'

## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians

## The following object is masked from 'package:dplyr':
## 
##     count

## Loading required package: BiocParallel

## 
## Attaching package: 'DelayedArray'

## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges

## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum

dds <- DESeqDataSetFromTximport(txi, coldata, design = ~subject + treatment) # dds is now ready for DESeq() see DESeq2 vignette

## using counts and average transcript lengths from tximport

genetable <- data.frame(gene.id = rownames(txi$counts)) 
names(assays(dds))

## [1] "counts"      "avgTxLength"

# EdgeR
library("edgeR")

## Loading required package: limma

## 
## Attaching package: 'limma'

## The following object is masked from 'package:DESeq2':
## 
##     plotMA

## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA

cts <- txi$counts
normMat <- txi$length
normMat <- normMat/exp(rowMeans(log(normMat)))
o <- log(calcNormFactors(cts/normMat)) + log(colSums(cts/normMat))
dge <- DGEList(counts = cts,
               samples = coldata,
               genes = genetable)
dge <- scaleOffset(dge, t(t(log(normMat)) + o))
names(dge)

## [1] "counts"  "samples" "genes"   "offset"

Pre-filtering the DE data

# dds
nrow(dds)

## [1] 58434

dds <- dds[ rowSums(counts(dds)) > 1, ]
nrow(dds)

## [1] 15940

# dge
dge <- dge[rowSums(round(dge$counts)) > 1, ]
all(rownames(dge) == rownames(dds))

## [1] TRUE

dge <- dge[filterByExpr(dge),]

Variance stablizing transformation

library("vsn")
# Transformation with vst (for n>30)
vsd <- vst(dds, blind = FALSE)

## using 'avgTxLength' from assays(dds), correcting for library size

head(assay(vsd), 3)

##                    M0_HMDM_1 M0_HMDM_2 M0_HMDM_3 M1_HMDM_1 M1_HMDM_2
## ENSG00000000419.12  6.209145  5.867141  5.608826  5.794110  5.476258
## ENSG00000000457.14  4.923380  4.878699  4.840362  6.080892  5.902086
## ENSG00000000460.17  5.350176  5.474443  5.323995  4.937214  4.919341
##                    M1_HMDM_3
## ENSG00000000419.12  5.729253
## ENSG00000000457.14  6.215809
## ENSG00000000460.17  4.776723

meanSdPlot(assay(vsd), ranks = FALSE)

# Transformation with rlog (for n<30)
rld <- rlog(dds, blind = FALSE)

## using 'avgTxLength' from assays(dds), correcting for library size

head(assay(rld), 3)

##                    M0_HMDM_1 M0_HMDM_2 M0_HMDM_3 M1_HMDM_1 M1_HMDM_2
## ENSG00000000419.12  5.112337  4.692518  4.348300  4.597367  4.165171
## ENSG00000000457.14  2.953807  2.820133  2.810344  4.652513  4.421047
## ENSG00000000460.17  3.231867  3.398876  3.194962  2.672349  2.646414
##                    M1_HMDM_3
## ENSG00000000419.12  4.512478
## ENSG00000000457.14  4.805362
## ENSG00000000460.17  2.487057

meanSdPlot(assay(rld), ranks = FALSE)

# Plot sample distance

# Plot sample-to-sample distances using the rlog-transformed values
sampleDists <- dist(t(assay(rld)))
sampleDists

##           M0_HMDM_1 M0_HMDM_2 M0_HMDM_3 M1_HMDM_1 M1_HMDM_2
## M0_HMDM_2  54.78074                                        
## M0_HMDM_3  59.58941  75.12378                              
## M1_HMDM_1 111.37075 110.33474 122.91857                    
## M1_HMDM_2 120.02160 116.20741 129.00920  61.14667          
## M1_HMDM_3 112.29956 112.37634 116.69185  61.94952  60.95431

sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(rld$treatment, rld$subject, sep = " - ")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows = sampleDists,
         clustering_distance_cols = sampleDists,
         col = colors)

# Plot sample-to-sample distances using the Poisson distance
library("PoiClaClu")
poisd <- PoissonDistance(t(counts(dds)))
samplePoisDistMatrix <- as.matrix(poisd$dd)
rownames(samplePoisDistMatrix) <- paste(vsd$treatment, vsd$subject, sep = " - ")
colnames(samplePoisDistMatrix) <- NULL
pheatmap(samplePoisDistMatrix,
         clustering_distance_rows = poisd$dd,
         clustering_distance_cols = poisd$dd,
         col = colors)

# PCA plot

### PCA plot using DESeq2
plotPCA(vsd, intgroup = c("treatment", "subject"))

### PCA plot using qqplot
pcaData <- plotPCA(vsd, intgroup = c("treatment", "subject"), returnData = TRUE)
pcaData

percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(x = PC1, y = PC2, color = treatment, shape = subject)) +
  geom_point(size =3) +
  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", percentVar[2], "% variance")) +
  coord_fixed()

# MDS plot

# MDS plot from the VST data
mds <- as.data.frame(colData(vsd))  %>%
  cbind(cmdscale(sampleDistMatrix))
ggplot(mds, aes(x = `1`, y = `2`, color = treatment, shape = subject)) +
  geom_point(size = 3) + coord_fixed()

# MDS plot with poisson distribution
mdsPois <- as.data.frame(colData(dds)) %>%
  cbind(cmdscale(samplePoisDistMatrix))
ggplot(mdsPois, aes(x = `1`, y = `2`, color = treatment, shape = subject)) +
  geom_point(size = 3) + coord_fixed()

# Differential expression (DE) analysis

dds <- DESeq(dds)

## estimating size factors

## using 'avgTxLength' from assays(dds), correcting for library size

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

res <- results(dds)
head(res)

## log2 fold change (MLE): treatment M1 vs M0 
## Wald test p-value: treatment M1 vs M0 
## DataFrame with 6 rows and 6 columns
##                            baseMean      log2FoldChange             lfcSE
##                           <numeric>           <numeric>         <numeric>
## ENSG00000000419.12 24.7985738722766  -0.457154423198636 0.637998030608425
## ENSG00000000457.14 19.5760629521881     3.1207087694737 0.749272065373874
## ENSG00000000460.17 8.39290563946875   -1.69466913322939  1.01901475884969
## ENSG00000000938.13 116.053409790579    -1.5143736980187   0.4326122525949
## ENSG00000000971.15 35.9943841917436    4.01145105983981  1.70662421990445
## ENSG00000001036.13 115.962664360505 -0.0919167365618874 0.380351171423933
##                                  stat               pvalue
##                             <numeric>            <numeric>
## ENSG00000000419.12 -0.716545194916467    0.473654771291156
## ENSG00000000457.14   4.16498747743453 3.11369654741656e-05
## ENSG00000000460.17  -1.66304670124936   0.0963030875514567
## ENSG00000000938.13  -3.50053353536606 0.000464327812874363
## ENSG00000000971.15   2.35051806546165   0.0187472975925626
## ENSG00000001036.13 -0.241662819698374    0.809041441297499
##                                    padj
##                               <numeric>
## ENSG00000000419.12    0.714941486997797
## ENSG00000000457.14 0.000391499267830781
## ENSG00000000460.17    0.267680171310452
## ENSG00000000938.13  0.00397072083277613
## ENSG00000000971.15   0.0817347395918228
## ENSG00000001036.13    0.914251343483738

res <- results(dds, contrast = c("treatment", "M0", "M1"))
mcols(res, use.names = TRUE)

## DataFrame with 6 rows and 2 columns
##                        type                                description
##                 <character>                                <character>
## baseMean       intermediate  mean of normalized counts for all samples
## log2FoldChange      results log2 fold change (MLE): treatment M0 vs M1
## lfcSE               results         standard error: treatment M0 vs M1
## stat                results         Wald statistic: treatment M0 vs M1
## pvalue              results      Wald test p-value: treatment M0 vs M1
## padj                results                       BH adjusted p-values

summary(res)

## 
## out of 15940 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1248, 7.8%
## LFC < 0 (down)     : 1594, 10%
## outliers [1]       : 0, 0%
## low counts [2]     : 4327, 27%
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

res.05 <- results(dds, alpha = 0.05)
table(res.05$padj < 0.05)

## 
## FALSE  TRUE 
##  9303  2310

resLFC1 <- results(dds, lfcThreshold = 1)
table(resLFC1$padj < 0.05)

## 
## FALSE  TRUE 
## 10615   689

dim(resLFC1)

## [1] 15940     6

Multiple testing

sum(res$pvalue < 0.05, na.rm=TRUE)

## [1] 3472

sum(!is.na(res$pvalue))

## [1] 15940

sum(res$padj < 0.1, na.rm=TRUE)

## [1] 2842

resSig <- subset(res, padj < 0.1)
head(resSig[ order(resSig$log2FoldChange), ])

## log2 fold change (MLE): treatment M0 vs M1 
## Wald test p-value: treatment M0 vs M1 
## DataFrame with 6 rows and 6 columns
##                            baseMean    log2FoldChange             lfcSE
##                           <numeric>         <numeric>         <numeric>
## ENSG00000128271.22  129.11167807625 -10.5331563244342  1.24692381029415
## ENSG00000172724.12 429.584688891353 -10.4705674716919  1.39605472067509
## ENSG00000110436.13  97.804574961149 -10.2116187648473  1.25426808759635
## ENSG00000166016.6  99.0987984959861 -10.1661467564108  1.25452298017705
## ENSG00000126353.3  824.105004786337 -10.0381053242188 0.776846924798231
## ENSG00000197272.2  82.6671087129431 -9.89129103393588  1.26131000756679
##                                 stat               pvalue
##                            <numeric>            <numeric>
## ENSG00000128271.22 -8.44731349058884 2.98082527814157e-17
## ENSG00000172724.12 -7.50011250750163 6.37630806976776e-14
## ENSG00000110436.13 -8.14149611700365 3.90423063987907e-16
## ENSG00000166016.6  -8.10359548373997 5.33583134435151e-16
## ENSG00000126353.3  -12.9216001296857 3.39996398476153e-38
## ENSG00000197272.2  -7.84207766100048 4.43152136561998e-15
##                                    padj
##                               <numeric>
## ENSG00000128271.22 3.06339150044762e-15
## ENSG00000172724.12 4.02435139207679e-12
## ENSG00000110436.13 3.46105575732181e-14
## ENSG00000166016.6  4.69431889408743e-14
## ENSG00000126353.3  2.46773635968973e-35
## ENSG00000197272.2  3.38574063282532e-13

head(resSig[ order(resSig$log2FoldChange, decreasing = TRUE), ])

## log2 fold change (MLE): treatment M0 vs M1 
## Wald test p-value: treatment M0 vs M1 
## DataFrame with 6 rows and 6 columns
##                            baseMean   log2FoldChange            lfcSE
##                           <numeric>        <numeric>        <numeric>
## ENSG00000178726.6  76.4924616404605 9.52767595564549 1.26336222011302
## ENSG00000124491.15 182.177927333263 9.52416292440525 1.16823634432668
## ENSG00000153012.12 38.9173565842749 8.20528058937533 1.31018661405471
## ENSG00000171659.15 62.7960740661657 8.06575790043976 1.26579636270717
## ENSG00000153823.18 25.6898689408037  7.8593491553705 1.34609217098699
## ENSG00000282608.1  23.4213886009629   7.811551598063 1.35621993935391
##                                stat               pvalue
##                           <numeric>            <numeric>
## ENSG00000178726.6    7.541523566213 4.64512106585242e-14
## ENSG00000124491.15 8.15259940392849  3.5618395904094e-16
## ENSG00000153012.12 6.26268082833025 3.78414655890105e-10
## ENSG00000171659.15  6.3720817487494 1.86479435068881e-10
## ENSG00000153823.18 5.83864116051417 5.26282822418129e-09
## ENSG00000282608.1  5.75979704426435 8.42151338649086e-09
##                                    padj
##                               <numeric>
## ENSG00000178726.6  2.98031994131183e-12
## ENSG00000124491.15  3.1818187048788e-14
## ENSG00000153012.12 1.33167557540963e-08
## ENSG00000171659.15 6.96329800467818e-09
## ENSG00000153823.18 1.51655643095328e-07
## ENSG00000282608.1  2.36229553037001e-07

Plotting results

topGene <- rownames(res)[which.min(res$padj)]
plotCounts(dds, gene = topGene, intgroup=c("treatment"))

library("ggbeeswarm")
geneCounts <- plotCounts(dds, gene = topGene, intgroup = c("treatment","subject"),
                         returnData = TRUE)
ggplot(geneCounts, aes(x = treatment, y = count, color = subject)) +
  scale_y_log10() +  geom_beeswarm(cex = 3)

# Clustering by the top variable genes

library("genefilter")

## 
## Attaching package: 'genefilter'

## The following objects are masked from 'package:matrixStats':
## 
##     rowSds, rowVars

## The following object is masked from 'package:readr':
## 
##     spec

topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 20)
mat  <- assay(vsd)[ topVarGenes, ]
mat  <- mat - rowMeans(mat)
anno <- as.data.frame(colData(vsd)[, c("subject","treatment")])
pheatmap(mat, annotation_col = anno)

# Annotating and exporting results

# Ordered the results
resOrdered <- res[order(res$pvalue),]
head(resOrdered)

## log2 fold change (MLE): treatment M0 vs M1 
## Wald test p-value: treatment M0 vs M1 
## DataFrame with 6 rows and 6 columns
##                            baseMean    log2FoldChange             lfcSE
##                           <numeric>         <numeric>         <numeric>
## ENSG00000131203.13 2021.57176312256 -9.00664225967249 0.467407974560547
## ENSG00000026751.17 2512.29357163371 -5.18496805783348 0.295683583817159
## ENSG00000123610.5  1207.20705225123 -8.03728284852897 0.481444784946851
## ENSG00000122643.20 416.278336842683 -6.37751165830618 0.411694633274812
## ENSG00000023445.14 580.430347287543 -6.16926419557139 0.400417864109125
## ENSG00000120217.14 492.197957356586  -5.4793351344632 0.359844451315732
##                                 stat               pvalue
##                            <numeric>            <numeric>
## ENSG00000131203.13  -19.269338029888 9.71709680949578e-83
## ENSG00000026751.17 -17.5355289965631 7.67322008581569e-69
## ENSG00000123610.5  -16.6940905786657 1.44707306336257e-62
## ENSG00000122643.20 -15.4908787796831 3.99788277372632e-54
## ENSG00000023445.14 -15.4070653398473 1.46725303036322e-53
## ENSG00000120217.14 -15.2269546311708  2.3425151995977e-52
##                                    padj
##                               <numeric>
## ENSG00000131203.13 1.12844645248674e-78
## ENSG00000026751.17 4.45545524282888e-65
## ENSG00000123610.5  5.60161982827651e-59
## ENSG00000122643.20 1.16068531628209e-50
## ENSG00000023445.14 3.40784188832162e-50
## ENSG00000120217.14 4.53393816882135e-49

# Write csv
write.csv(res, "GSE55536_DESeq2.csv")

Gene-level exploratory analysis and differential expression