Load the tissue atlas data. 15 tissues. Remove some lowly expressed genes.
# Import expression data
TPM_table <- read.table("input-data/TissueAtlas/Ssal_tissue.TPM.txt", header = T)
rownames(TPM_table) <- TPM_table$gene_id
TPM_table <- TPM_table[,-(1:9)]
colnames(TPM_table)[13:15] <- c("ovary","testis","skin")
# Normalize between samples
normFactor <- edgeR::calcNormFactors(object = TPM_table, method="TMM")
TPM_table_norm <- sapply(1:ncol(TPM_table),function(i){TPM_table[,i]/normFactor[i]})
dimnames(TPM_table_norm) <- dimnames(TPM_table)
TPM_table <- as.data.frame(TPM_table_norm)
remove(TPM_table_norm)
TPM_table <- log2(TPM_table+1)
# Switch ids ...
ids <- get.id("*") %>%
filter(gene_biotype=="protein_coding") %>%
separate(gene_id, into=c("gff_id","ncbi_id"))
idx <- match(rownames(TPM_table), ids$gff_id)
TPM_table <- TPM_table[!is.na(idx),]
idx <- idx[!is.na(idx)]
#... and average genes with multiple entries
TPM_table$GeneID <- ids$ncbi_id[idx]
TPM_table <- TPM_table %>%
gather(Samples, Expr, -GeneID) %>%
group_by(GeneID, Samples) %>%
summarise(MeanExpr = mean(Expr)) %>%
spread(Samples, MeanExpr)
TPM_table <- as.data.frame(TPM_table)
rownames(TPM_table) <- TPM_table$GeneID
TPM_table <- TPM_table[,-1]
paste0("Genes x Samples: ", paste0(dim(TPM_table), collapse = " x "))
## [1] "Genes x Samples: 48589 x 15"
plot(density(as.matrix(TPM_table)), xlab = "Expression values", ylab = "", main = "", col = "red", lwd=5)
Analysis of duplicates where one copy is shifted up/down and the other show no shift.
In plots, genes with a shift up are red and genes with a shift down are blue. Genes without a shift are grey.
Bar plots: Distribution of duplicates over the number of tissues where the shifted gene is up/down compared to the gene without a shift. 0 means shift in the wrong direction (i.e. not even liver is shifted) and 15 means that all tissues show a shift
Heatmap:
Note that before the heatmas is drawn, duplicates with shifts in the wrong direction are removed.
The first (left) tissue panel is the shifted duplicate, the second (right) is the one with no shift. Colors are row scaled with red signifying the gene’s highest expression and blue its lowest.
The color bar called Shifted shows the number of tissues where the shifted copy has higher/lower expression than the copy with no shift. Colors go from white, through yellow to red, where dark red means that the shifted copy has higher/lower expression in all tissues.
EVEresTbl <- readRDS("data/runEVE/EVEresTbl.RDS", refhook = NULL)
for (label in c("complete", "partial")) {
if (label == "partial") {
partial <- TRUE
} else {
partial <- FALSE
}
cat('\n##Groups: ', label, '\n')
for (direction in c("down","up")) {
if (direction == "down") {
shift_col <- rgb(59,121,255, maxColorValue = 255)
} else {
shift_col <- rgb(255,0,50, maxColorValue = 255)
}
cat('\n###Direction: ', direction, '\n')
EVE.dup <- EVEresTbl %>% filter(dup_type == "duplicate",
dupSigDir == direction,
N3partial == partial
)
EVE.shift <- EVE.dup %>% filter(sigDir == direction)
EVE.cons <- EVE.dup %>% filter(sigDir == "")
# Duplicates with shift
eve_shift <- EVE.shift$Ssal
# Duplicates with no shift
eve_cons <- EVE.cons$Ssal
idx <- is.na(match(eve_shift, rownames(TPM_table))) | is.na( match(eve_cons, rownames(TPM_table)))
eve_shift <- eve_shift[!idx]
eve_cons <- eve_cons[!idx]
cat('\n', paste0("Genes with EVE shift ", direction, ": ", length(eve_shift)), '\n\n')
# Expression of shifted versus conserved genes
idx <- match(eve_shift, rownames(TPM_table))
expr.shift <- TPM_table[idx,]
expr.shift[is.na(expr.shift)] <- 0
colnames(expr.shift) <- paste0(colnames(expr.shift), ".s")
idx <- match(eve_cons, rownames(TPM_table))
expr.cons <- TPM_table[idx,]
expr.cons[is.na(expr.cons)] <- 0
colnames(expr.cons) <- paste0(colnames(expr.cons), ".c")
pairs <- paste0(rownames(expr.shift), "-", rownames(expr.cons))
expr.eve <- cbind(expr.shift, expr.cons)
rownames(expr.eve) <- pairs
# BOXPLOT: Liver expression in shift versus no shift
p <- wilcox.test(expr.shift$liver.s, expr.cons$liver.c, paired = TRUE)
d1 <- data.frame(Evolution = factor(c(rep(direction, nrow(expr.shift)), rep("no shift", nrow(expr.cons))),
levels = c(direction, "no shift")),
Expression = c(expr.shift$liver.s, expr.cons$liver.c))
fig <- ggplot(d1, aes(x = Evolution, y = Expression, fill = Evolution)) + geom_boxplot() + guides(fill=FALSE) +
ggtitle(label = "", subtitle = paste0("P-value = ", format(p$p.value, digits = 2))) + xlab("") +
theme_bw(base_size = 30) + scale_fill_manual(values = c(shift_col, rgb(150,150,150, maxColorValue = 255)))
print(fig)
pdf(file = paste0(figurePath,"/boxplot_",direction, "_", label, ".pdf")); print (fig); dev.off()
# BAR PLOTS
idx <- c()
if (direction == "down") {
no.shifts <- rowSums(expr.shift < expr.cons)
idx <- rowSums(expr.shift < expr.cons) == ncol(expr.shift)
} else {
no.shifts <- rowSums(expr.shift > expr.cons)
idx <- rowSums(expr.shift > expr.cons) == ncol(expr.shift)
}
d2 <- data.frame(Shifts = no.shifts)
fig <- ggplot(d2, aes(x = Shifts)) + geom_bar(color = shift_col, fill = shift_col) + guides(fill=FALSE) +
ggtitle(label = "", subtitle = "") +
xlab("Tissues with shift in expression") +
ylab("Duplicates") +
theme_bw(base_size = 30) +
scale_fill_manual(values = c(shift_col)) +
geom_text(stat='count', aes(label=..count..), vjust=-0.1, size=6)
print(fig)
pdf(file = paste0(figurePath,"/barplot_",direction, "_", label, ".pdf")); print (fig); dev.off()
# Filter out duplicates with expression change in the "wrong" direction
if (direction == "up") {
idx <- expr.shift$liver.s < expr.cons$liver.c
} else { # down
idx <- expr.shift$liver.s > expr.cons$liver.c
}
eve_shift <- eve_shift[!idx]
eve_cons <- eve_cons[!idx]
expr.eve <- expr.eve[!idx,]
expr.shift <- expr.shift[!idx,]
expr.cons <- expr.cons[!idx,]
no.shifts <- no.shifts[!idx]
# HEATMAP
col.sep <- ncol(TPM_table)
dist.obs <- as.dist(1-cor(t(expr.eve)))
dist.obs.tre <- hclust(dist.obs, method = "ward.D")
names <- colnames(expr.eve)
names <- gsub("\\.c", "", names)
names <- gsub("\\.s", "", names)
annot_row <- data.frame(Shifted = no.shifts,
check.names = FALSE)
rownames(annot_row) <- rownames(expr.eve)
cluster_color_map <- colorRampPalette(c("white","yellow","red"))(col.sep+1)
cluster_color_map[col.sep+1] <- "red4"
names(cluster_color_map) <- 0:col.sep
annot_colors = list(
Shifted = cluster_color_map
)
fig <- pheatmap(mat = as.matrix(expr.eve),
color = colorRampPalette(c("blue","white","red"))(10),
cluster_rows = dist.obs.tre,
cluster_cols = FALSE,
scale = "row",
gaps_col = c(col.sep),
legend = FALSE,
border_color = NA,
annotation_legend = FALSE,
annotation_row = annot_row,
annotation_colors = annot_colors,
fontsize_row = 8,
fontsize_col = 12,
show_rownames = FALSE,
labels_col = names
)
pdf(file = paste0(figurePath,"/heatmap_", direction, "_", label, ".pdf"))
print(fig)
dev.off()
print(fig)
cat('\n')
}
}
##Groups: complete
###Direction: down
Genes with EVE shift down: 858
###Direction: up
Genes with EVE shift up: 306
##Groups: partial
###Direction: down
Genes with EVE shift down: 332
###Direction: up
Genes with EVE shift up: 107
# Save a table of number of shifted tissues
nShiftedTblComplete <-
EVEresTbl %>%
# Find complete (i.e. not partial) duplicates with one down or up
filter( dup_type=="duplicate", dupSigDir %in% c("up","down"),N3partial ==F) %>%
mutate( key=ifelse(sigDir=="","Ssal_cons","Ssal_shifted")) %>%
select( N3, Ssal, dupSigDir, key ) %>%
spread(key = "key",value = Ssal) %>%
# count the number of tissues where the shifted is greater/lower than conserved dup
mutate(nShifted = rowSums(c(up=1,down=-1)[dupSigDir]*(TPM_table[Ssal_shifted,] - TPM_table[Ssal_cons,]) > 0))
saveRDS(nShiftedTblComplete,"data/TissueAtlasAnalysis/nShiftedTblComplete.RDS")