1 Load and filter data

Load the tissue atlas data. 15 tissues. Remove some lowly expressed genes.

# Import expression data
TPM_table <- read.table("input-data/TissueAtlas/Ssal_tissue.TPM.txt", header = T)
rownames(TPM_table) <- TPM_table$gene_id
TPM_table <- TPM_table[,-(1:9)]
colnames(TPM_table)[13:15] <- c("ovary","testis","skin")

# Normalize between samples
normFactor <- edgeR::calcNormFactors(object = TPM_table, method="TMM")
TPM_table_norm <- sapply(1:ncol(TPM_table),function(i){TPM_table[,i]/normFactor[i]})
dimnames(TPM_table_norm) <- dimnames(TPM_table)
TPM_table <- as.data.frame(TPM_table_norm)
remove(TPM_table_norm)

TPM_table <- log2(TPM_table+1)
 
# Switch ids ...
ids <- get.id("*") %>% 
  filter(gene_biotype=="protein_coding") %>%
  separate(gene_id, into=c("gff_id","ncbi_id"))

idx <- match(rownames(TPM_table), ids$gff_id)
TPM_table <- TPM_table[!is.na(idx),]
idx <- idx[!is.na(idx)]

#... and average genes with multiple entries
TPM_table$GeneID <- ids$ncbi_id[idx]
TPM_table <- TPM_table %>%
  gather(Samples, Expr, -GeneID) %>%
  group_by(GeneID, Samples) %>%
  summarise(MeanExpr = mean(Expr)) %>%
  spread(Samples, MeanExpr)
TPM_table <- as.data.frame(TPM_table)
rownames(TPM_table) <- TPM_table$GeneID
TPM_table <- TPM_table[,-1]

paste0("Genes x Samples: ", paste0(dim(TPM_table), collapse = " x "))
## [1] "Genes x Samples: 48589 x 15"
plot(density(as.matrix(TPM_table)), xlab = "Expression values", ylab = "", main = "", col = "red", lwd=5)

2 EVE results

Analysis of duplicates where one copy is shifted up/down and the other show no shift.

In plots, genes with a shift up are red and genes with a shift down are blue. Genes without a shift are grey.

Bar plots: Distribution of duplicates over the number of tissues where the shifted gene is up/down compared to the gene without a shift. 0 means shift in the wrong direction (i.e. not even liver is shifted) and 15 means that all tissues show a shift

Heatmap:

Note that before the heatmas is drawn, duplicates with shifts in the wrong direction are removed.

  • The first (left) tissue panel is the shifted duplicate, the second (right) is the one with no shift. Colors are row scaled with red signifying the gene’s highest expression and blue its lowest.

  • The color bar called Shifted shows the number of tissues where the shifted copy has higher/lower expression than the copy with no shift. Colors go from white, through yellow to red, where dark red means that the shifted copy has higher/lower expression in all tissues.

EVEresTbl <- readRDS("data/runEVE/EVEresTbl.RDS", refhook = NULL)

for (label in c("complete", "partial")) {
   if (label == "partial") {
    partial <- TRUE
  } else {
    partial <- FALSE
  }

  cat('\n##Groups: ', label, '\n')

  for (direction in c("down","up")) {
    if (direction == "down") {
      shift_col <- rgb(59,121,255, maxColorValue = 255)
    } else {
      shift_col <- rgb(255,0,50, maxColorValue = 255)
    }
    
    cat('\n###Direction: ', direction, '\n')
    
    EVE.dup <- EVEresTbl %>% filter(dup_type == "duplicate",
                            dupSigDir == direction,
                            N3partial == partial
                            )
    EVE.shift <- EVE.dup %>% filter(sigDir == direction)
    EVE.cons  <- EVE.dup %>% filter(sigDir == "")
    
    # Duplicates with shift
    eve_shift <- EVE.shift$Ssal
    
    # Duplicates with no shift
    eve_cons <- EVE.cons$Ssal
  
    idx <- is.na(match(eve_shift, rownames(TPM_table))) | is.na( match(eve_cons, rownames(TPM_table)))
    eve_shift <- eve_shift[!idx]
    eve_cons <- eve_cons[!idx]
    
    cat('\n', paste0("Genes with EVE shift ", direction, ": ", length(eve_shift)), '\n\n')
    
    # Expression of shifted versus conserved genes 
    idx <- match(eve_shift, rownames(TPM_table))
    expr.shift <- TPM_table[idx,]
    expr.shift[is.na(expr.shift)] <- 0
    colnames(expr.shift) <- paste0(colnames(expr.shift), ".s")
    idx <- match(eve_cons, rownames(TPM_table))
    expr.cons <- TPM_table[idx,]
    expr.cons[is.na(expr.cons)] <- 0
    colnames(expr.cons) <- paste0(colnames(expr.cons), ".c")
    
    pairs <- paste0(rownames(expr.shift), "-", rownames(expr.cons))
    expr.eve <- cbind(expr.shift, expr.cons)
    rownames(expr.eve) <- pairs
    
    # BOXPLOT: Liver expression in shift versus no shift
    p <- wilcox.test(expr.shift$liver.s, expr.cons$liver.c, paired = TRUE)
    d1 <- data.frame(Evolution = factor(c(rep(direction, nrow(expr.shift)), rep("no shift", nrow(expr.cons))), 
                                        levels = c(direction, "no shift")),
                    Expression = c(expr.shift$liver.s, expr.cons$liver.c))
    
    fig <- ggplot(d1, aes(x = Evolution, y = Expression, fill = Evolution)) + geom_boxplot() + guides(fill=FALSE) +
      ggtitle(label = "", subtitle = paste0("P-value = ", format(p$p.value, digits = 2))) + xlab("") + 
      theme_bw(base_size = 30) + scale_fill_manual(values = c(shift_col, rgb(150,150,150, maxColorValue = 255)))
    print(fig)
    pdf(file = paste0(figurePath,"/boxplot_",direction, "_", label, ".pdf")); print (fig); dev.off()
    
    # BAR PLOTS
    idx <- c()
    if (direction == "down") {
      no.shifts <- rowSums(expr.shift < expr.cons)
      idx <- rowSums(expr.shift < expr.cons) == ncol(expr.shift)
    } else {
      no.shifts <- rowSums(expr.shift > expr.cons)
      idx <- rowSums(expr.shift > expr.cons) == ncol(expr.shift)
    }
    
    d2 <- data.frame(Shifts = no.shifts)
    fig <- ggplot(d2, aes(x = Shifts)) + geom_bar(color = shift_col, fill = shift_col) + guides(fill=FALSE) +
      ggtitle(label = "", subtitle = "") + 
      xlab("Tissues with shift in expression") + 
      ylab("Duplicates") +
      theme_bw(base_size = 30) + 
      scale_fill_manual(values = c(shift_col)) +
      geom_text(stat='count', aes(label=..count..), vjust=-0.1, size=6)
    print(fig)
    
    pdf(file = paste0(figurePath,"/barplot_",direction, "_", label, ".pdf")); print (fig); dev.off()
    
    # Filter out duplicates with expression change in the "wrong" direction
    if (direction == "up") {
      idx <- expr.shift$liver.s < expr.cons$liver.c
    } else { # down
      idx <- expr.shift$liver.s > expr.cons$liver.c
    }
    eve_shift <- eve_shift[!idx]
    eve_cons <- eve_cons[!idx]
    expr.eve <- expr.eve[!idx,]
    expr.shift <- expr.shift[!idx,]
    expr.cons <- expr.cons[!idx,]
    no.shifts <- no.shifts[!idx]
    
    # HEATMAP
    col.sep <- ncol(TPM_table)
    
    dist.obs <- as.dist(1-cor(t(expr.eve)))
    dist.obs.tre <- hclust(dist.obs, method = "ward.D")
    
    names <- colnames(expr.eve)
    names <- gsub("\\.c", "", names)
    names <- gsub("\\.s", "", names)
        
    annot_row <- data.frame(Shifted = no.shifts,
                            check.names = FALSE)
    rownames(annot_row) <- rownames(expr.eve)
    cluster_color_map <- colorRampPalette(c("white","yellow","red"))(col.sep+1)
    cluster_color_map[col.sep+1] <- "red4"
    names(cluster_color_map) <- 0:col.sep
    annot_colors = list(
      Shifted = cluster_color_map
    )
    
    fig <- pheatmap(mat = as.matrix(expr.eve),
         color = colorRampPalette(c("blue","white","red"))(10),
         cluster_rows = dist.obs.tre,
         cluster_cols = FALSE,
         scale = "row",
         gaps_col = c(col.sep),
         legend = FALSE,
         border_color = NA,
         annotation_legend = FALSE,
         annotation_row = annot_row,
         annotation_colors = annot_colors,
         fontsize_row = 8,
         fontsize_col = 12,
         show_rownames = FALSE,
         labels_col = names
    )
   
    pdf(file = paste0(figurePath,"/heatmap_", direction, "_", label, ".pdf"))
    print(fig)
    dev.off()
    
    print(fig)
    cat('\n')
  }
}

##Groups: complete

###Direction: down

Genes with EVE shift down: 858

###Direction: up

Genes with EVE shift up: 306

##Groups: partial

###Direction: down

Genes with EVE shift down: 332

###Direction: up

Genes with EVE shift up: 107

# Save a table of number of shifted tissues

nShiftedTblComplete <- 
  EVEresTbl %>% 
  # Find complete (i.e. not partial) duplicates with one down or up
  filter( dup_type=="duplicate", dupSigDir %in% c("up","down"),N3partial ==F) %>% 
  mutate( key=ifelse(sigDir=="","Ssal_cons","Ssal_shifted")) %>% 
  select( N3, Ssal, dupSigDir, key ) %>% 
  spread(key = "key",value = Ssal) %>% 
  # count the number of tissues where the shifted is greater/lower than conserved dup
  mutate(nShifted = rowSums(c(up=1,down=-1)[dupSigDir]*(TPM_table[Ssal_shifted,] - TPM_table[Ssal_cons,]) > 0)) 

saveRDS(nShiftedTblComplete,"data/TissueAtlasAnalysis/nShiftedTblComplete.RDS")