load('input-data/TFBS_TOBIAS/EVE_duplicates_TFBS.RData')
load('input-data/TFBS_TOBIAS/downdown_promotersimilarity_total.RData')
load('input-data/TFBS_TOBIAS/downdown_promotersimilarity_bound.RData')


gene.pathway <- getGeneKEGGLinks(species.KEGG = "els")
EVEresTbl <- read_rds("data/runEVE/EVEresTbl.RDS")
keggResDuplicate <- read_tsv("data/KEGGanalysis/keggResDuplicate.tsv",col_types = cols())
dups <- 
  EVEresTbl %>%
  filter(dup_type == "duplicate") %>%
  arrange(pval) %>%
  transmute(N3, Eluc,
            dupSigDir = ifelse(dupSigDir == "", "none", dupSigDir),
            Ssal = paste(Ssal, N3partial, sigDir, pval, sep = ":")) %>%
  group_by(N3) %>%
  mutate(dup = paste0("Ssal", row_number())) %>%
  spread(dup, Ssal) %>%
  separate(Ssal1, into = c("Ssal1.ID", "Ssal1.partial", "Ssal1.sigDir", "Ssal1.pval"), sep = ":") %>%
  separate(Ssal2, into = c("Ssal2.ID", "Ssal2.partial", "Ssal2.sigDir", "Ssal2.pval"), sep = ":") %>%
  mutate(Ssal1.pval = as.double(Ssal1.pval), Ssal2.pval = as.double(Ssal2.pval)) %>%
  ungroup() %>%
  filter(dupSigDir != "none") %>%
  arrange(dupSigDir) %>%
  left_join(gene.pathway,
             by = c("Eluc" = "GeneID")) %>%
  left_join(keggResDuplicate %>%
              group_by(Pathway) %>%
              gather(dupSigDir, Nshift, both:upup) %>%
              group_by(Pathway, dupSigDir) %>%
              gather(dupSigDir2, PathwayPval, P.both:P.upup) %>%
              filter(dupSigDir == sub("P.", "", dupSigDir2)) %>%
              dplyr::select(PathwayID = KEGG_ID, Pathway, N, dupSigDir, Nshift, PathwayPval),
            by = c("PathwayID", "dupSigDir"))

The input-data

The input table for these analyses is EVE_duplicates_TFBS.RData.

  • This table holds information about the number of predicted Transcription Factor Binding Sites in promoters (-3000 / +200 bp form TSS) for 5829 salmonid ohnolog pairs.
  • We used the tool TOBIAS with the Jaspar database Position Weight Matrices to perform simple TFBS matches in open chromatin (referred to as ‘total TFBS’) as well as footprinting analyses to predict TFBS ‘bound’ by TFs.
  • The table was generated with the script input-data/TFBS_TOBIAS/pre-processing_TOBIAS_results_for_eve-cre.R.

TFBS numbers per ohnolog gene

When we then intersect the duplicate table with the TOBIAS result we find that using ALL tfbs 94% of promoters of duplicated genes have at least one predicted tfbs in open chromatin, while only 83% of ohnolog gene promoters is predicted to have at least one ‘bound’ tfbs.



par(mfrow=c(1,2))
hist(dups.long$tfbs.total, 30, main='TFBS matches in open chromatin in promoters', xlab = 'Total TFBS matches per promoter')
hist(dups.long$tfbs.bound, 30, main='Predicted TFBS bound by TFs in promoters', xlab = 'Bound TFBS per promoter')

par(mfrow=c(1,1))



Number TFBS stratified on EVE-result categories

Plots of the tfbs numbers for the different categories of regulated dup pairs:

plot_countboxplot = function(dupstable, tfbs.set = c('total', 'bound'), main.text=''){
      
      par(mfrow=c(1,1))
      cols = c('darkred', 'darkblue', 'darkblue', 'darkgrey', 'darkblue', 'darkblue',
               'darkgrey', 'darkgrey', 'darkred', 'darkgrey', 'darkred', 'darkred')
      
      cols <- alpha(cols, 0.5)
    
      if(tfbs.set == 'total'){
      boxplot(tfbs.total ~ subclass + class, data = dupstable, col=cols,
              names=rep("", 12),
            at = c(1:2, 4:5, 7:8, 10:11, 13:14, 16:17), 
            xlab = 'Classes', ylab='TFBS counts per promoter',
            main=main.text)
      
      mtext(side = 1, text = c('both', 'down', 'down+down', 'cons+cons', 'up', 'up+up'),
            line = 0.5,
            at = seq(1.5, 16.5, by=3))
      }
      
      if(tfbs.set == 'bound'){
      boxplot(tfbs.bound ~ subclass + class, data = dupstable, col=cols,
              names=rep("", 12),
            at = c(1:2, 4:5, 7:8, 10:11, 13:14, 16:17), 
            xlab = 'Classes', ylab='TFBS counts per promoter',
            main=main.text, outline=F)
      
      mtext(side = 1, text = c('both', 'down', 'down+down', 'cons+cons', 'up', 'up+up'),
            line = 0.5,
            at = seq(1.5, 16.5, by=3))
      }
}

filt.zero=function(dupstable, type=c('total', 'bound')){
  if(type=='total') { dat <- dupstable %>% filter(tfbs.total>0) }
  if(type=='bound') { dat <- dupstable %>% filter(tfbs.bound>0) }
  pairs.left <- names(table(dat$pair_id))[table(dat$pair_id)==2]
  dat %>% filter(pair_id %in% pairs.left)
}
plot_countboxplot(filt.zero(dups.long, 'bound'), tfbs.set = 'bound', main.text = 'BOUND tfbs per EVE category')

# dev.copy(pdf, 'TFBS_counts_bound_regulatory_categories.pdf', height = 5, width = 10, onefile=T)
# dev.off()
plot_countboxplot(dupstable=filt.zero(dups.long, 'total'), tfbs.set = 'total', main.text = 'Total tfbs per EVE category')

# dev.copy(pdf, 'TFBS_counts_total_regulatory_categories.pdf', height = 5, width = 10, onefile=T)
# dev.off()
test.alternative = c('two.sided', 'less', 'less', 'two.sided', 'greater', 'greater')
names(test.alternative) <- c('both', 'down', 'downdown', 'none', 'up', 'upup')


make_wilcox = function(dupslong, tfbs.set = c('total', 'bound')){
  bind_rows(lapply(unique(dupslong$class), function(i){
    dat <- dplyr::filter(dupslong, class %in% i)
    alt <- as.character(test.alternative[match(i, names(test.alternative))])
    
    if(tfbs.set == 'total'){
    test <- dat %>% wilcox_test(tfbs.total ~ subclass, paired = T,
                        alternative = alt)
    test$.y. <- i
    test$alternative <- alt
    effect <- dat %>% wilcox_effsize(tfbs.total ~ subclass, paired = T,
                        alternative = alt)
    test$effsize <- effect$effsize
    test$magnitude <- effect$magnitude
    return(test)
    
    }
    
    if(tfbs.set == 'bound'){
    test <- dat %>% wilcox_test(tfbs.bound ~ subclass, paired = T,
                        alternative = alt)
    test$.y. <- i
    test$alternative <- alt
    effect <- dat %>% wilcox_effsize(tfbs.bound ~ subclass, paired = T,
                        alternative = alt)
    test$effsize <- effect$effsize
    test$magnitude <- effect$magnitude
    return(test)
    }
    
    }
  ))
}



dups.wilcox.total <- make_wilcox(dupslong = filt.zero(dups.long, 'total'), tfbs.set = 'total')
dups.wilcox.bound <- make_wilcox(dupslong = filt.zero(dups.long, 'bound'), tfbs.set = 'bound')



We then tested using paired Wilcox tests if there was an association between regulatory evolution and number of TFBS in promoters.

kable(dups.wilcox.bound, caption='Wilcox paired test on BOUND tfbs data')
Wilcox paired test on BOUND tfbs data
.y. group1 group2 n1 n2 statistic p alternative effsize magnitude
both 1 2 49 49 964.5 4.71e-04 two.sided 0.5002192 large
down 1 2 884 884 140679.5 0.00e+00 less 0.2368196 small
downdown 1 2 308 308 18631.5 8.64e-04 less 0.1781333 small
none 1 2 2577 2577 1679327.0 4.65e-01 two.sided 0.0143342 small
up 1 2 355 355 38609.0 6.15e-05 greater 0.2036789 small
upup 1 2 62 62 967.0 5.28e-01 greater 0.0084591 small
kable(dups.wilcox.total, caption='Wilcox paired test on TOTAL tfbs data')
Wilcox paired test on TOTAL tfbs data
.y. group1 group2 n1 n2 statistic p alternative effsize magnitude
both 1 2 55 55 1033.5 2.76e-02 two.sided 0.2976965 small
down 1 2 1085 1085 250696.0 1.98e-05 less 0.1247846 small
downdown 1 2 400 400 37309.0 1.49e-01 less 0.0517671 small
none 1 2 3148 3148 2561751.0 5.86e-02 two.sided 0.0337657 small
up 1 2 396 396 44837.0 5.80e-03 greater 0.1269003 small
upup 1 2 66 66 997.5 6.89e-01 greater 0.0617265 small




Jaccard and correlation of similar motifs across down+cons and down+down

If symmetric regulatory evolution of ohnologs in the down+down category is mostly driven by trans-regulatory mutations the TFBS reportoire should be relatively constant across duplicate promoters if we assume strong selection on promoter TFBS content. To test this we parsed the predicted bound tfbs in liver and compared across duplicate pairs using jaccard index and correlations of the counts of each TFBS in promoters.

bound_down_cor <- filter(promotersimilarity_bound, shift == 'down') %>% select(correlation)
bound_downdown_cor <- filter(promotersimilarity_bound, shift == 'downdown') %>% select(correlation)

total_down_cor <- filter(promotersimilarity_total, shift == 'down') %>% select(correlation)
total_downdown_cor <- filter(promotersimilarity_total, shift == 'downdown') %>% select(correlation)


par(mfrow=c(1,2))
boxplot(bound_down_cor$correlation,
        bound_downdown_cor$correlation,
        main='BOUND correlation',
        names=c('down-cons', 'down-down'))

wt.bound = wilcox.test(x=bound_down_cor$correlation,
                       y=bound_downdown_cor$correlation, alternative = 'less')

segments(x0 = 1.1, y0=0.45, y1=0.46)
segments(x0 = 1.9, y0=0.45, y1=0.46)
segments(x0 = 1.1, x1=1.9, y0=0.46)
text(x = 1.5, y = 0.48, labels = paste0('p-value=', round(wt.bound$p.value, 3)), cex = 0.8)

ribo.pairs <- dups %>% filter(Pathway == 'Ribosome', dupSigDir=='downdown') %>% mutate(pair_id = paste(Ssal1.ID, Ssal2.ID)) %>% select(pair_id)
oxy.pairs <- dups %>% filter(Pathway == 'Oxidative phosphorylation', dupSigDir=='down') %>% mutate(pair_id = paste(Ssal1.ID, Ssal2.ID)) %>% select(pair_id)

ribo.pairs.bound_corr <- promotersimilarity_bound %>% filter(pair_id %in% ribo.pairs$pair_id) %>% select(correlation)
oxy.pairs.bound_corr <- promotersimilarity_bound %>% filter(pair_id %in% oxy.pairs$pair_id) %>% select(correlation)

ribo.pairs.total_corr <- promotersimilarity_total %>% filter(pair_id %in% ribo.pairs$pair_id) %>% select(correlation)
oxy.pairs.total_corr  <- promotersimilarity_total %>% filter(pair_id %in% oxy.pairs$pair_id) %>% select(correlation)
  

points(y=ribo.pairs.bound_corr$correlation, x=jitter(rep(2, nrow(ribo.pairs.bound_corr)), 5), pch=21, bg=alpha('darkblue', 0.8))
points(y=oxy.pairs.bound_corr$correlation, x=jitter(rep(1, nrow(oxy.pairs.bound_corr)), 5), pch=21, bg=alpha('darkblue', 0.3))

legend('topleft', fill=c(alpha('darkblue', 0.8), alpha('darkblue', 0.3)), legend=c('Ribosome', 'Oxidative phoshorylation'), bty='n', cex = 0.8)

boxplot(total_down_cor$correlation,
        total_downdown_cor$correlation,
        main='TOTAL correlation',
        names=c('down-cons', 'down-down'))

wt.total = wilcox.test(x=total_down_cor$correlation,
                       y=total_downdown_cor$correlation, alternative = 'less')

segments(x0 = 1.1, y0=0.45, y1=0.46)
segments(x0 = 1.9, y0=0.45, y1=0.46)
segments(x0 = 1.1, x1=1.9, y0=0.46)
text(x = 1.5, y = 0.48, labels = paste0('p-value=', round(wt.total$p.value, 3)), cex = 0.8)

points(y=ribo.pairs.total_corr$correlation, x=jitter(rep(2, nrow(ribo.pairs.total_corr)), 5), pch=21, bg=alpha('darkblue', 0.8))
points(y=oxy.pairs.total_corr$correlation , x=jitter(rep(1, nrow(oxy.pairs.total_corr)), 5), pch=21, bg=alpha('darkblue', 0.3))
legend('topleft', fill=c(alpha('darkblue', 0.8), alpha('darkblue', 0.3)), legend=c('Ribosome', 'Oxidative phoshorylation'), bty='n', cex = 0.8)

wt.oxy.ribo <- wilcox.test(oxy.pairs.bound_corr$correlation,
                           ribo.pairs.bound_corr$correlation,
                           'less')

# dev.copy(device = pdf, 'Figures/SuppFig16 - Similarity of bTFBSs in promoters.pdf', width=12, height=6)
# dev.off()


Comparing conservation of bound and total tfbs in promoters of down-cons VS down-down we find a tendency of slightly higher conservation of the same tfbs-elements in symetrically evolving dups (down-down) compared to asymmetrical dups (down-cons) (bound tfbs P=0.234). Looking at all tfbs in ribosome vs oxidative phosphorylation pathway the same trend is not apparent - the tfbs contens isnt more correlated in ribosome pairs compared to oxy-phospho pairs (P=0.67.