knitr::opts_chunk$set(echo = TRUE)
options(stringsAsFactors = FALSE)
assign("last.warning", NULL, envir = baseenv())

library(tidyverse)
library(Ssa.RefSeq.db)
library(gplots)
library(pheatmap)
library(edgeR)
library(gridExtra)
library(DT)

label <- "upcons"
docompute <- FALSE

dir.create("data/upcons_TFanalysis",showWarnings = F)

1 Select targets

Duplicates in EVE analysis with one copy up, and where that copy is specific to liver: tau > 0.6 and expression in liver within 90% of the highest expression in any tissue.

# Select target genes
EVE <- readRDS("data/runEVE/EVEresTbl.RDS", refhook = NULL)
targets <- EVE %>%
  filter (dup_type == "duplicate", dupSigDir == "up", N3partial == FALSE)

load("input-data/TissueAtlas/tau_all_genes.RData")
up_targets <- targets[targets$sigDir == "up",]
idx <- match(up_targets$Ssal, tau$Genes)
up_targets <- up_targets[tau$Tau[idx] > 0.60 & grepl("liver", tau$Tissue[idx]),]

targets <- targets[targets$N3 %in% up_targets$N3,]
targets.up <- targets[targets$sigDir == "up",]$Ssal
targets.cons <- targets[targets$sigDir != "up",]$Ssal
targets <- targets$Ssal

cat (length(targets), "liver specific targets with up-cons-shift\n")
## 60 liver specific targets with up-cons-shift
# Load gene information
ids <- get.id("*") %>% 
  filter(gene_biotype=="protein_coding") %>%
  separate(gene_id, into=c("gff_id","ncbi_id"), sep = ":", remove = FALSE)

info <- get.genes(ids$gene_id[match(targets,ids$ncbi_id)], match = TRUE)
info$gene <- gsub("LOC","",info$gene)
targets_tab <- data.frame(Target = targets, Symbol = info$gene, Descr = info$product)
targets_tab$Dir <- EVE$sigDir[match(targets_tab$Target, EVE$Ssal)]
targets_tab$Dir[targets_tab$Dir == ""] <- "Cons"
targets_tab$Dir[targets_tab$Dir == "up"] <- "Up"
targets_tab$Pvalue <- EVE$pval[match(targets_tab$Target, EVE$Ssal)]
targets_tab$Pvalue <- format(targets_tab$Pvalue, digits = 3, scientific = TRUE)
targets_tab$Tau <- tau$Tau[match(targets_tab$Target, tau$Genes)]
targets_tab$Tau <- format(targets_tab$Tau, digits = 2)

datatable(targets_tab, caption = "Target genes",
          rownames = FALSE, filter = "top", 
          options = list(pageLength = 10,
                         columnDefs = list(list(className = 'dt-center', targets = 0:(ncol(targets_tab)-1)))))
# Print table for supplement
idx.up <- match(targets.up, targets_tab$Target)
idx.cons <- match(targets.cons, targets_tab$Target)
targets_tab_up <- targets_tab[idx.up,1:6]
colnames(targets_tab_up) <- paste(colnames(targets_tab_up), "up")
targets_tab_cons <- targets_tab[idx.cons,1:6]
colnames(targets_tab_cons) <- paste(colnames(targets_tab_cons), "cons")
tab <- cbind(targets_tab_up, targets_tab_cons)
write.table(tab, file = paste0("data/upcons_TFanalysis/",label, "_tab.txt"), quote = FALSE, row.names = FALSE, sep = "\t")

2 Select TFs

BLAST the protein sequence of the uniprot ids in Jaspar to the salmon proteome, then take the four (two WGDs) best hit (hit: Evalue < 1E-10, alignment length > 100) and finally select liver-specific TFs (same criterium as for target genes).

# Get JASPAR unitprot ids
JASPAR <- read.csv("input-data/JASPAR_TFs/JASPAR.txt", sep="")
JASPAR <- JASPAR[JASPAR$COLLECTION == "CORE",]
JASPAR$NAME <- gsub("::", "", JASPAR$NAME)
JASPAR$NAME <- gsub("\\(", "", JASPAR$NAME)
JASPAR$NAME <- gsub("\\)", "", JASPAR$NAME)
JASPAR$Motif <- paste(JASPAR$BASE_ID, JASPAR$VERSION, sep = ".")
JASPAR$Motif <- paste(JASPAR$NAME, JASPAR$Motif, sep = "_")

# # Export uniprot ids for BLAST at https://blast.ncbi.nlm.nih.gov/Blast.cgi
# write.table(data.frame(JASPAR$ACC), "blast.uniprot.txt",
#             row.names = FALSE, col.names = FALSE, quote = FALSE)

# Unitprot to salmon TFs
tfblast <- read.table("input-data/JASPAR_TFs/BLAST.csv", header = TRUE, sep = ",")
tfblast$query <- unlist(lapply(strsplit(tfblast$query,"[.]"), function(x){x[1]}))

idx <- match(tfblast$subject, ids$protein_id)
tfblast$subject <- ids$ncbi_id[idx]
tfblast <- tfblast[!is.na(tfblast$subject),]

tfblast <- tfblast %>%
  filter(evalue < 1E-10, alignment.length > 100) %>%
  group_by(query, subject) %>%
  top_n(1, bit.score) %>%
  group_by(query) %>%
  top_n(4, bit.score)

tfs <- unique(tfblast$subject)
cat (length(tfs), "TFs with motif in JASPER\n")
## 1697 TFs with motif in JASPER
# Filter for liver specific expression
idx <- match(tfs, tau$Genes)
tfs <- tfs[!is.na(idx)]
idx <- idx[!is.na(idx)]
tfs <- tfs[tau$Tau[idx] > 0.60 & grepl("liver", tau$Tissue[idx])]

cat (length(tfs), "liver specific TFs with motif in JASPER\n")
## 56 liver specific TFs with motif in JASPER
tfblast <- tfblast[tfblast$subject %in% tfs,]
# Query/Uniprot id can have several motifs
tfblast$Motif <- c(NA)
for(i in 1:nrow(tfblast)) {
  tfblast$Motif[i] <- paste(JASPAR$Motif[grep(tfblast$query[i], JASPAR$ACC)], collapse = ",")
}
tfblast <- separate_rows(tfblast, Motif, sep = ",", convert = TRUE)

info <- get.genes(ids$gene_id[match(tfblast$subject,ids$ncbi_id)], match = TRUE)
info$gene <- gsub("LOC","",info$gene)
tfs_tab <- data.frame(TF = tfblast$subject, Symbol = info$gene, Descr = info$product, Motif = tfblast$Motif)
tfs_tab$Dir <- EVE$sigDir[match(tfs_tab$TF, EVE$Ssal)]
tfs_tab$Dir[is.na(tfs_tab$Dir)] <- "unknown"
tfs_tab$Pvalue <- EVE$pval[match(tfs_tab$TF, EVE$Ssal)]
tfs_tab$Pvalue[is.na(tfs_tab$Pvalue)] <- 1.0
tfs_tab$Pvalue <- format(tfs_tab$Pvalue, digits=3, scientific=TRUE)
tfs_tab$Tau <- tau$Tau[match(tfs_tab$TF, tau$Genes)]
tfs_tab$Tau <- format(tfs_tab$Tau, digits = 2)
tfs_tab <- tfs_tab %>% separate(Motif, into = c("TFname","Motif"), sep = "_")

datatable(tfs_tab, caption = "Liver specific TFs",
          rownames = FALSE, filter = "top", 
          options = list(pageLength = 10,
                         columnDefs = list(list(className = 'dt-center', targets = 0:(ncol(tfs_tab)-1)))))

3 JASPER motifs

Find bound motifs in the targets with liver specific TFs. Plot tissue expression.

# Load bound motif matches (in liver)
if (docompute) {
  load("bedtools_closest_TFBS_TSS.Rdata")
  colnames(motif_gene) <- c('TFBS_chrom', 'TFBS_start', 'TFBS_end', 'TFBS_id', 'TFBS_strand', 'signal_liver', 'signal_brain', 'bound_liver', 'bound_brain', 'logfc_liver_brain', 'chrom_gene', 'TSS_start', 'TSS_end', 'gene_id', 'strand', 'distance_tss')
  
  motif_gene <- motif_gene[motif_gene$gene_id %in% EVE$Ssal[EVE$dup_type == "duplicate"], ]

  motif_gene$distance_tss_sign <- motif_gene$distance_tss
  idx1 <- which(motif_gene$strand == '+' & motif_gene$TFBS_start < motif_gene$TSS_start)
  idx2 <- which(motif_gene$strand == '-' & motif_gene$TFBS_start > motif_gene$TSS_start)
  motif_gene$distance_tss_sign[c(idx1, idx2)] <- (-1)*motif_gene$distance_tss[c(idx1, idx2)]
  motif_gene <- motif_gene[motif_gene$distance_tss_sign >= -3000 
                                   & motif_gene$distance_tss_sign <= 200,]
  
  motif_gene <- motif_gene %>% filter(bound_liver == 1)
  
  if (FALSE) {
    # Write motif locations to file
    motif_gene <- motif_gene[motif_gene$TFBS_id %in% paste(tfs_tab$TFname,tfs_tab$Motif, sep = "_"),]
    up <- motif_gene[motif_gene$gene_id %in% targets.up,]
    cons <- motif_gene[motif_gene$gene_id %in% targets.cons,]
    write.table(up, "up_motifs_genome_position.txt", row.names = FALSE, quote = FALSE, sep = "\t")
    write.table(cons, "cons_motifs_genome_position.txt", row.names = FALSE, quote = FALSE, sep = "\t")
  }
  
  motif_gene <- motif_gene %>%
    group_by(TFBS_id, gene_id) %>%
    mutate(Matches = n())
  
  motif_gene <- motif_gene %>%
    group_by(TFBS_id,gene_id) %>%
    slice(1) %>%
    select(TFBS_id, Matches, gene_id)
  colnames(motif_gene) <- c("Motif", "Matches", "Gene")
  
  save(motif_gene, file = "motif_gene.RData")
}
load("input-data/TFBS_TOBIAS/motif_gene.RData")

motif_gene <- motif_gene[motif_gene$Gene %in% targets,] %>%
  separate(Motif, into = c("TFname","Motif"), sep = "_")

idx <- match(motif_gene$Gene, targets_tab$Target)
motif_gene$GeneSymbol <- targets_tab$Symbol[idx]
motif_gene$GeneDescr <- targets_tab$Descr[idx]
# Remove motifs that do not have a liver specific TF
motif_gene <- motif_gene[motif_gene$Motif %in% tfs_tab$Motif,]

# Remove TFs that do not bind motifs in targets
tfs_tab <- tfs_tab[tfs_tab$Motif %in% motif_gene$Motif,]

cat(length(unique(tfs_tab$TF)), "TFs bind",
    length(unique(motif_gene$Motif)), "motifs in", 
    length(unique(motif_gene$Gene)), "target promoters\n")
## 56 TFs bind 70 motifs in 54 target promoters
datatable(motif_gene, caption = "Motifs with liver specific TFs in target genes",
          rownames = FALSE, filter = "top", 
          options = list(pageLength = 10,
                         columnDefs = list(list(className = 'dt-center', targets = 0:(ncol(motif_gene)-1)))))
# Number of different liver specific motifs in targets 
motifs.count <- data.frame()
for (i in 1:length(targets.up)) {
  count.up <- nrow(motif_gene[motif_gene$Gene == targets.up[i],])
  count.cons <- nrow(motif_gene[motif_gene$Gene == targets.cons[i],])
  if (count.up+count.cons > 0) {
    motifs.count <- rbind(motifs.count, data.frame(MotifCount = c(count.up, count.cons),
                                                   Type = c("Up", "Cons")))
  }
}

ggplot(motifs.count, aes(x = Type, y = MotifCount, fill = Type)) + geom_boxplot() +
    theme_bw(base_size = 15) + ylab("Liver-specific TFs") + xlab("") +
  scale_fill_manual(values=c(rgb(150,150,150, maxColorValue = 255), rgb(255,0,50, maxColorValue = 255)))

pdf(file = "Figures/Figure3B - upcons_motif_boxplot.pdf")
ggplot(motifs.count, aes(x = Type, y = MotifCount, fill = Type)) + geom_boxplot() +
    theme_bw(base_size = 30) + ylab("Liver-specific TFs") + xlab("") +
  scale_fill_manual(values=c(rgb(150,150,150, maxColorValue = 255), rgb(255,0,50, maxColorValue = 255)))
invisible(dev.off())

up <- motifs.count$MotifCount[motifs.count$Type == "Up"]
cons <- motifs.count$MotifCount[motifs.count$Type == "Cons"]
wilcox.test(up, cons, paired = TRUE, alternative = "greater", exact = FALSE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  up and cons
## V = 347, p-value = 7.655e-05
## alternative hypothesis: true location shift is greater than 0
# Remove rare motifs
motifs <- names(table(motif_gene$Motif)[table(motif_gene$Motif) >= 20])
motif_gene <- motif_gene[motif_gene$Motif %in% motifs,]
tfs_tab <- tfs_tab[tfs_tab$Motif %in% motif_gene$Motif,]

cat("Motifs in at least 20 targets:", length(unique(tfs_tab$TF)), "TFs bind",
    length(unique(motif_gene$Motif)), "motifs in", 
    length(unique(motif_gene$Gene)), "target promoters\n")
## Motifs in at least 20 targets: 22 TFs bind 17 motifs in 52 target promoters
# Tissue specificity
tfs_tab_unique <- tfs_tab %>%
  group_by(TF) %>%
  summarise(Symbol = unique(Symbol), Descr = unique(Descr), 
            TFname = paste(TFname, collapse = " "), Motif = paste(Motif, collapse = " "), 
            Dir = unique(Dir), Tau = unique(Tau))

t <- data.frame(Tau = c(tfs_tab_unique$Tau, targets_tab$Tau), Type = c(rep("TFs", nrow(tfs_tab_unique)), targets_tab$Dir))
t$Tau <- as.numeric(t$Tau)
t$Type <- factor(t$Type, levels = c("Cons", "Up", "TFs"))
ggplot(t, aes(x = Type, y = Tau, fill = Type)) + geom_boxplot() +
  theme_bw(base_size = 15) + xlab("") +
  scale_fill_manual(values=c(rgb(150,150,150, maxColorValue = 255), rgb(255,0,50, maxColorValue = 255), "darkorchid"))

#Tissue expression

load("input-data/TissueAtlas/TPM_table_log2_tissues.RData")
expr <- TPM_table[match(c(tfs_tab_unique$TF, targets_tab$Target), rownames(TPM_table)),]


dist.obs <- as.dist(1-cor(t(expr)))
dist.obs.tree <- hclust(dist.obs, method = "ward.D")
dist.sam <- as.dist(1-cor(expr))
dist.sam.tree <- hclust(dist.sam, method = "ward.D")
  
annot_row <- data.frame(Type = factor(c(rep("TFs", nrow(tfs_tab_unique)), targets_tab$Dir), levels = c("Cons", "Up", "TFs")), check.names = FALSE)
rownames(annot_row) <- rownames(expr)
  
annot_color_map <- c(rgb(150,150,150, maxColorValue = 255), rgb(255,0,50, maxColorValue = 255), "darkorchid")
names(annot_color_map) <- levels(annot_row$Type)
annot_colors = list(Type = annot_color_map)
  
gene_names <- c(tfs_tab_unique$TFname, rep("", nrow(targets_tab)))

fig <- pheatmap(mat = as.matrix(expr),
     color = colorRampPalette(c("blue","white","red"))(10),
     cluster_rows = dist.obs.tree,
     cluster_cols = dist.sam.tree,
     scale = "row",
     legend = FALSE,
     cellwidth = 12,
     cellheight = 7,
     border_color = NA,
     annotation_legend = FALSE,
     annotation_row = annot_row,
     annotation_colors = annot_colors,
     fontsize_row = 7,
     fontsize_col = 12,
     show_rownames = TRUE,
     labels_row = gene_names,
)

print(fig)

pdf(file = "Figures/Figure3A - upcons_heatmap.pdf", height=11); print(fig); invisible(dev.off())

4 The network

To avoid to much clutter in the network, we:

  1. For each motif, select the TF with the strongest EVE- then tau-result.

  2. Draw a heatmap of motif-similarity, and remove motifs iterativly that have a target-overlap of above 80%. Keep the motif with the TF with the strongest EVE result.

  3. Merge TFs that bind several motifs into one node.

motif_tf <- tfs_tab %>%
  group_by(TFname) %>%
  arrange(as.numeric(Pvalue), desc(as.numeric(Tau))) %>%
  slice(1)
motif_tf$OrthoGroup <- EVE$N3[match(motif_tf$TF, EVE$Ssal)]

motif_gene$OrthoGroup <- EVE$N3[match(motif_gene$Gene, EVE$Ssal)]

########################
sim.thr <- 0.8
for (k in 1:2) {
  TFnames <- motif_tf$TFname
  n <- length(TFnames)
  
  sim <- matrix(NA, nrow = n, ncol = n)
  for (i in 1:n) {
    genesi <- motif_gene$Gene[motif_gene$TFname == TFnames[i]]
    for (j in 1:n) {
      genesj <- motif_gene$Gene[motif_gene$TFname == TFnames[j]]
      sim[i,j] <- length(intersect(genesi, genesj))/length(union(genesi, genesj))
    }
  }
  
  tree <- hclust(as.dist(1-sim), method = "ward.D")
  pheatmap(mat = sim,
         color = colorRampPalette(c("blue","white","red"))(10),
         cluster_rows = tree,
         cluster_cols = tree,
         legend = TRUE,
         cellwidth = 8,
         cellheight = 8,
         border_color = NA,
         annotation_legend = FALSE,
         fontsize_row = 8,
         fontsize_col = 8,
         labels_row = TFnames,
         labels_col = TFnames
    )
  
  # No filtering, just plot the heatmap
  if (sim.thr > 1) {break}
  
  # Delete redundant motifs
  if (k == 1) {
    diag(sim) <- 0
    
    cat("  \n") 
    while (TRUE) {
    
      idx <- data.frame(which(sim == max(sim), arr.ind = TRUE)) %>%
        filter(row > col)
  
      tf1 <- TFnames[idx[1,1]]
      p1 <- motif_tf$Pvalue[idx[1,1]]
      tf2 <- TFnames[idx[1,2]]
      p2 <- motif_tf$Pvalue[idx[1,2]]
      idx_del <- idx[1,1]
      tf_del <- tf1
      tf_keep <- tf2
      if (p1 < p2) {
        idx_del <- idx[1,2]
        tf_del <- tf2
        tf_keep <- tf1
      }
      
      if (sim[idx[1,1], idx[1,2]] < sim.thr) {
        break
      }
      
      cat("Deleting", tf_del, "and keeping", tf_keep, ":", sim[idx[1,1], idx[1,2]]); cat("  \n") 
      
      sim <- sim[-idx_del, -idx_del]
      TFnames <- TFnames[-idx_del]
      motif_gene <- motif_gene[motif_gene$TFname != tf_del,]
      motif_tf <- motif_tf[motif_tf$TFname != tf_del,]
    }
  }
}


Deleting HNF4G and keeping HNF4A : 1
Deleting FOXA3 and keeping FOXA2 : 0.9090909

########################

edges <- data.frame()
for (i in 1:nrow(motif_gene)) {
  idx <- which(motif_tf$Motif == motif_gene$Motif[i])
  for (j in idx) {
    edges <- rbind(edges, data.frame(Regulator = motif_tf$TFname[j],
                                     Regulator2 = motif_tf$TF[j],
                                     Target = motif_gene$Gene[i], 
                                     Target2 = motif_gene$Gene[i], 
                                     Interaction = "TF-gene", 
                                     Matches = motif_gene$Matches[i]))
  }
}

edges <- edges %>%
  group_by(Regulator2, Target, Target2, Interaction) %>%
  summarise(Regulator = paste0(Regulator, collapse = "_"), Matches = sum(Matches)) %>%
  select(Regulator, Regulator2, Target, Target2, Interaction, Matches) %>%
  group_by(Regulator2) %>%
  mutate(Regulator = unique(Regulator[nchar(Regulator) == max(nchar(Regulator))])) %>%
  data.frame()

motif_tf <- motif_tf %>%
  group_by(TF) %>%
  summarise(Symbol = unique(Symbol), Descr = unique(Descr),
            TFname = paste0(TFname, collapse = "_"),
            Motif = paste0(Motif, collapse = "_"),
            Dir = unique(Dir), 
            Pvalue = unique(Pvalue),
            Tau = unique(Tau),
            OrthoGroup = unique(OrthoGroup)
            )

TFs <- motif_tf$TFname
TFs2 <- motif_tf$TF
for (i in 1:(length(TFs)-1)) {
  for (j in (i+1):length(TFs)) {
    if (!is.na(motif_tf$OrthoGroup[i]) & !is.na(motif_tf$OrthoGroup[j]) &
        motif_tf$OrthoGroup[i] == motif_tf$OrthoGroup[j]) {
      edges <- rbind(edges, data.frame(Regulator = TFs[i], Regulator2 = TFs2[i], 
                                       Target = TFs[j], Target2 = TFs2[j], Interaction = "TF-TF", Matches = 1))
    }
  }
}

orthogroups <- unique(EVE[match(unique(motif_gene$Gene),EVE$Ssal),]$N3)
targets_ohnologs <- EVE[EVE$N3 %in% orthogroups,]$Ssal
targets_orthogroups <- EVE[EVE$N3 %in% orthogroups,]$N3
for (g in orthogroups) {
  idx <- which(g == targets_orthogroups)
  edges <- rbind(edges, data.frame(Regulator = targets_ohnologs[idx[1]], Regulator2 = targets_ohnologs[idx[1]], 
                                   Target = targets_ohnologs[idx[2]], Target2 = targets_ohnologs[idx[2]], 
                                   Interaction = "gene-gene", Matches = 1))
}

idx <-match(edges$Regulator2, EVE$Ssal)
pval <- EVE$pval[idx]
pval[is.na(idx)] <- 1
dir <- EVE$shift.direction[idx]
dir[is.na(idx)] <- "unknown"
dir[dir == ""] <- "cons"
edges$Pvalue <- log2(pval)
edges$Pvalue[dir == "up"] <- abs(edges$Pvalue[dir == "up"])
  
write.table(edges[order(edges$Interaction),], paste0("data/upcons_TFanalysis/",label, "_edges.txt"), row.names = FALSE, quote = FALSE, sep = "\t")

tmp <- edges %>%
  separate(Interaction, into=c("T1","T2"),sep = "-")
nodes <- data.frame(Nodes = c(tmp$Regulator,tmp$Target), Genes = c(tmp$Regulator2, tmp$Target2), 
                    Type = c(tmp$T1,tmp$T2))

nodes <- nodes %>%
  group_by(Nodes, Genes, Type) %>%
  slice(1)

idx <-match(nodes$Genes, EVE$Ssal)
pval <- EVE$pval[idx]
pval[is.na(idx)] <- 1
nodes$Pvalue <- log2(pval)
dir <- EVE$shift.direction[idx]
dir[is.na(idx)] <- "unknown"
dir[dir == ""] <- "cons"
nodes$Dir <- dir
dir <- EVE$sigDir[idx]
dir[is.na(idx)] <- "unknown"
dir[dir == ""] <- "cons"
nodes$SigDir <- dir
nodes$Pvalue[nodes$Dir == "up"] <- abs(nodes$Pvalue[nodes$Dir == "up"])

info <- get.genes(ids$gene_id[match(nodes$Genes,ids$ncbi_id)], match = TRUE)
nodes$Symbol <- info$gene
nodes$Symbol <- gsub("LOC","",nodes$Symbol)
nodes$Symbol[nodes$Type == "TF"] <- nodes$Nodes[nodes$Type == "TF"]

write.table(nodes[order(nodes$Type),], paste0("data/upcons_TFanalysis/",label, "_nodes.txt"), row.names = FALSE, quote = FALSE, sep = "\t")

cat("Network with", nrow(edges), "edges between", nrow(nodes), "nodes\n")

Network with 341 edges between 71 nodes

cat("  Targets:", sum(nodes$Type == "gene"))

Targets: 58

for (t in unique(nodes$SigDir)) {
  cat("    ", t, ":", sum(nodes$Type == "gene" & nodes$SigDir == t))
  cat("  \n")
}
 cons : 29  
 up : 29  
 unknown : 0  
cat("  TFs:", sum(nodes$Type == "TF"))

TFs: 13

for (t in unique(nodes$SigDir)) {
  cat("    ", t, ":", sum(nodes$Type == "TF" & nodes$SigDir == t))
  cat("  \n")
}
 cons : 7  
 up : 1  
 unknown : 5  
idx <- match(nodes$Genes[nodes$Type == "gene"], nodes$Genes[nodes$Type == "TF"])
idx <- idx[!is.na(idx)]
if (length(idx) > 0) {
  cat("TFs that are targets: ", nodes$Genes[nodes$Type == "TF"][idx], " - ", nodes$Symbol[nodes$Type == "TF"][idx])
}

5 Tissue expression revisited for genes in network

Look at the tissue expression of TFs and targets in the network. Compute the RegBias score for each TF, which is the fraction of this TF’s targets that are up.

tfs <- unique(motif_tf$TF)
targets <- unique(motif_gene$Gene)
rem <- intersect(tfs, targets)
if (length(rem) > 0) {
  idx <- match(rem, targets)
  targets <- targets[-idx]
}

motif_tf$RegBias <- c(0)
for (i in 1:length(motif_tf$TF)) {
  t <- edges[edges$Interaction == "TF-gene" & edges$Regulator2 == motif_tf$TF[i],]$Target
  s <- nodes[match(t, nodes$Genes),]$SigDir

  motif_tf$RegBias[i] <- length(s[s == "up"])/length(s)
}

ggplot(motif_tf, aes(RegBias)) + geom_histogram(bins = 10) + theme_bw(base_size = 15)

motif_tf$RegBias <- format(motif_tf$RegBias, digits = 2)

datatable(motif_tf[order(desc(motif_tf$RegBias)),], rownames = FALSE, filter = "top", caption = "TFs in network sorted by RegBias",
          options = list(pageLength = 10, 
                         columnDefs = list(list(className = 'dt-center', targets = 0:(ncol(motif_tf)-1)))))
# Draw a smaller network
subtfs <- motif_tf$TF[motif_tf$RegBias >= 0.65]
subedges <- edges[edges$Regulator2 %in% subtfs | edges$Target2 %in% subtfs | edges$Interaction == "gene-gene",]
subnodes <- nodes[(nodes$Genes %in% subtfs & nodes$Type == "TF") | (nodes$Type == "gene"),]
write.table(subedges, paste0("data/upcons_TFanalysis/",label, "_subedges.txt"), row.names = FALSE, quote = FALSE, sep = "\t")
write.table(subnodes, paste0("data/upcons_TFanalysis/",label, "_subnodes.txt"), row.names = FALSE, quote = FALSE, sep = "\t")

Circles are genes, diamonds are transcription factors. Dotted lines between ohnologs. Directed edge represents putative regulatory interaction. Node color indicate shift direction and magnitude (blue is down, red is up). Size of nodes indicate number of neighbors: For genes, number of regulators. For TFs, number of targets.