knitr::opts_chunk$set(echo = TRUE)
options(stringsAsFactors = FALSE)
assign("last.warning", NULL, envir = baseenv())
library(tidyverse)
library(Ssa.RefSeq.db)
library(gplots)
library(pheatmap)
library(edgeR)
library(gridExtra)
library(DT)
label <- "upcons"
docompute <- FALSE
dir.create("data/upcons_TFanalysis",showWarnings = F)
Duplicates in EVE analysis with one copy up, and where that copy is specific to liver: tau > 0.6 and expression in liver within 90% of the highest expression in any tissue.
# Select target genes
EVE <- readRDS("data/runEVE/EVEresTbl.RDS", refhook = NULL)
targets <- EVE %>%
filter (dup_type == "duplicate", dupSigDir == "up", N3partial == FALSE)
load("input-data/TissueAtlas/tau_all_genes.RData")
up_targets <- targets[targets$sigDir == "up",]
idx <- match(up_targets$Ssal, tau$Genes)
up_targets <- up_targets[tau$Tau[idx] > 0.60 & grepl("liver", tau$Tissue[idx]),]
targets <- targets[targets$N3 %in% up_targets$N3,]
targets.up <- targets[targets$sigDir == "up",]$Ssal
targets.cons <- targets[targets$sigDir != "up",]$Ssal
targets <- targets$Ssal
cat (length(targets), "liver specific targets with up-cons-shift\n")
## 60 liver specific targets with up-cons-shift
# Load gene information
ids <- get.id("*") %>%
filter(gene_biotype=="protein_coding") %>%
separate(gene_id, into=c("gff_id","ncbi_id"), sep = ":", remove = FALSE)
info <- get.genes(ids$gene_id[match(targets,ids$ncbi_id)], match = TRUE)
info$gene <- gsub("LOC","",info$gene)
targets_tab <- data.frame(Target = targets, Symbol = info$gene, Descr = info$product)
targets_tab$Dir <- EVE$sigDir[match(targets_tab$Target, EVE$Ssal)]
targets_tab$Dir[targets_tab$Dir == ""] <- "Cons"
targets_tab$Dir[targets_tab$Dir == "up"] <- "Up"
targets_tab$Pvalue <- EVE$pval[match(targets_tab$Target, EVE$Ssal)]
targets_tab$Pvalue <- format(targets_tab$Pvalue, digits = 3, scientific = TRUE)
targets_tab$Tau <- tau$Tau[match(targets_tab$Target, tau$Genes)]
targets_tab$Tau <- format(targets_tab$Tau, digits = 2)
datatable(targets_tab, caption = "Target genes",
rownames = FALSE, filter = "top",
options = list(pageLength = 10,
columnDefs = list(list(className = 'dt-center', targets = 0:(ncol(targets_tab)-1)))))
# Print table for supplement
idx.up <- match(targets.up, targets_tab$Target)
idx.cons <- match(targets.cons, targets_tab$Target)
targets_tab_up <- targets_tab[idx.up,1:6]
colnames(targets_tab_up) <- paste(colnames(targets_tab_up), "up")
targets_tab_cons <- targets_tab[idx.cons,1:6]
colnames(targets_tab_cons) <- paste(colnames(targets_tab_cons), "cons")
tab <- cbind(targets_tab_up, targets_tab_cons)
write.table(tab, file = paste0("data/upcons_TFanalysis/",label, "_tab.txt"), quote = FALSE, row.names = FALSE, sep = "\t")
BLAST the protein sequence of the uniprot ids in Jaspar to the salmon proteome, then take the four (two WGDs) best hit (hit: Evalue < 1E-10, alignment length > 100) and finally select liver-specific TFs (same criterium as for target genes).
# Get JASPAR unitprot ids
JASPAR <- read.csv("input-data/JASPAR_TFs/JASPAR.txt", sep="")
JASPAR <- JASPAR[JASPAR$COLLECTION == "CORE",]
JASPAR$NAME <- gsub("::", "", JASPAR$NAME)
JASPAR$NAME <- gsub("\\(", "", JASPAR$NAME)
JASPAR$NAME <- gsub("\\)", "", JASPAR$NAME)
JASPAR$Motif <- paste(JASPAR$BASE_ID, JASPAR$VERSION, sep = ".")
JASPAR$Motif <- paste(JASPAR$NAME, JASPAR$Motif, sep = "_")
# # Export uniprot ids for BLAST at https://blast.ncbi.nlm.nih.gov/Blast.cgi
# write.table(data.frame(JASPAR$ACC), "blast.uniprot.txt",
# row.names = FALSE, col.names = FALSE, quote = FALSE)
# Unitprot to salmon TFs
tfblast <- read.table("input-data/JASPAR_TFs/BLAST.csv", header = TRUE, sep = ",")
tfblast$query <- unlist(lapply(strsplit(tfblast$query,"[.]"), function(x){x[1]}))
idx <- match(tfblast$subject, ids$protein_id)
tfblast$subject <- ids$ncbi_id[idx]
tfblast <- tfblast[!is.na(tfblast$subject),]
tfblast <- tfblast %>%
filter(evalue < 1E-10, alignment.length > 100) %>%
group_by(query, subject) %>%
top_n(1, bit.score) %>%
group_by(query) %>%
top_n(4, bit.score)
tfs <- unique(tfblast$subject)
cat (length(tfs), "TFs with motif in JASPER\n")
## 1697 TFs with motif in JASPER
# Filter for liver specific expression
idx <- match(tfs, tau$Genes)
tfs <- tfs[!is.na(idx)]
idx <- idx[!is.na(idx)]
tfs <- tfs[tau$Tau[idx] > 0.60 & grepl("liver", tau$Tissue[idx])]
cat (length(tfs), "liver specific TFs with motif in JASPER\n")
## 56 liver specific TFs with motif in JASPER
tfblast <- tfblast[tfblast$subject %in% tfs,]
# Query/Uniprot id can have several motifs
tfblast$Motif <- c(NA)
for(i in 1:nrow(tfblast)) {
tfblast$Motif[i] <- paste(JASPAR$Motif[grep(tfblast$query[i], JASPAR$ACC)], collapse = ",")
}
tfblast <- separate_rows(tfblast, Motif, sep = ",", convert = TRUE)
info <- get.genes(ids$gene_id[match(tfblast$subject,ids$ncbi_id)], match = TRUE)
info$gene <- gsub("LOC","",info$gene)
tfs_tab <- data.frame(TF = tfblast$subject, Symbol = info$gene, Descr = info$product, Motif = tfblast$Motif)
tfs_tab$Dir <- EVE$sigDir[match(tfs_tab$TF, EVE$Ssal)]
tfs_tab$Dir[is.na(tfs_tab$Dir)] <- "unknown"
tfs_tab$Pvalue <- EVE$pval[match(tfs_tab$TF, EVE$Ssal)]
tfs_tab$Pvalue[is.na(tfs_tab$Pvalue)] <- 1.0
tfs_tab$Pvalue <- format(tfs_tab$Pvalue, digits=3, scientific=TRUE)
tfs_tab$Tau <- tau$Tau[match(tfs_tab$TF, tau$Genes)]
tfs_tab$Tau <- format(tfs_tab$Tau, digits = 2)
tfs_tab <- tfs_tab %>% separate(Motif, into = c("TFname","Motif"), sep = "_")
datatable(tfs_tab, caption = "Liver specific TFs",
rownames = FALSE, filter = "top",
options = list(pageLength = 10,
columnDefs = list(list(className = 'dt-center', targets = 0:(ncol(tfs_tab)-1)))))
Find bound motifs in the targets with liver specific TFs. Plot tissue expression.
# Load bound motif matches (in liver)
if (docompute) {
load("bedtools_closest_TFBS_TSS.Rdata")
colnames(motif_gene) <- c('TFBS_chrom', 'TFBS_start', 'TFBS_end', 'TFBS_id', 'TFBS_strand', 'signal_liver', 'signal_brain', 'bound_liver', 'bound_brain', 'logfc_liver_brain', 'chrom_gene', 'TSS_start', 'TSS_end', 'gene_id', 'strand', 'distance_tss')
motif_gene <- motif_gene[motif_gene$gene_id %in% EVE$Ssal[EVE$dup_type == "duplicate"], ]
motif_gene$distance_tss_sign <- motif_gene$distance_tss
idx1 <- which(motif_gene$strand == '+' & motif_gene$TFBS_start < motif_gene$TSS_start)
idx2 <- which(motif_gene$strand == '-' & motif_gene$TFBS_start > motif_gene$TSS_start)
motif_gene$distance_tss_sign[c(idx1, idx2)] <- (-1)*motif_gene$distance_tss[c(idx1, idx2)]
motif_gene <- motif_gene[motif_gene$distance_tss_sign >= -3000
& motif_gene$distance_tss_sign <= 200,]
motif_gene <- motif_gene %>% filter(bound_liver == 1)
if (FALSE) {
# Write motif locations to file
motif_gene <- motif_gene[motif_gene$TFBS_id %in% paste(tfs_tab$TFname,tfs_tab$Motif, sep = "_"),]
up <- motif_gene[motif_gene$gene_id %in% targets.up,]
cons <- motif_gene[motif_gene$gene_id %in% targets.cons,]
write.table(up, "up_motifs_genome_position.txt", row.names = FALSE, quote = FALSE, sep = "\t")
write.table(cons, "cons_motifs_genome_position.txt", row.names = FALSE, quote = FALSE, sep = "\t")
}
motif_gene <- motif_gene %>%
group_by(TFBS_id, gene_id) %>%
mutate(Matches = n())
motif_gene <- motif_gene %>%
group_by(TFBS_id,gene_id) %>%
slice(1) %>%
select(TFBS_id, Matches, gene_id)
colnames(motif_gene) <- c("Motif", "Matches", "Gene")
save(motif_gene, file = "motif_gene.RData")
}
load("input-data/TFBS_TOBIAS/motif_gene.RData")
motif_gene <- motif_gene[motif_gene$Gene %in% targets,] %>%
separate(Motif, into = c("TFname","Motif"), sep = "_")
idx <- match(motif_gene$Gene, targets_tab$Target)
motif_gene$GeneSymbol <- targets_tab$Symbol[idx]
motif_gene$GeneDescr <- targets_tab$Descr[idx]
# Remove motifs that do not have a liver specific TF
motif_gene <- motif_gene[motif_gene$Motif %in% tfs_tab$Motif,]
# Remove TFs that do not bind motifs in targets
tfs_tab <- tfs_tab[tfs_tab$Motif %in% motif_gene$Motif,]
cat(length(unique(tfs_tab$TF)), "TFs bind",
length(unique(motif_gene$Motif)), "motifs in",
length(unique(motif_gene$Gene)), "target promoters\n")
## 56 TFs bind 70 motifs in 54 target promoters
datatable(motif_gene, caption = "Motifs with liver specific TFs in target genes",
rownames = FALSE, filter = "top",
options = list(pageLength = 10,
columnDefs = list(list(className = 'dt-center', targets = 0:(ncol(motif_gene)-1)))))
# Number of different liver specific motifs in targets
motifs.count <- data.frame()
for (i in 1:length(targets.up)) {
count.up <- nrow(motif_gene[motif_gene$Gene == targets.up[i],])
count.cons <- nrow(motif_gene[motif_gene$Gene == targets.cons[i],])
if (count.up+count.cons > 0) {
motifs.count <- rbind(motifs.count, data.frame(MotifCount = c(count.up, count.cons),
Type = c("Up", "Cons")))
}
}
ggplot(motifs.count, aes(x = Type, y = MotifCount, fill = Type)) + geom_boxplot() +
theme_bw(base_size = 15) + ylab("Liver-specific TFs") + xlab("") +
scale_fill_manual(values=c(rgb(150,150,150, maxColorValue = 255), rgb(255,0,50, maxColorValue = 255)))
pdf(file = "Figures/Figure3B - upcons_motif_boxplot.pdf")
ggplot(motifs.count, aes(x = Type, y = MotifCount, fill = Type)) + geom_boxplot() +
theme_bw(base_size = 30) + ylab("Liver-specific TFs") + xlab("") +
scale_fill_manual(values=c(rgb(150,150,150, maxColorValue = 255), rgb(255,0,50, maxColorValue = 255)))
invisible(dev.off())
up <- motifs.count$MotifCount[motifs.count$Type == "Up"]
cons <- motifs.count$MotifCount[motifs.count$Type == "Cons"]
wilcox.test(up, cons, paired = TRUE, alternative = "greater", exact = FALSE)
##
## Wilcoxon signed rank test with continuity correction
##
## data: up and cons
## V = 347, p-value = 7.655e-05
## alternative hypothesis: true location shift is greater than 0
# Remove rare motifs
motifs <- names(table(motif_gene$Motif)[table(motif_gene$Motif) >= 20])
motif_gene <- motif_gene[motif_gene$Motif %in% motifs,]
tfs_tab <- tfs_tab[tfs_tab$Motif %in% motif_gene$Motif,]
cat("Motifs in at least 20 targets:", length(unique(tfs_tab$TF)), "TFs bind",
length(unique(motif_gene$Motif)), "motifs in",
length(unique(motif_gene$Gene)), "target promoters\n")
## Motifs in at least 20 targets: 22 TFs bind 17 motifs in 52 target promoters
# Tissue specificity
tfs_tab_unique <- tfs_tab %>%
group_by(TF) %>%
summarise(Symbol = unique(Symbol), Descr = unique(Descr),
TFname = paste(TFname, collapse = " "), Motif = paste(Motif, collapse = " "),
Dir = unique(Dir), Tau = unique(Tau))
t <- data.frame(Tau = c(tfs_tab_unique$Tau, targets_tab$Tau), Type = c(rep("TFs", nrow(tfs_tab_unique)), targets_tab$Dir))
t$Tau <- as.numeric(t$Tau)
t$Type <- factor(t$Type, levels = c("Cons", "Up", "TFs"))
ggplot(t, aes(x = Type, y = Tau, fill = Type)) + geom_boxplot() +
theme_bw(base_size = 15) + xlab("") +
scale_fill_manual(values=c(rgb(150,150,150, maxColorValue = 255), rgb(255,0,50, maxColorValue = 255), "darkorchid"))
#Tissue expression
load("input-data/TissueAtlas/TPM_table_log2_tissues.RData")
expr <- TPM_table[match(c(tfs_tab_unique$TF, targets_tab$Target), rownames(TPM_table)),]
dist.obs <- as.dist(1-cor(t(expr)))
dist.obs.tree <- hclust(dist.obs, method = "ward.D")
dist.sam <- as.dist(1-cor(expr))
dist.sam.tree <- hclust(dist.sam, method = "ward.D")
annot_row <- data.frame(Type = factor(c(rep("TFs", nrow(tfs_tab_unique)), targets_tab$Dir), levels = c("Cons", "Up", "TFs")), check.names = FALSE)
rownames(annot_row) <- rownames(expr)
annot_color_map <- c(rgb(150,150,150, maxColorValue = 255), rgb(255,0,50, maxColorValue = 255), "darkorchid")
names(annot_color_map) <- levels(annot_row$Type)
annot_colors = list(Type = annot_color_map)
gene_names <- c(tfs_tab_unique$TFname, rep("", nrow(targets_tab)))
fig <- pheatmap(mat = as.matrix(expr),
color = colorRampPalette(c("blue","white","red"))(10),
cluster_rows = dist.obs.tree,
cluster_cols = dist.sam.tree,
scale = "row",
legend = FALSE,
cellwidth = 12,
cellheight = 7,
border_color = NA,
annotation_legend = FALSE,
annotation_row = annot_row,
annotation_colors = annot_colors,
fontsize_row = 7,
fontsize_col = 12,
show_rownames = TRUE,
labels_row = gene_names,
)
print(fig)
pdf(file = "Figures/Figure3A - upcons_heatmap.pdf", height=11); print(fig); invisible(dev.off())
To avoid to much clutter in the network, we:
For each motif, select the TF with the strongest EVE- then tau-result.
Draw a heatmap of motif-similarity, and remove motifs iterativly that have a target-overlap of above 80%. Keep the motif with the TF with the strongest EVE result.
Merge TFs that bind several motifs into one node.
motif_tf <- tfs_tab %>%
group_by(TFname) %>%
arrange(as.numeric(Pvalue), desc(as.numeric(Tau))) %>%
slice(1)
motif_tf$OrthoGroup <- EVE$N3[match(motif_tf$TF, EVE$Ssal)]
motif_gene$OrthoGroup <- EVE$N3[match(motif_gene$Gene, EVE$Ssal)]
########################
sim.thr <- 0.8
for (k in 1:2) {
TFnames <- motif_tf$TFname
n <- length(TFnames)
sim <- matrix(NA, nrow = n, ncol = n)
for (i in 1:n) {
genesi <- motif_gene$Gene[motif_gene$TFname == TFnames[i]]
for (j in 1:n) {
genesj <- motif_gene$Gene[motif_gene$TFname == TFnames[j]]
sim[i,j] <- length(intersect(genesi, genesj))/length(union(genesi, genesj))
}
}
tree <- hclust(as.dist(1-sim), method = "ward.D")
pheatmap(mat = sim,
color = colorRampPalette(c("blue","white","red"))(10),
cluster_rows = tree,
cluster_cols = tree,
legend = TRUE,
cellwidth = 8,
cellheight = 8,
border_color = NA,
annotation_legend = FALSE,
fontsize_row = 8,
fontsize_col = 8,
labels_row = TFnames,
labels_col = TFnames
)
# No filtering, just plot the heatmap
if (sim.thr > 1) {break}
# Delete redundant motifs
if (k == 1) {
diag(sim) <- 0
cat(" \n")
while (TRUE) {
idx <- data.frame(which(sim == max(sim), arr.ind = TRUE)) %>%
filter(row > col)
tf1 <- TFnames[idx[1,1]]
p1 <- motif_tf$Pvalue[idx[1,1]]
tf2 <- TFnames[idx[1,2]]
p2 <- motif_tf$Pvalue[idx[1,2]]
idx_del <- idx[1,1]
tf_del <- tf1
tf_keep <- tf2
if (p1 < p2) {
idx_del <- idx[1,2]
tf_del <- tf2
tf_keep <- tf1
}
if (sim[idx[1,1], idx[1,2]] < sim.thr) {
break
}
cat("Deleting", tf_del, "and keeping", tf_keep, ":", sim[idx[1,1], idx[1,2]]); cat(" \n")
sim <- sim[-idx_del, -idx_del]
TFnames <- TFnames[-idx_del]
motif_gene <- motif_gene[motif_gene$TFname != tf_del,]
motif_tf <- motif_tf[motif_tf$TFname != tf_del,]
}
}
}
Deleting HNF4G and keeping HNF4A : 1
Deleting FOXA3 and keeping FOXA2 : 0.9090909
########################
edges <- data.frame()
for (i in 1:nrow(motif_gene)) {
idx <- which(motif_tf$Motif == motif_gene$Motif[i])
for (j in idx) {
edges <- rbind(edges, data.frame(Regulator = motif_tf$TFname[j],
Regulator2 = motif_tf$TF[j],
Target = motif_gene$Gene[i],
Target2 = motif_gene$Gene[i],
Interaction = "TF-gene",
Matches = motif_gene$Matches[i]))
}
}
edges <- edges %>%
group_by(Regulator2, Target, Target2, Interaction) %>%
summarise(Regulator = paste0(Regulator, collapse = "_"), Matches = sum(Matches)) %>%
select(Regulator, Regulator2, Target, Target2, Interaction, Matches) %>%
group_by(Regulator2) %>%
mutate(Regulator = unique(Regulator[nchar(Regulator) == max(nchar(Regulator))])) %>%
data.frame()
motif_tf <- motif_tf %>%
group_by(TF) %>%
summarise(Symbol = unique(Symbol), Descr = unique(Descr),
TFname = paste0(TFname, collapse = "_"),
Motif = paste0(Motif, collapse = "_"),
Dir = unique(Dir),
Pvalue = unique(Pvalue),
Tau = unique(Tau),
OrthoGroup = unique(OrthoGroup)
)
TFs <- motif_tf$TFname
TFs2 <- motif_tf$TF
for (i in 1:(length(TFs)-1)) {
for (j in (i+1):length(TFs)) {
if (!is.na(motif_tf$OrthoGroup[i]) & !is.na(motif_tf$OrthoGroup[j]) &
motif_tf$OrthoGroup[i] == motif_tf$OrthoGroup[j]) {
edges <- rbind(edges, data.frame(Regulator = TFs[i], Regulator2 = TFs2[i],
Target = TFs[j], Target2 = TFs2[j], Interaction = "TF-TF", Matches = 1))
}
}
}
orthogroups <- unique(EVE[match(unique(motif_gene$Gene),EVE$Ssal),]$N3)
targets_ohnologs <- EVE[EVE$N3 %in% orthogroups,]$Ssal
targets_orthogroups <- EVE[EVE$N3 %in% orthogroups,]$N3
for (g in orthogroups) {
idx <- which(g == targets_orthogroups)
edges <- rbind(edges, data.frame(Regulator = targets_ohnologs[idx[1]], Regulator2 = targets_ohnologs[idx[1]],
Target = targets_ohnologs[idx[2]], Target2 = targets_ohnologs[idx[2]],
Interaction = "gene-gene", Matches = 1))
}
idx <-match(edges$Regulator2, EVE$Ssal)
pval <- EVE$pval[idx]
pval[is.na(idx)] <- 1
dir <- EVE$shift.direction[idx]
dir[is.na(idx)] <- "unknown"
dir[dir == ""] <- "cons"
edges$Pvalue <- log2(pval)
edges$Pvalue[dir == "up"] <- abs(edges$Pvalue[dir == "up"])
write.table(edges[order(edges$Interaction),], paste0("data/upcons_TFanalysis/",label, "_edges.txt"), row.names = FALSE, quote = FALSE, sep = "\t")
tmp <- edges %>%
separate(Interaction, into=c("T1","T2"),sep = "-")
nodes <- data.frame(Nodes = c(tmp$Regulator,tmp$Target), Genes = c(tmp$Regulator2, tmp$Target2),
Type = c(tmp$T1,tmp$T2))
nodes <- nodes %>%
group_by(Nodes, Genes, Type) %>%
slice(1)
idx <-match(nodes$Genes, EVE$Ssal)
pval <- EVE$pval[idx]
pval[is.na(idx)] <- 1
nodes$Pvalue <- log2(pval)
dir <- EVE$shift.direction[idx]
dir[is.na(idx)] <- "unknown"
dir[dir == ""] <- "cons"
nodes$Dir <- dir
dir <- EVE$sigDir[idx]
dir[is.na(idx)] <- "unknown"
dir[dir == ""] <- "cons"
nodes$SigDir <- dir
nodes$Pvalue[nodes$Dir == "up"] <- abs(nodes$Pvalue[nodes$Dir == "up"])
info <- get.genes(ids$gene_id[match(nodes$Genes,ids$ncbi_id)], match = TRUE)
nodes$Symbol <- info$gene
nodes$Symbol <- gsub("LOC","",nodes$Symbol)
nodes$Symbol[nodes$Type == "TF"] <- nodes$Nodes[nodes$Type == "TF"]
write.table(nodes[order(nodes$Type),], paste0("data/upcons_TFanalysis/",label, "_nodes.txt"), row.names = FALSE, quote = FALSE, sep = "\t")
cat("Network with", nrow(edges), "edges between", nrow(nodes), "nodes\n")
Network with 341 edges between 71 nodes
cat(" Targets:", sum(nodes$Type == "gene"))
Targets: 58
for (t in unique(nodes$SigDir)) {
cat(" ", t, ":", sum(nodes$Type == "gene" & nodes$SigDir == t))
cat(" \n")
}
cons : 29
up : 29
unknown : 0
cat(" TFs:", sum(nodes$Type == "TF"))
TFs: 13
for (t in unique(nodes$SigDir)) {
cat(" ", t, ":", sum(nodes$Type == "TF" & nodes$SigDir == t))
cat(" \n")
}
cons : 7
up : 1
unknown : 5
idx <- match(nodes$Genes[nodes$Type == "gene"], nodes$Genes[nodes$Type == "TF"])
idx <- idx[!is.na(idx)]
if (length(idx) > 0) {
cat("TFs that are targets: ", nodes$Genes[nodes$Type == "TF"][idx], " - ", nodes$Symbol[nodes$Type == "TF"][idx])
}
Look at the tissue expression of TFs and targets in the network. Compute the RegBias score for each TF, which is the fraction of this TF’s targets that are up.
tfs <- unique(motif_tf$TF)
targets <- unique(motif_gene$Gene)
rem <- intersect(tfs, targets)
if (length(rem) > 0) {
idx <- match(rem, targets)
targets <- targets[-idx]
}
motif_tf$RegBias <- c(0)
for (i in 1:length(motif_tf$TF)) {
t <- edges[edges$Interaction == "TF-gene" & edges$Regulator2 == motif_tf$TF[i],]$Target
s <- nodes[match(t, nodes$Genes),]$SigDir
motif_tf$RegBias[i] <- length(s[s == "up"])/length(s)
}
ggplot(motif_tf, aes(RegBias)) + geom_histogram(bins = 10) + theme_bw(base_size = 15)
motif_tf$RegBias <- format(motif_tf$RegBias, digits = 2)
datatable(motif_tf[order(desc(motif_tf$RegBias)),], rownames = FALSE, filter = "top", caption = "TFs in network sorted by RegBias",
options = list(pageLength = 10,
columnDefs = list(list(className = 'dt-center', targets = 0:(ncol(motif_tf)-1)))))
# Draw a smaller network
subtfs <- motif_tf$TF[motif_tf$RegBias >= 0.65]
subedges <- edges[edges$Regulator2 %in% subtfs | edges$Target2 %in% subtfs | edges$Interaction == "gene-gene",]
subnodes <- nodes[(nodes$Genes %in% subtfs & nodes$Type == "TF") | (nodes$Type == "gene"),]
write.table(subedges, paste0("data/upcons_TFanalysis/",label, "_subedges.txt"), row.names = FALSE, quote = FALSE, sep = "\t")
write.table(subnodes, paste0("data/upcons_TFanalysis/",label, "_subnodes.txt"), row.names = FALSE, quote = FALSE, sep = "\t")
Circles are genes, diamonds are transcription factors. Dotted lines between ohnologs. Directed edge represents putative regulatory interaction. Node color indicate shift direction and magnitude (blue is down, red is up). Size of nodes indicate number of neighbors: For genes, number of regulators. For TFs, number of targets.