OGtbl <- read_tsv("input-data/from_ortho_pipeline/OGtbl.tsv",col_types = cols())
# load the expression data
combExprMat <- readRDS("data/BSNormalize/combExprMat.RDS")
# raw expression data
exprsMat <- readRDS("input-data/RNAseq/exprsMat.RDS")
# load EVE results
EVEresTbl <- readRDS("data/runEVE/EVEresTbl.RDS")
Orthogroups (OGs) are based on the gene trees from the ohnolog pipeline. The trees are split into teleost clades (internal node “N3” in the species tree) and filtered based on the phylogeny and gene copy-numbers:
Overview statistics of the EVE results. Shows the proportion of significant shifts up and down.
# When combining 1:1 and 1:2 into one plot the "both" category only occurs for duplicates and
# will either show up in all facets or the bars will have different widths.
# Here is a workaround for dealing with the widths..
# scale facets based on the number of categories in each
drawBalancedBarsFlipped <- function(p){
# convert ggplot object to grob object
gp <- ggplotGrob(p)
# # optional: take a look at the grob object's layout
# gtable::gtable_show_layout(gp)
# get gtable rows corresponding to the facets
facet.rows <- gp$layout$b[grepl("panel", gp$layout$name)]
# get the number of unique x-axis values per facet (1 & 3, in this case)
x.var <- sapply(ggplot_build(p)$layout$panel_scales_x,
function(l) length(l$range$range))
# change the relative widths of the facet columns based on
# how many unique x-axis values are in each facet
gp$heights[facet.rows] <- gp$heights[facet.rows] * x.var
# plot result
grid::grid.draw(gp)
}
Ntbl <-
EVEresTbl %>%
mutate( isComplete = ifelse(N3partial,"partial","full") ) %>%
select(N3, isComplete,dup_type) %>% distinct() %>% count(isComplete,dup_type) %>%
mutate(N=paste("N =",n))
EVEresTbl %>%
mutate( sigDir = ifelse(dup_type=="single",sigDir,dupSigDir)) %>%
mutate( isComplete = ifelse(ifelse(dup_type=="single",N9partial,N3partial),"partial","full") ) %>%
select(N3,sigDir,dup_type,isComplete) %>%
distinct() %>%
add_count(dup_type,isComplete) %>%
# mutate( category = paste0(dup_type,isComplete," (n=",n,")")) %>%
filter( sigDir != "" ) %>%
group_by( sigDir, dup_type,isComplete) %>%
summarise(nSig = n(), N=n[1], propSig=n()/n[1]) %>%
mutate( updownClass = sub(x = sigDir,"(up|down|both).*","\\1")) %>%
group_by(updownClass,dup_type, isComplete) %>%
arrange(desc(sigDir)) %>%
mutate(totSig=cumsum(propSig)) %>%
rename( Direction=sigDir) %>%
ggplot( aes(x=updownClass, y=propSig) ) +
geom_col(aes(fill=Direction)) +
facet_grid( dup_type ~ isComplete,scales = "free" ) +
scale_fill_manual(values = c(both="#993094",down="#298ABD",downdown="#1B4F8B",
up="#F62D31",upup="#8B1B1A")) +
geom_text(aes(label=nSig,y=totSig),hjust=-0.1 ) +
coord_flip(ylim = c(0,0.4)) +
geom_text(data=Ntbl,mapping = aes(label=N),y=0.4,x=1, hjust=1) +
ylab("Proportion of OGs with expression shift") +
theme_classic() +
theme(axis.title.y = element_blank()) -> p
drawBalancedBarsFlipped(p)
Compare results for duplicate clades where one clade is complete and the other has lost one or more orthologs (i.e. partial). The ohnolog clade with loss is more likely to be downregulated
EVEresTbl %>%
filter(dup_type=="duplicate") %>%
group_by(N3) %>%
# find ortholog groups where one of the clades are partial
filter( sum(N9partial)==1) %>%
ungroup() %>%
select( N3, N9partial, sigDir ) %>%
mutate( isComplete = ifelse(N9partial,"partial","full") ) %>%
add_count(isComplete) %>%
group_by(isComplete, sigDir) %>%
summarise(nSig = n(), N=n[1], propSig=n()/n[1]) %>%
filter( sigDir!="") %>%
ggplot( aes(x=sigDir, y=propSig,fill=sigDir) ) +
geom_col() +
facet_grid( isComplete ~ ., scales = "free_x" ) +
scale_fill_manual(values = c(down="#298ABD",downdown="#1B4F8B",
up="#F62D31",upup="#8B1B1A")) +
geom_text(aes(label=nSig),hjust=-0.1 ) +
coord_flip(ylim = c(0,0.4)) +
annotate("text",x=2,y=0.4,label="N = 1775",hjust=1) +
theme_classic() +
theme(legend.position = "none")
EVEresTbl %>%
filter(dup_type=="duplicate") %>%
group_by(N3) %>%
# find ortholog groups where both of the clades are partial
filter( all(N9partial)) %>%
ungroup() %>%
select( N3, dupSigDir ) %>%
distinct() %>%
mutate(n=n()) %>%
filter( dupSigDir!="") %>%
group_by(dupSigDir) %>%
summarise(nSig = n(), N=n[1], propSig=n()/n[1]) %>%
mutate( updownClass = sub(x = dupSigDir,"(up|down|both).*","\\1")) %>%
group_by(updownClass) %>%
arrange(desc(dupSigDir)) %>%
mutate(totSig=cumsum(propSig)) %>%
rename(Direction=dupSigDir) %>%
ggplot( aes(x=updownClass, y=propSig) ) +
geom_col(aes(fill=Direction)) +
scale_fill_manual(values = c(both="#993094",down="#298ABD",downdown="#1B4F8B",
up="#F62D31",upup="#8B1B1A")) +
geom_text(aes(label=nSig,y=totSig),hjust=-0.1 ) +
coord_flip(ylim = c(0,0.4)) +
annotate("text",x=1,y=0.4,label="N = 743",hjust=1) +
ylab("Proportion of OGs with expression shift") +
theme_classic() +
theme(axis.title.y = element_blank())
outgroup_cols <- colnames(combExprMat) %in% c("Drer","Olat","Eluc")
EVEresTbl %>%
left_join(
tibble(
N9 = rownames(combExprMat),
outgroup = rowMeans(combExprMat[ , outgroup_cols],na.rm = T)
), by="N9"
) %>%
filter(N3partial==F) %>%
select(N3,dupSigDir,dup_type,outgroup) %>%
distinct() %>%
mutate( quartile=cut(outgroup, breaks = quantile(outgroup),
include.lowest = T,labels = c("1st","2nd","3rd","4th"))) %>%
add_count(dup_type) %>%
mutate( dupSigDir = ifelse(dupSigDir=="","-",dupSigDir) ) %>%
ggplot( aes(x=quartile,fill=dupSigDir)) +
facet_grid( dup_type ~ .) +
geom_bar() +
scale_fill_manual(values = c(both="#993094",down="#298ABD",downdown="#1B4F8B",
up="#F62D31",upup="#8B1B1A",`-`="grey"), name="")+
coord_flip() +
theme_classic() +
xlab("outgroup expression level quartile")