Last updated: 2018-10-09

Code version: acf38fa

Calculating the phases for each gene

First let’s import and process the data we need to use for this:

We also only want to consider genes with a decent amplitude to remove genes that are constitutively on:

In Broadbent et al. they generate a circular plot to represent the different timepoints as x and y coordinates following dimensional reduction using multidimensional scaling. Prior to this we need to correct for impure Plasmodium cell populations as demonstrated in the following plot:

initial  value 6.803612 
iter   5 value 5.340021
iter  10 value 5.178667
iter  10 value 5.174423
iter  10 value 5.172857
final  value 5.172857 
converged

Although it doesn’t look terrible, there are times when it seems that 3D7 seems to somehow “get ahead” of HB3 and IT in terms of the IDC developmental timepoint. We tried to correct for this by locally realigning time points to the 3D7 53 time point microarray expression data set and predicting the expression values following local realignment.

A “local realignment” refers to adjusting each time point individually whereas a “global realignment” refers to use a descriptive statistic such as the mean or median shift to globally shift all expression values by the same amount.

# filter out undetected genes and genes with low amplitudes
# mean normalize and scale TPMs
# group observations accordingly
# use periodic spline to predict expression levels for 53 time points
pred_exp1 <- exp %>% 
  dplyr::filter(gene_id %in% onall & gene_id %in% high_amp) %>%
  dplyr::group_by(gene_id,strain) %>% 
  dplyr::summarise(m=mean(TPM)) %>% 
  dplyr::inner_join(exp, by = c("gene_id","strain")) %>% 
  dplyr::mutate(norm_tpm=(((TPM/m)-mean(TPM/m))/sd(TPM/m))) %>% 
  dplyr::select(gene_id,tp,norm_tpm,strain) %>% 
  dplyr::ungroup() %>%
  dplyr::rowwise() %>%
  dplyr::mutate(hpi=ifelse(tp==1,1,
                           if(tp==2){8}
                           else if(tp==3){16}
                           else if(tp==4){24}
                           else if(tp==5){32}
                           else if(tp==6){40}
                           else{48})) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(gene_id,strain) %>%
  dplyr::do(pred = predict(splines::periodicSpline(norm_tpm ~ hpi, ., period = 48), seq(1,53,1)))

pred_exp2 <- apply(pred_exp1, 1, function(row) {list(gene_id=row[[1]],strain=row[[2]],x=row[[3]][[1]],y=row[[3]][[2]])})
pred_exp <- do.call(dplyr::bind_rows, 
                    lapply(pred_exp2, function(x) {
                      tibble::tibble(
                        gene_id=rep(x[[1]],53),
                        strain=rep(x[[2]],53),
                        tp=x[[3]],
                        exp=x[[4]])}))

rm(pred_exp1,pred_exp2)

# import and format array datta
array_3d7 <- read.delim("../data/array_correlations/selected_3d7array_foldchanges_v3_geneids.txt") %>%
  tibble::as_tibble()

dups <- as.character(array_3d7$GeneID[duplicated(array_3d7 %$% GeneID)])

array_3d7 <- array_3d7 %>%
  dplyr::filter(GeneID %nin% dups) %>%
  tidyr::gather(tp,exp,-GeneID) %>%
  dplyr::rename(gene_id = GeneID)

array_3d7$gene_id <- as.character(array_3d7$gene_id)
array_3d7 <- array_3d7 %>% 
  dplyr::mutate(tp = ifelse(as.numeric(stringr::str_replace_all(tp, "Timepoint.","")) > 9, paste0("array_",stringr::str_replace_all(tp, "Timepoint.","")), paste0("array_0",stringr::str_replace_all(tp, "Timepoint.",""))))

# format predicted expression
exp_3d7 <- pred_exp %>% filter(strain == "3d7") %>% dplyr::select(-strain)
exp_hb3 <- pred_exp %>% filter(strain == "hb3") %>% dplyr::select(-strain)
exp_it <- pred_exp %>% filter(strain == "it") %>% dplyr::select(-strain)

exp_3d7$tp <- ifelse(exp_3d7$tp > 9, paste(paste0("rnaseq_",exp_3d7$tp)), paste(paste0("rnaseq_0",exp_3d7$tp)))
exp_hb3$tp <- ifelse(exp_hb3$tp > 9, paste(paste0("rnaseq_",exp_hb3$tp)), paste(paste0("rnaseq_0",exp_hb3$tp)))
exp_it$tp <- ifelse(exp_it$tp > 9, paste(paste0("rnaseq_",exp_it$tp)), paste(paste0("rnaseq_0",exp_it$tp)))
initial  value 9.243506 
final  value 8.112635 
converged

Phase comparison histograms

Session Information

R version 3.5.0 (2018-04-23)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Gentoo/Linux

Matrix products: default
BLAS: /usr/local/lib64/R/lib/libRblas.so
LAPACK: /usr/local/lib64/R/lib/libRlapack.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] gdtools_0.1.7                        
 [2] bindrcpp_0.2.2                       
 [3] BSgenome.Pfalciparum.PlasmoDB.v24_1.0
 [4] BSgenome_1.48.0                      
 [5] rtracklayer_1.40.6                   
 [6] Biostrings_2.48.0                    
 [7] XVector_0.20.0                       
 [8] GenomicRanges_1.32.7                 
 [9] GenomeInfoDb_1.16.0                  
[10] org.Pf.plasmo.db_3.6.0               
[11] AnnotationDbi_1.42.1                 
[12] IRanges_2.14.12                      
[13] S4Vectors_0.18.3                     
[14] Biobase_2.40.0                       
[15] BiocGenerics_0.26.0                  
[16] scales_1.0.0                         
[17] cowplot_0.9.3                        
[18] magrittr_1.5                         
[19] forcats_0.3.0                        
[20] stringr_1.3.1                        
[21] dplyr_0.7.6                          
[22] purrr_0.2.5                          
[23] readr_1.1.1                          
[24] tidyr_0.8.1                          
[25] tibble_1.4.2                         
[26] ggplot2_3.0.0                        
[27] tidyverse_1.2.1                      

loaded via a namespace (and not attached):
 [1] nlme_3.1-137                bitops_1.0-6               
 [3] matrixStats_0.54.0          lubridate_1.7.4            
 [5] bit64_0.9-7                 httr_1.3.1                 
 [7] rprojroot_1.3-2             tools_3.5.0                
 [9] backports_1.1.2             R6_2.3.0                   
[11] DBI_1.0.0                   lazyeval_0.2.1             
[13] colorspace_1.3-2            withr_2.1.2                
[15] tidyselect_0.2.4            bit_1.1-14                 
[17] compiler_3.5.0              git2r_0.23.0               
[19] cli_1.0.1                   rvest_0.3.2                
[21] xml2_1.2.0                  DelayedArray_0.6.6         
[23] labeling_0.3                digest_0.6.17              
[25] Rsamtools_1.32.3            svglite_1.2.1              
[27] rmarkdown_1.10              R.utils_2.7.0              
[29] pkgconfig_2.0.2             htmltools_0.3.6            
[31] rlang_0.2.2                 readxl_1.1.0               
[33] rstudioapi_0.8              RSQLite_2.1.1              
[35] bindr_0.1.1                 jsonlite_1.5               
[37] BiocParallel_1.14.2         R.oo_1.22.0                
[39] RCurl_1.95-4.11             GenomeInfoDbData_1.1.0     
[41] Matrix_1.2-14               Rcpp_0.12.19               
[43] munsell_0.5.0               R.methodsS3_1.7.1          
[45] stringi_1.2.4               yaml_2.2.0                 
[47] MASS_7.3-49                 SummarizedExperiment_1.10.1
[49] zlibbioc_1.26.0             plyr_1.8.4                 
[51] grid_3.5.0                  blob_1.1.1                 
[53] crayon_1.3.4                lattice_0.20-35            
[55] splines_3.5.0               haven_1.1.2                
[57] hms_0.4.2                   knitr_1.20                 
[59] pillar_1.3.0                XML_3.98-1.16              
[61] glue_1.3.0                  evaluate_0.11              
[63] modelr_0.1.2                cellranger_1.1.0           
[65] gtable_0.2.0                assertthat_0.2.0           
[67] broom_0.5.0                 GenomicAlignments_1.16.0   
[69] memoise_1.1.0               workflowr_1.1.1            

This R Markdown site was created with workflowr