Chapter 2 Introduction: load the datasets

2.1 Load packages

library(janitor)
library(readxl)
library(tidyverse)
library(ggpubr)
library(data.table)

2.2 Load datasets

# Directory holding the published bile-acid datasets. Every file below is read
# from here, so pointing the script at another copy of the data only requires
# changing this one path.
ba_data_dir <- "/Volumes/vandenbrinklab/Oriana/BAs_published_datasets"

# Patient cohort ----
cohort_BAS <- read_csv(file.path(ba_data_dir, "cohort_BAS.csv"))
ursodiol <- read_csv(file.path(ba_data_dir, "ursodiol.csv"))

# Metabolomics data ----
# Concentrations
conc_all_filtered <- read_csv(file.path(ba_data_dir, "conc_all_filtered.csv"))
filtered_combined_table <- read_csv(
  file.path(ba_data_dir, "filtered_combined_table.csv")
)
# Annotations
ba_families <- read_csv(file.path(ba_data_dir, "ba_families.csv"))

# 16S data ----
counts_samples <- read_csv(file.path(ba_data_dir, "counts_samples.csv"))
asv_annotation_blast_ag <- read_csv(
  file.path(ba_data_dir, "asv_annotation_blast_ag.csv")
)
asv_alpha_all <- read_csv(file.path(ba_data_dir, "asv_alpha_all.csv"))
asv_annotation_blast_color_ag <- read_csv(
  file.path(ba_data_dir, "asv_annotation_blast_color_ag.csv")
)

# Shotgun data ----
BSH_metalphlan <- read_csv(file.path(ba_data_dir, "BSH_metalphlan.csv"))
bai_genes_clean <- read_csv(file.path(ba_data_dir, "bai_genes_clean.csv"))
taxa_bas_later <- read_csv(file.path(ba_data_dir, "taxa_bas_later.csv"))

# Ursodiol cohort: double check that it's ok to share.
# This file lives in the "don't include" subfolder of the data directory.
patients_urso_CIF <- read_csv(
  file.path(ba_data_dir, "don't include", "patients_urso_CIF.csv")
)


#' Split bile acid values into per-category pool columns
#'
#' Joins `df` to the annotation table `ba_families` on every column name the
#' two tables share, then adds one column per bile acid pool. In each pool
#' column a row keeps its `value` when the row's annotation matches the pool
#' definition and gets `0` otherwise (`total_BAs` always keeps `value`).
#' Finally every column that came from `ba_families` (join keys included) and
#' the `value` column itself are dropped.
#'
#' Pool definitions, as tested against the annotation columns:
#' * `primary_pool` / `secondary_pool`: `prim_vs_sec` is "Primary" / "Secondary".
#' * `sulfated_pool`: `sulfated == "Y"`.
#' * `conjugated_pool` / `unconjugated_pool`: `amidated` is "Y" / "N", and
#'   `sulfated == "N"`.
#' * `secondary_nonUDCA`: secondary and `udca == "N"`.
#' * `total_nonUDCA_pool`: `udca == "N"`.
#' * `glycine_pool` / `taurine_pool`: `glycine` / `taurine` is "Y", and
#'   `sulfated == "N"`; the `_SBA_` / `_PBA_` variants additionally require
#'   `prim_vs_sec` to be "Secondary" / "Primary".
#'
#' @param df Data frame with a `value` column and at least one column name in
#'   common with `ba_families` (used as the join key).
#' @param ba_families Annotation table. After the join the columns
#'   `prim_vs_sec`, `sulfated`, `amidated`, `udca`, `glycine` and `taurine`
#'   must be available.
#' @return The joined table with the pool columns appended and with all
#'   `ba_families` columns and `value` removed.
categorize_bile_acids <- function(df, ba_families) {
  annotation_cols <- colnames(ba_families)

  # Spell out the natural-join keys instead of letting left_join() guess them:
  # same join, but no "Joining with `by = ...`" message on every call.
  join_keys <- intersect(colnames(df), annotation_cols)
  if (length(join_keys) == 0) {
    stop("`df` and `ba_families` share no columns to join on.", call. = FALSE)
  }

  annotated <- left_join(df, ba_families, by = join_keys)

  # Fail early with a readable message rather than an "object not found"
  # error from inside mutate().
  required_cols <- c(
    "value", "prim_vs_sec", "sulfated", "amidated", "udca", "glycine",
    "taurine"
  )
  missing_cols <- setdiff(required_cols, colnames(annotated))
  if (length(missing_cols) > 0) {
    stop(
      "Missing required column(s): ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # ifelse() is kept on purpose: a row whose annotation is NA (e.g. no match
  # in `ba_families`) yields NA in the affected pools rather than 0.
  annotated %>%
    mutate(
      primary_pool = ifelse(prim_vs_sec == "Primary", value, 0),
      secondary_pool = ifelse(prim_vs_sec == "Secondary", value, 0),
      sulfated_pool = ifelse(sulfated == "Y", value, 0),
      conjugated_pool = ifelse(amidated == "Y" & sulfated == "N", value, 0),
      unconjugated_pool = ifelse(amidated == "N" & sulfated == "N", value, 0),
      secondary_nonUDCA = ifelse(
        prim_vs_sec == "Secondary" & udca == "N", value, 0
      ),
      total_BAs = value,
      total_nonUDCA_pool = ifelse(udca == "N", value, 0),
      glycine_pool = ifelse(glycine == "Y" & sulfated == "N", value, 0),
      taurine_pool = ifelse(taurine == "Y" & sulfated == "N", value, 0),
      taurine_SBA_pool = ifelse(
        taurine == "Y" & prim_vs_sec == "Secondary" & sulfated == "N", value, 0
      ),
      taurine_PBA_pool = ifelse(
        taurine == "Y" & prim_vs_sec == "Primary" & sulfated == "N", value, 0
      ),
      glycine_SBA_pool = ifelse(
        glycine == "Y" & prim_vs_sec == "Secondary" & sulfated == "N", value, 0
      ),
      glycine_PBA_pool = ifelse(
        glycine == "Y" & prim_vs_sec == "Primary" & sulfated == "N", value, 0
      )
    ) %>%
    select(-all_of(annotation_cols), -value)
}