# Load the published bile-acid datasets ----
# Every input table lives under one directory, so the location is defined once
# here; re-point the script by changing this single line.
ba_data_dir <- "/Volumes/vandenbrinklab/Oriana/BAs_published_datasets"

# Patient cohort
cohort_BAS <- read_csv(file.path(ba_data_dir, "cohort_BAS.csv"))
ursodiol <- read_csv(file.path(ba_data_dir, "ursodiol.csv"))

# Metabolomics data
# Concentrations
conc_all_filtered <- read_csv(file.path(ba_data_dir, "conc_all_filtered.csv"))
filtered_combined_table <- read_csv(
  file.path(ba_data_dir, "filtered_combined_table.csv")
)
# Annotations (passed as `ba_families` to categorize_bile_acids())
ba_families <- read_csv(file.path(ba_data_dir, "ba_families.csv"))

# 16S data
counts_samples <- read_csv(file.path(ba_data_dir, "counts_samples.csv"))
asv_annotation_blast_ag <- read_csv(
  file.path(ba_data_dir, "asv_annotation_blast_ag.csv")
)
asv_alpha_all <- read_csv(file.path(ba_data_dir, "asv_alpha_all.csv"))
asv_annotation_blast_color_ag <- read_csv(
  file.path(ba_data_dir, "asv_annotation_blast_color_ag.csv")
)

# Shotgun data
BSH_metalphlan <- read_csv(file.path(ba_data_dir, "BSH_metalphlan.csv"))
bai_genes_clean <- read_csv(file.path(ba_data_dir, "bai_genes_clean.csv"))
taxa_bas_later <- read_csv(file.path(ba_data_dir, "taxa_bas_later.csv"))

# Ursodiol cohort
# TODO: double-check that this table is OK to share before publishing; it is
# read from the "don't include" subfolder of the data directory.
patients_urso_CIF <- read_csv(
  file.path(ba_data_dir, "don't include", "patients_urso_CIF.csv")
)
#' Split bile-acid measurements into per-category pool columns
#'
#' Left-joins `df` to the `ba_families` annotation table on every column name
#' the two tables share, then derives one pool column per category. Each pool
#' column holds the row's `value` when the row belongs to that category and `0`
#' otherwise. Sulfated rows (`sulfated == "Y"`) only count towards
#' `sulfated_pool`, the primary/secondary pools and the totals; the
#' conjugated, unconjugated, glycine and taurine pools require
#' `sulfated == "N"`.
#'
#' Rows of `df` with no match in `ba_families` get `NA` annotations from the
#' left join, so their category pools are `NA` (not `0`); `total_BAs` still
#' carries their `value`.
#'
#' @param df Data frame with a numeric `value` column and at least one column
#'   name in common with `ba_families` (used as the join key).
#' @param ba_families Annotation table. After the join the columns
#'   `prim_vs_sec` ("Primary"/"Secondary") and the "Y"/"N" flags `sulfated`,
#'   `amidated`, `udca`, `glycine` and `taurine` must be available.
#' @return `df` with the pool columns appended and with `value` and every
#'   column of `ba_families` (including the join keys) removed.
categorize_bile_acids <- function(df, ba_families) {
  # Spell out the join keys instead of relying on dplyr's implicit natural
  # join: same keys, but no "Joining with `by = ...`" message on every call and
  # a clear error when the tables have nothing in common.
  join_cols <- intersect(colnames(df), colnames(ba_families))
  if (length(join_cols) == 0) {
    stop(
      "`df` and `ba_families` share no column names to join on.",
      call. = FALSE
    )
  }

  annotated <- left_join(df, ba_families, by = join_cols)

  # Fail early with a readable message rather than an "object not found"
  # error from inside mutate().
  required_cols <- c(
    "value", "prim_vs_sec", "sulfated", "amidated", "udca", "glycine",
    "taurine"
  )
  missing_cols <- setdiff(required_cols, colnames(annotated))
  if (length(missing_cols) > 0) {
    stop(
      "Missing required column(s) after joining `df` and `ba_families`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  annotated %>%
    mutate(
      primary_pool = ifelse(prim_vs_sec == "Primary", value, 0),
      secondary_pool = ifelse(prim_vs_sec == "Secondary", value, 0),
      sulfated_pool = ifelse(sulfated == "Y", value, 0),
      conjugated_pool = ifelse(amidated == "Y" & sulfated == "N", value, 0),
      unconjugated_pool = ifelse(amidated == "N" & sulfated == "N", value, 0),
      secondary_nonUDCA = ifelse(
        prim_vs_sec == "Secondary" & udca == "N", value, 0
      ),
      total_BAs = value,
      total_nonUDCA_pool = ifelse(udca == "N", value, 0),
      glycine_pool = ifelse(glycine == "Y" & sulfated == "N", value, 0),
      taurine_pool = ifelse(taurine == "Y" & sulfated == "N", value, 0),
      taurine_SBA_pool = ifelse(
        taurine == "Y" & prim_vs_sec == "Secondary" & sulfated == "N",
        value, 0
      ),
      taurine_PBA_pool = ifelse(
        taurine == "Y" & prim_vs_sec == "Primary" & sulfated == "N",
        value, 0
      ),
      glycine_SBA_pool = ifelse(
        glycine == "Y" & prim_vs_sec == "Secondary" & sulfated == "N",
        value, 0
      ),
      glycine_PBA_pool = ifelse(
        glycine == "Y" & prim_vs_sec == "Primary" & sulfated == "N",
        value, 0
      )
    ) %>%
    # Drop the raw measurement and all annotation columns (join keys
    # included); only the caller's own columns and the pools remain.
    select(-all_of(colnames(ba_families)), -value)
}