diff options
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/demo_table.R | 35 | ||||
| -rw-r--r-- | scripts/demographics.R | 75 |
2 files changed, 109 insertions, 1 deletions
# Patch metadata from the original diff listing, kept for reference:
# diff --git a/scripts/demo_table.R b/scripts/demo_table.R
# new file mode 100644
# index 0000000..602cd68
# --- /dev/null
# +++ b/scripts/demo_table.R
# @@ -0,0 +1,35 @@

# Builds three LaTeX summary tables (age, gender, ethnicity) from the donor
# demographics CSV using knitr::kable().
library(tidyverse)
library(knitr)

orig <- read_csv("../csv/donor_demo.csv")

# Literal "NULL" strings mark missing values in the CSV. na_if() must be
# applied column by column: recent dplyr releases reject a whole data frame
# as its first argument.
data <- orig %>%
  mutate(across(where(is.character), ~ na_if(.x, "NULL"))) %>%
  replace_na(list(Ethnicity = "Unknown"))

# Format how often `level` occurs in `x` as "n (p)", where p is the percentage
# of all entries of `x` (missing entries stay in the denominator), shown with
# one decimal place. Missing entries never count as a match.
count_pct <- function(x, level) {
  n <- sum(x == level, na.rm = TRUE)
  sprintf("%d (%.1f)", n, 100 * n / length(x))
}

# assumes Age is read as a numeric column - TODO confirm against the CSV
age <- data$Age

# escape = FALSE keeps the LaTeX math markup intact; with kable()'s default
# escaping the backslash of \pm would be printed literally.
age_table <- tribble(
  ~`Age (y)`, ~Value,
  "Mean $\\pm$ SD",
  paste(
    round(mean(age, na.rm = TRUE), 2),
    "$\\pm$",
    round(sd(age, na.rm = TRUE), 2)
  ),
  "Median (min. to max. range)",
  paste0(
    median(age, na.rm = TRUE),
    " (", min(age, na.rm = TRUE), "-", max(age, na.rm = TRUE), ")"
  )
) %>%
  kable(format = "latex", booktabs = TRUE, escape = FALSE)

# Default escaping is kept for the tables below so that the "%" in the row
# labels is not read as a LaTeX comment character.
gender_table <- tribble(
  ~Gender, ~Value,
  "Male (%)", count_pct(data$Gender, "Male"),
  "Female (%)", count_pct(data$Gender, "Female")
) %>%
  kable(format = "latex", booktabs = TRUE)

race_table <- tribble(
  ~`Race/Ethnicity`, ~Value,
  "Caucasian (%)", count_pct(data$Ethnicity, "Caucasian"),
  "African American (Black) (%)",
  count_pct(data$Ethnicity, "Black or African American"),
  "Asian (%)", count_pct(data$Ethnicity, "Asian"),
  "Hispanic/Latino (%)", count_pct(data$Ethnicity, "Hispanic/Latino"),
  "Other (%)", count_pct(data$Ethnicity, "Other"),
  "Unknown (%)", count_pct(data$Ethnicity, "Unknown")
) %>%
  kable(format = "latex", booktabs = TRUE)

# diff --git a/scripts/demographics.R b/scripts/demographics.R
# index 5cc2fca..d43b453 100644
# ---
# a/scripts/demographics.R
# +++ b/scripts/demographics.R
# @@ -1,3 +1,76 @@
# -library(ggplot2)

# Two-panel demographics figure, saved to ../images/demographic.png:
#   A - share of each gender, ethnicity and CMV category per responder group
#   B - age distribution per responder group
library(tidyverse)
library(ggpubr)

orig <- read_csv("../csv/donor_demo.csv")
data <- orig

# Responder group sizes. Response is coded 0 (low responder) / 1 (high
# responder); these sizes are the denominators for both panels.
responder_codes <- c(0, 1)
n_low <- sum(data$Response == 0, na.rm = TRUE)
n_high <- sum(data$Response == 1, na.rm = TRUE)

facet_labels <- c(
  paste0("Low responders (n=", n_low, ")"),
  paste0("High responders (n=", n_high, ")")
)

# Top-to-bottom order of the categories on the y axis of panel A.
category_order <- c(
  "Female",
  "Male",
  "Caucasian",
  "Black or African American",
  "Asian",
  "Hispanic/Latino",
  "Other",
  "Unknown",
  "CMV+",
  "CMV-",
  "CMV unknown"
)

factors_demo <- data %>%
  # The CMV column is relabelled before "NULL" strings are turned into NA, so
  # all three of its raw codes receive a label.
  # NOTE(review): labels are matched to the sorted raw codes - presumably
  # negative, positive, unknown; verify against the CSV.
  mutate(
    `CMV status` = factor(
      `CMV status`,
      labels = c("CMV-", "CMV+", "CMV unknown")
    )
  ) %>%
  # na_if() has to run column by column: recent dplyr releases reject a whole
  # data frame as its first argument.
  mutate(across(where(is.character), ~ na_if(.x, "NULL"))) %>%
  replace_na(list(Ethnicity = "Unknown")) %>%
  pivot_longer(cols = c("Gender", "Ethnicity", "CMV status")) %>%
  # One row per category and responder group; count() returns an ungrouped
  # table.
  count(value, Response, name = "count") %>%
  mutate(
    total = if_else(Response == 1, n_high, n_low),
    # ggplot draws the first factor level at the bottom, hence rev().
    value = factor(value, levels = rev(category_order)),
    responder = factor(
      Response,
      levels = responder_codes,
      labels = facet_labels
    )
  ) %>%
  ggplot(aes(y = value, fill = value)) +
  geom_col(aes(x = count / total), show.legend = FALSE) +
  theme_pubr() +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(x = "Distribution (%)", y = "") +
  facet_wrap(~responder)

age_responder <- data %>%
  # One row per age and responder group, instead of a summarise() call that
  # returned every input row again.
  count(Age, Response, name = "count") %>%
  mutate(
    Response_text = factor(
      Response,
      levels = responder_codes,
      labels = c("Low responders", "High responders")
    ),
    total = if_else(Response == 0, n_low, n_high),
    percentage = count / total * 100
  ) %>%
  # geom_polygon() joins the points in row order, so keep ages ascending.
  arrange(Age) %>%
  ggplot(aes(x = Age, y = percentage, fill = Response_text)) +
  # NOTE(review): geom_polygon() closes the outline with a straight line from
  # the oldest back to the youngest age; geom_area() or geom_col() may be what
  # was intended - confirm before changing the figure.
  geom_polygon(show.legend = FALSE) +
  labs(x = "Age") +
  theme_pubr() +
  facet_wrap(~Response_text)

figure <- ggarrange(
  factors_demo,
  age_responder,
  ncol = 1,
  nrow = 2,
  labels = c("A", "B")
)

ggsave(
  "../images/demographic.png",
  figure,
  width = 2 * 15,
  height = 19,
  units = "cm"
)
