summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Mike Vink <mike1994vink@gmail.com> 2021-04-23 17:30:09 +0200
committer Mike Vink <mike1994vink@gmail.com> 2021-04-23 17:30:09 +0200
commit 19c3d0dba64d782e519d8ece36028ebb25b33141 (patch)
tree 4534be50f304a6de41cd58ac29e32c437ecf3c9c /scripts
parent b64b582e8cbb54e851f00be06ab44fd7811286f1 (diff)
demo table and fig
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/demo_table.R | 35
-rw-r--r-- scripts/demographics.R | 75
2 files changed, 109 insertions, 1 deletions
diff --git a/scripts/demo_table.R b/scripts/demo_table.R
new file mode 100644
index 0000000..602cd68
--- /dev/null
+++ b/scripts/demo_table.R
@@ -0,0 +1,55 @@
+# Donor demographics (age, gender, ethnicity) from ../csv/donor_demo.csv,
+# rendered as booktabs LaTeX tables.
+library(tidyverse)
+library(knitr)
+
+orig <- read_csv("../csv/donor_demo.csv")
+
+# The string "NULL" is treated as a missing value. na_if() on a whole data
+# frame is an error since dplyr 1.1.0, so the character columns are recoded
+# one at a time; a missing ethnicity is then counted as "Unknown".
+data <- orig %>%
+  mutate(across(where(is.character), ~ na_if(.x, "NULL"))) %>%
+  replace_na(list(Ethnicity = "Unknown"))
+
+# Format one table cell as "n (p)": n is how often `level` occurs in `x`, p is
+# that count as a percentage of length(x), rounded to one decimal.
+count_pct <- function(x, level) {
+  # na.rm keeps a missing entry from turning the whole count into NA; missing
+  # entries still count towards the denominator.
+  n <- sum(x == level, na.rm = TRUE)
+  sprintf("%d (%s)", n, round(100 * n / length(x), 1))
+}
+
+# escape = FALSE passes "$\pm$" through to LaTeX as math; kable's default
+# escaping would print the backslash literally.
+age_table <- tribble(
+  ~`Age (y)`, ~Value,
+  "Mean $\\pm$ SD",
+  paste(round(mean(data$Age), 2), "$\\pm$", round(sd(data$Age), 2)),
+  "Median (min. to max. range)",
+  sprintf("%s (%s-%s)", median(data$Age), min(data$Age), max(data$Age))
+) %>%
+  kable(format = "latex", booktabs = TRUE, escape = FALSE)
+
+# The default escaping is kept for the tables below so that the "%" in the
+# row labels is escaped for LaTeX.
+gender_table <- tribble(
+  ~`Gender`, ~Value,
+  "Male (%)", count_pct(data$Gender, "Male"),
+  "Female (%)", count_pct(data$Gender, "Female")
+) %>%
+  kable(format = "latex", booktabs = TRUE)
+
+race_table <- tribble(
+  ~`Ethnicity`, ~Value,
+  "Caucasian (%)", count_pct(data$Ethnicity, "Caucasian"),
+  "African American (Black) (%)",
+  count_pct(data$Ethnicity, "Black or African American"),
+  "Asian (%)", count_pct(data$Ethnicity, "Asian"),
+  "Hispanic/Latino (%)", count_pct(data$Ethnicity, "Hispanic/Latino"),
+  "Other (%)", count_pct(data$Ethnicity, "Other"),
+  "Unknown (%)", count_pct(data$Ethnicity, "Unknown")
+) %>%
+  kable(format = "latex", booktabs = TRUE)
+
diff --git a/scripts/demographics.R b/scripts/demographics.R
index 5cc2fca..d43b453 100644
--- a/scripts/demographics.R
+++ b/scripts/demographics.R
@@ -1,3 +1,85 @@
-library(ggplot2)
+# Demographics figure. Panel A: distribution of gender, ethnicity and CMV
+# status within the low and high responder groups. Panel B: age distribution
+# of both groups. The result is written to ../images/demographic.png.
+library(tidyverse)
+library(ggpubr)
+
+orig <- read_csv("../csv/donor_demo.csv")
+data <- orig
+
+# Group sizes, used as denominators and in the facet titles. Response is
+# compared against 0 (low responder) and 1 (high responder) in this script.
+n_low <- sum(data$Response == 0)
+n_high <- sum(data$Response == 1)
+
+# factor() assigns labels to the sorted unique values, so the label for
+# Response == 0 has to come first.
+responder_labels <- c("Low responders", "High responders")
+responder_titles <- paste0(responder_labels, " (n=", c(n_low, n_high), ")")
+
+# Display order of the categories from top to bottom; reversed when building
+# the factor because the first level is drawn at the bottom of the y axis.
+category_order <- c(
+  "Female", "Male",
+  "Caucasian", "Black or African American", "Asian", "Hispanic/Latino",
+  "Other", "Unknown",
+  "CMV+", "CMV-", "CMV unknown"
+)
+
+# Panel A: share of each responder group that falls into every category.
+factors_demo <- data %>%
+  # Labels are matched to the sorted unique values of the column.
+  # NOTE(review): assumes that order is negative, positive, unknown -- confirm
+  # against the CSV.
+  mutate(`CMV status` = factor(
+    `CMV status`,
+    labels = c("CMV-", "CMV+", "CMV unknown")
+  )) %>%
+  # na_if() on a whole data frame is an error since dplyr 1.1.0, so the "NULL"
+  # placeholders are recoded per character column instead.
+  mutate(across(where(is.character), ~ na_if(.x, "NULL"))) %>%
+  replace_na(list(Ethnicity = "Unknown")) %>%
+  pivot_longer(cols = c("Gender", "Ethnicity", "CMV status")) %>%
+  count(value, Response, name = "count") %>%
+  mutate(
+    total = if_else(Response == 1, n_high, n_low),
+    value = factor(value, levels = rev(category_order))
+  ) %>%
+  ggplot(aes(y = value, fill = value)) +
+  geom_col(aes(x = count / total), show.legend = FALSE) +
+  theme_pubr() +
+  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
+  labs(x = "Distribution (%)", y = "") +
+  facet_wrap(~ factor(Response, labels = responder_titles))
+
+# Panel B: percentage of each responder group observed at every age.
+# count() gives one row per (Age, Response) pair; summarise() calls that
+# return several rows per group are deprecated since dplyr 1.1.0.
+age_responder <- data %>%
+  count(Age, Response, name = "count") %>%
+  mutate(
+    Response_text = factor(Response, labels = responder_labels),
+    total = if_else(Response == 0, n_low, n_high),
+    percentage = count / total * 100
+  ) %>%
+  ggplot(aes(x = Age, y = percentage, fill = Response_text)) +
+  geom_polygon(show.legend = FALSE) +
+  labs(x = "Age") +
+  theme_pubr() +
+  facet_wrap(~Response_text)
+
+# Stack both panels vertically and label them A and B.
+figure <- ggarrange(
+  factors_demo,
+  age_responder,
+  ncol = 1,
+  nrow = 2,
+  labels = c("A", "B")
+)
+
+ggsave(
+  "../images/demographic.png", figure,
+  width = 2 * 15, height = 19, units = "cm"
+)