summaryrefslogtreecommitdiff
path: root/scripts/demographics.R
blob: 3aee883a5f240822c2bf9d9b78c7bf2c5f543a7c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
library(tidyverse)
library(grid)
library(ggpubr)
library(patchwork)



# Scratch scatter plot of the points (1, 1) .. (10, 10), drawn on a reversed
# y axis with one break per integer and no y-axis title. None of the later
# statements in this script reference `p1`.
p1 <- ggplot(tibble(x = 1:10, y = 1:10), aes(x, y)) +
  geom_point() +
  scale_y_reverse(breaks = 1:10) +
  labs(y = NULL)

# Narrow side strip that names the three demographic groups.
# Each row of the tibble is one band: `ymin`/`ymax` give its extent on a 0-10
# scale that is drawn reversed, so the first row ("Gender") sits at the top.
# The `fill` column only supplies the label text; no fill aesthetic is mapped.
p2 <- tibble(
  ymin = c(0, 2.1, 7.1),
  ymax = c(1.9, 6.9, 10),
  fill = c("Gender", "Ethnicity", "CMV status")
) %>%
  ggplot() +
  geom_rect(aes(xmin = 0, xmax = 1, ymin = ymin, ymax = ymax)) +
  # Rotated label centred vertically inside its band.
  geom_text(aes(x = .5, y = (ymin + ymax) / 2, label = fill), angle = 90) +
  # Zero expansion so the bands run edge to edge of the strip.
  scale_y_reverse(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
  scale_x_continuous(breaks = c(0), expand = expansion(mult = c(0, 0))) +
  # `guides(fill = FALSE)` is deprecated in current ggplot2; "none" is the
  # supported way to suppress a guide and is accepted by older versions too.
  guides(fill = "none") +
  theme_void()


# Panel A: share of donors in each demographic category (gender, ethnicity,
# CMV status), shown separately for low (Response == 0) and high
# (Response == 1) responders.
orig <- read_csv("../csv/donor_demo.csv")
data <- orig

# Group sizes: used as the denominator of every bar and in the facet titles.
n_low <- sum(data$Response == 0)
n_high <- sum(data$Response == 1)

# Top-to-bottom order of the categories on the y axis. The vector is reversed
# when building the factor because a discrete y axis puts the first level at
# the bottom.
category_order <- c(
  "Female",
  "Male",
  "Caucasian",
  "Black or African American",
  "Asian",
  "Hispanic/Latino",
  "Other",
  "Unknown",
  "CMV+",
  "CMV-",
  "CMV unknown"
)

tbl <- data %>%
  # Labels are attached to the sorted unique raw values of the column.
  # NOTE(review): presumably negative / positive / missing marker, in that
  # order -- confirm against the CSV.
  mutate(`CMV status` = factor(
    `CMV status`,
    labels = c("CMV-", "CMV+", "CMV unknown")
  )) %>%
  # Turn the literal string "NULL" into NA in the remaining text columns.
  # Done column by column: calling `na_if()` on a whole data frame is rejected
  # by dplyr >= 1.1.0. `CMV status` is already a factor here, so it is skipped.
  mutate(across(where(is.character), ~ na_if(.x, "NULL"))) %>%
  replace_na(list(Ethnicity = "Unknown")) %>%
  pivot_longer(
    cols = c("Gender", "Ethnicity", "CMV status")
  ) %>%
  # One row per (category, response group). `count()` returns an ungrouped
  # tibble, which is required below: dplyr refuses to modify a column that is
  # still a grouping variable.
  count(value, Response, name = "count") %>%
  mutate(
    total = if_else(Response == 1, n_high, n_low),
    value = factor(value, levels = rev(category_order))
  )

factors_demo <- tbl %>%
  ggplot(aes(fill = value)) +
  geom_col(aes(x = count / total, y = value), show.legend = FALSE) +
  theme_pubr() +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(x = "Distribution (%)", y = "") +
  # Explicit levels tie each title to its Response code; paste0() avoids the
  # stray spaces that paste() inserted around the count ("(n= 12 )").
  facet_wrap(~ factor(
    Response,
    levels = c(0, 1),
    labels = c(
      paste0("Low responders (n=", n_low, ")"),
      paste0("High responders (n=", n_high, ")")
    )
  ))

# Put the group-name strip to the left of the bars (patchwork composition).
factors_demo <- p2 + factors_demo + plot_layout(widths = c(0.5, 9))

# Panel B: age profile of each response group, as the percentage of the
# group's donors observed at every age.
data <- orig

# Group sizes used as denominators (Response == 0: low, Response == 1: high).
n_low <- sum(data$Response == 0)
n_high <- sum(data$Response == 1)

age_counts <- data %>%
  select(Age, Response) %>%
  mutate(
    Response_text = factor(
      Response,
      levels = c(0, 1),
      labels = c("Low responders", "High responders")
    )
  ) %>%
  # One row per (age, response group). This replaces a `summarise()` that
  # returned every input row and reassigned its own grouping columns, both of
  # which dplyr >= 1.1 deprecates. Rows stay ordered by Age, so the outline
  # drawn below is unchanged (only duplicated vertices are gone).
  count(Age, Response, Response_text, name = "count") %>%
  mutate(
    total = if_else(Response == 0, n_low, n_high),
    # Uses the explicit count rather than n() under implicit grouping.
    percentage = count / total * 100
  )

age_responder <- age_counts %>%
  ggplot(aes(x = Age, y = percentage, fill = Response_text)) +
  # NOTE(review): geom_polygon joins the points in row order and closes the
  # shape from the last age straight back to the first, not down to a
  # baseline. Kept as is to preserve the figure; confirm whether geom_area()
  # or geom_col() was intended.
  geom_polygon(show.legend = FALSE) +
  labs(x = "Age", y = "Percentage (%)") +
  theme_pubr() +
  facet_wrap(~Response_text)

# Stack the demographics panel (labelled "A") above the age panel ("B") in a
# single column, then write the combined figure as a 30 cm x 19 cm PNG.
figure <- ggarrange(
  plotlist = list(factors_demo, age_responder),
  labels = c("A", "B"),
  nrow = 2,
  ncol = 1
)

ggsave(
  filename = "../images/demographic.png",
  plot = figure,
  width = 30,
  height = 19,
  units = "cm"
)