--- title: "Vaccination coverage survey, Goma IDP camps, June 2024" output: word_document: keep_md: true --- ```{r setup, include = FALSE, results='hide', message=FALSE, warning=FALSE} ## hide all code chunks in the output, but show errors knitr::opts_chunk$set(echo = FALSE, error = TRUE, message= FALSE, warning = FALSE, fig.width = 6*1.25, fig.height = 6) ## set default NA to - in output, define figure width/height options(knitr.kable.NA = "-") ## Installing required packages for this template required_packages <- c("knitr", # create output docs "here", # find your files "rio", # for importing data "epitrix", # clean/shape data "dplyr", # clean/shape data "tidyr", # clean/shape data "forcats", # manipulate and rearrange factors "stringr", # manipulate texts "ggplot2", # create plots and charts "sitrep", # MSF field epi functions "survey", # for survey functions "srvyr" , # dplyr wrapper for survey package "janitor", "apyramid", "lubridate", "kableExtra", # for tables "gt", "gtsummary", "arsenal" ) for (pkg in required_packages) { ## install packages if not already present if (!pkg %in% rownames(installed.packages())) { install.packages(pkg) } ## load packages to this current session library(pkg, character.only = TRUE) } ## set default text size to 18 for plots ## give classic black/white axes for plots ggplot2::theme_set(theme_classic(base_size = 18)) ``` ```{r run_pop_estimate, message=FALSE, warning = FALSE} # Source the R script ## Not shareable source("Pop_estimation_Goma_2024.R") ``` ```{r read_nonDHIS_data, warning = FALSE, message = FALSE} ##Individual study_data_indiv<- rio::import(here::here("data", "Bulengo_Lush_baseline_vcs_V3_FINAL_18_06.xlsx"), which = "g6") #Household study_data_hh <- rio::import(here::here("data", "Bulengo_Lush_baseline_vcs_V3_FINAL_18_06.xlsx"), which = "Bulengo_Lush_baseline_vcs_V3") ``` ```{r merge_data_levels} ## join the individual and household data to form a complete data set study_data_raw <- left_join(study_data_hh, study_data_indiv, by = c("_index" = "_parent_index")) ##This is the equivalent of the pseudonimysed data set available in the MSF DSP ##pseudonimysation process was as below ## merge age variables --------------------------------------------------------- # age_group_levels <- c("0-11 months", "12-23 months", "2-4 years", "5-14 years", "15-29 years", "30-44 years", "45+ years") # # dat <- dat %>% # mutate( # age_group = case_when( # age_months <12 ~ "0-11 months", # age_months>=12 & age_months <24 ~ "12-23 months", # age_years >= 2 & age_years <=4 ~ "2-4 years", # age_years >= 5 & age_years <=14 ~ "5-14 years", # age_years >= 15 & age_years <= 29 ~ "15-29 years", # age_years >= 30 & age_years <= 44 ~ "30-44 years", # age_years >= 45 ~ "45+ years", # ), # .after = age_years, # age_group = factor(age_group, levels = age_group_levels) # ) # # # # withhold old age variables # dat$age_months <- NA_real_ # dat$age_years <- NA_real_ # # #withhold locations (not relevant for analysis) # dat$ads <- NA_character_ # dat<-camp <- NA_character_ ## Further adjustments to the below code will be required to run it with pseudonimysed data. ## E.g results by camp or aire de sante (ads) will not be available nor population estimates ``` ```{r prep_data} ## generates MSF standard dictionary for Dharma study_data_dict <- msf_dict_survey("Vaccination_short") ## make a copy of your orginal dataset and name it study_data_cleaned study_data_cleaned <- study_data_raw ## define clean variable names using clean_labels from the epitrix package. cleaned_colnames <- epitrix::clean_labels(colnames(study_data_cleaned)) ## overwrite variable names with defined clean names colnames(study_data_cleaned) <- cleaned_colnames ## In this example, we rename variables several variables ## The formula for this is rename(data, NEW_NAME = OLD_NAME). study_data_cleaned <- rename(study_data_cleaned, health_district = "lieu_de_residence") study_data_cleaned <- study_data_cleaned %>% mutate(health_district = recode (health_district, "communaut" = "resident")) ``` ```{r create_age_group} ## Age group variables ---------------------------------------------------------- ## This step shows you how to create categorical variables from numeric variables. ## We have some intermediate steps on the way. ## make sure age is an integer study_data_cleaned <- study_data_cleaned %>% mutate(age_years = as.integer(age_years), age_months = as.integer(age_months)) #The below shows another option for age grouping # create an age group variable (combining those over 5 years) study_data_cleaned <- study_data_cleaned %>% mutate(age_group = factor( if_else(age_years >= 5, "5+", as.character(age_years) ) )) ##Create age group 2 # Create a different age_group column using cut function # Define first break points and labels for age groups breaks <- c(0, 5, 15, 30, 45, Inf) labels <- c("0-4", "5-14", "15-29", "30-44", "45+") study_data_cleaned <- study_data_cleaned %>% mutate(age_group2 = cut(age_years, breaks = breaks, labels = labels, include.lowest = TRUE, right = FALSE)) ## Create age group by 10 years for pyramind study_data_cleaned$age_group_10 <- age_categories(study_data_cleaned$age_years, lower = 0, upper = 60, by = 10) ``` ```{r factor_vars} ##DTP vaccine ever differencing card and verbal study_data_cleaned <- study_data_cleaned %>% mutate(dtp_vaccination_status = case_when( is.na(ever_vac) ~ NA_character_, dtp_ever == "yes" & vac_card_pic== "yes" ~ "Vaccinated - card", dtp_ever == "yes" & vac_card_pic== "no" ~ "Vaccinated - verbal", ever_vac == "no" ~ "Unvaccinated", dtp_ever == "no" ~ "Unvaccinated", ever_vac == "dont_know" ~ "Unknown", dtp_ever == "dont_know" ~ "Unknown") ) ##DTP vaccine ever, card abd verbal same ##The below code consider the do not know answer as "Unvaccinated" study_data_cleaned <- study_data_cleaned %>% mutate(dtp_vaccination_status_simple = case_when( is.na(ever_vac) ~ NA_character_, dtp_ever == "yes" & vac_card_pic== "yes" ~ "Vaccinated", dtp_ever == "yes" & vac_card_pic== "no" ~ "Vaccinated", ever_vac == "no" ~ "Unvaccinated", dtp_ever == "no" ~ "Unvaccinated", ever_vac == "dont_know" ~ "Unvaccinated", dtp_ever == "dont_know" ~ "Unvaccinated") ) ##DTP vaccine number of doses ##The below code consider the do not know answer as "Unvaccinated" study_data_cleaned <- study_data_cleaned %>% mutate(dtp_doses = case_when( is.na(ever_vac) ~ NA_character_, dtp_ever == "yes" & dtp_n== "1" ~ "1 dose", dtp_ever == "yes" & dtp_n== "2" ~ "2 dose", dtp_ever == "yes" & dtp_n== "3" ~ "3 dose", dtp_ever == "yes" & is.na(dtp_n) ~ NA_character_, ever_vac == "no" ~ "Unvaccinated", dtp_ever == "no" ~ "Unvaccinated", ever_vac == "dont_know" ~ "Unvaccinated", dtp_ever == "dont_know" ~ "Unvaccinated") ) # create month of arrival study_data_cleaned <- study_data_cleaned %>% mutate(month_hh_arriv = paste(month(hh_arriv_date, label = TRUE, abbr = FALSE), year(hh_arriv_date), sep = "-")) # Create and debug month_year_string column study_data_cleaned <- study_data_cleaned %>% mutate( month_year_string = paste(year(hh_arriv_date), sprintf("%02d", month(hh_arriv_date)), "01", sep = "-") ) # Convert month_year_string to Date study_data_cleaned <- study_data_cleaned %>% mutate( month_year_date = as.Date(month_year_string, format = "%Y-%m-%d") ) # Arrange data and factorize month_hh_arriv study_data_cleaned <- study_data_cleaned %>% arrange(month_year_date) %>% mutate( month_hh_arriv = factor(month_hh_arriv, levels = unique(month_hh_arriv)) ) %>% filter(!is.na(month_hh_arriv)) # Remove rows with NA in month_hh_arriv ``` ```{r remove_unused_data_duplicates} ## store the cases that you drop so you can describe them (e.g. non-consenting) dropped <- study_data_cleaned %>% filter(consent=="no") ## drop the unused rows from the survey data set study_data_cleaned <- study_data_cleaned %>% filter(consent!= "no") # ## drop the unused rows from the survey data set # study_data_cleaned <- study_data_cleaned %>% # filter(consent) study_data_cleaned <- study_data_cleaned %>% filter(!is.na(age_group)) ``` ```{r read_population_data, warning = FALSE, message = FALSE} ## generate population data by age groups in years for district A population_data_age_district_a<- gen_population(total_pop = 330050, # set total population groups = c("0", "1", "2", "3", "4", "5+"), # set groups proportions = c(0.04, 0.04, 0.04, 0.04, 0.04, 0.8), # set proportions for each group strata = c("male", "female")) %>% # stratify by gender rename(age_group = groups, # rename columns (NEW NAME = OLD NAME) sex = strata, population = n) %>% mutate(health_district = "camp") ## generate population data by age groups in years for district A population_data_age_district_b<- gen_population(total_pop = 346195, # set total population groups = c("0", "1", "2", "3", "4", "5+"), # set groups proportions = c(0.04, 0.04, 0.04, 0.04, 0.04, 0.8), # set proportions for each group strata = c("male", "female")) %>% # stratify by gender rename(age_group = groups, # rename columns (NEW NAME = OLD NAME) sex = strata, population = n) %>% mutate(health_district = "resident") ## bind region population data together to get overall population population_data_age <- bind_rows(population_data_age_district_a, population_data_age_district_b) ##and create age group by months: create age group variable for under 2 years based on months #Create months for all study_data_cleaned <- study_data_cleaned %>% mutate(age_months = ifelse(is.na(age_months), age_years*12, age_months)) # Define first break points and labels for age groups breaks <- c(0, 4, 12, 24, 59, Inf) labels <- c("0<4m", "4<11m", "12-23m", "24-59m", "5y+") study_data_cleaned <- study_data_cleaned %>% mutate(age_group_mon = cut(age_months, breaks = breaks, labels = labels, include.lowest = TRUE, right = FALSE)) ##Create other age group for exploring differences between resident and camps areas # Define first break points and labels for age groups breaks <- c(0, 5, 15, 30, 45, Inf) labels <- c("0-4", "5-14", "15-29", "30-44", "45+") study_data_cleaned <- study_data_cleaned %>% mutate(age_group2 = cut(age_years, breaks = breaks, labels = labels, include.lowest = TRUE, right = FALSE)) ##Create sub set for children under 2--> this what will be used for weights, survey design and vaccination coverage tables study_data_target<- study_data_cleaned %>% filter (age_years<2) # I repeat the above age_group_mon, as otherwise there is a problem with one table breaks <- c(0, 4, 12, 24) labels <- c("0<4m", "4<11m", "12-23m") study_data_target <- study_data_target %>% mutate(age_group_mon2 = cut(age_months, breaks = breaks, labels = labels, include.lowest = TRUE, right = FALSE)) ``` ```{r demo_details} #calculate hh average size mean_hh <- round(mean(study_data_cleaned$member_number, na.rm = TRUE), 2) # 95% CI mean result <- study_data_cleaned %>% summarize( mean_hh = mean(member_number, na.rm = TRUE), sem_hh = sd(member_number, na.rm = TRUE) / sqrt(n()), t_value = qt(0.975, df = n() - 1), margin_of_error = t_value * sem_hh, ci_lower = mean_hh - margin_of_error, ci_upper = mean_hh + margin_of_error ) # Format the 95% CI as a single string mean_hhCI <- paste0(round(result$ci_lower, 2), "-", round(result$ci_upper, 2)) #median hh median_hh <- median(study_data_cleaned$member_number, na.rm = TRUE) iqr_hh<-IQR(study_data_cleaned$member_number) ###CALCULATE BY idpS AND RESIDENTS #create subsets idps<-study_data_cleaned %>% filter(health_district == "camp") resid<-study_data_cleaned %>% filter(health_district == "resident") ##IDPs #calculate hh average size mean_hh_idp<-round(mean(idps$member_number, na.rm = TRUE),2) mean_hh <- round(mean(study_data_cleaned$member_number, na.rm = TRUE), 2) # 95% CI mean result <- idps %>% summarize( mean_hh = mean(member_number, na.rm = TRUE), sem_hh = sd(member_number, na.rm = TRUE) / sqrt(n()), t_value = qt(0.975, df = n() - 1), margin_of_error = t_value * sem_hh, ci_lower = mean_hh - margin_of_error, ci_upper = mean_hh + margin_of_error ) # Format the 95% CI as a single string mean_hhCI_idp <- paste0(round(result$ci_lower, 2), "-", round(result$ci_upper, 2)) #median and IQR hh median_hh_idp <- median(idps$member_number, na.rm = TRUE) iqr_hh_idp<-IQR(idps$member_number) ##RESIDENTS #calculate hh average size mean_hh_res<-round(mean(resid$member_number, na.rm = TRUE),2) # 95% CI mean result <- resid %>% summarize( mean_hh = mean(member_number, na.rm = TRUE), sem_hh = sd(member_number, na.rm = TRUE) / sqrt(n()), t_value = qt(0.975, df = n() - 1), margin_of_error = t_value * sem_hh, ci_lower = mean_hh - margin_of_error, ci_upper = mean_hh + margin_of_error ) # Format the 95% CI as a single string mean_hhCI_res <- paste0(round(result$ci_lower, 2), "-", round(result$ci_upper, 2)) #median hh median_hh_res <- median(resid$member_number, na.rm = TRUE) iqr_hh_resid<-IQR(resid$member_number) ####CALCULATE % OF IDPS IN RESIDENT AREA idp_per<-round((sum(resid$idp)/sum(resid$member_number))*100,2) ``` ```{r survey_weights} ##Create survey weights for a stratified design------------------------------------------------------------------ ## create a variable called "surv_weight_strata" ## contains weights for each individual - by age group, sex and health district study_data_target <- add_weights_strata(x = study_data_target, p = population_data_age, surv_weight = "surv_weight_strata", surv_weight_ID = "surv_weight_ID_strata", age_group, sex, health_district) ``` ```{r survey_design} ## simple random --------------------------------------------------------------- survey_design_simple <- study_data_target %>% as_survey_design(ids = 1, # 1 for no cluster ids weights = NULL, # No weight added strata = NULL # sampling was simple (no strata) ) ## stratified ------------------------------------------------------------------ survey_design <- study_data_target %>% as_survey_design(ids = 1, # 1 for no cluster ids weights = surv_weight_strata, # weight variable created above strata = health_district # sampling was stratified by district ) ``` ```{r save_cleaned_data} # rio::export(study_data_cleaned, str_glue("study_data_cleaned_{Sys.Date()}.xlsx")) ``` # Results ## Population estimates We included a total of `r nrow(study_data_hh)` points in the survey to estimate population. Details on points visited can be seen in annexes. The calculation of the average shelter size was done using `r nrow(data_pop)`. The resulting total population estimate was `r total_population` (95%CI: `r total_ci`). Details of hut average size and household average size can be seen in annexes. Among the study population in the residents areas `r idp_per`% were IDPs. Population estimates by camp can be seen in the table below ### Population estimates by camp ```{r population_estimates_table, warning=FALSE, message=FALSE, echo=FALSE} # Print the results_df as a nice table using gt results_df %>% gt() %>% fmt_number( columns = vars(Population), decimals = 0 ) %>% cols_label( `95% CI` = "95% Confidence Interval" ) ``` ## Survey inclusion ```{r inclusion_counts} ## get counts of number households num_hh <- study_data_cleaned %>% ## get unique houses by cluster distinct(shelt_id, team, date) %>% ## get number of rounds (count how many unique) nrow() ``` Among the `r nrow(study_data_hh)` points included there were `r num_hh` households that were interviewed (details on non visited points can be seen in annex). Among the `r nrow(dropped)` individuals without consent to participate in the survey, the reasons for refusal are described below. ----- PLEASE ADD HERE --- ## Demographic information In total we included `r nrow(study_data_cleaned)` indiviudals in the survey analysis. Among them `r nrow(study_data_target)` were 0 to 23 months. The age break down and a statistical comparison with the source population is shown as a annex. ```{r median_age_sex_ratios} ## compute the median age medage <- median(study_data_cleaned$age_years, na.rm = TRUE) ## paste the lower and uper quartile together iqr <- str_c( # basically copy paste togehter the following ## calculate the 25% and 75% of distribution, with missings removed quantile( study_data_cleaned$age_years, c(0.25, 0.75), na.rm = TRUE), ## between lower and upper place an en-dash collapse = "--") ## compute overall sex ratio sex_ratio <- study_data_cleaned %>% count(sex) %>% pivot_wider(names_from = sex, values_from = n) %>% mutate(ratio = round(male/female, digits = 3)) %>% pull(ratio) ## compute sex ratios by age group sex_ratio_age <- study_data_cleaned %>% count(age_group2, sex) %>% pivot_wider(names_from = sex, values_from = n) %>% mutate(ratio = round(male/female, digits = 3)) %>% select(age_group2, ratio) ## sort table by ascending ratio then select the lowest (first) min_sex_ratio_age <- arrange(sex_ratio_age, ratio) %>% slice(1) ``` Among the `r nrow(study_data_cleaned)` surveyed individuals, there were `r fmt_count(study_data_cleaned, sex == "female")` females and `r fmt_count(study_data_cleaned, sex == "male")` males (unweighted). The male to female ratio was `r sex_ratio` in the surveyed population. The lowest male to female ratio was `r min_sex_ratio_age$ratio` in the `r min_sex_ratio_age$age_group` year age group (males more affected by conflict?) The median age of surveyed individuals was `r medage` years (Q1-Q3 of `r iqr` years). Children under five years of age made up `r fmt_count(study_data_cleaned, age_years < 5)`of the surveyed individuals. The highest number of surveyed indivduals (unweighted) were in the `r table(study_data_cleaned$age_group) %>% which.max() %>% names()` year age group. ### Unweighted age distribution of population by year age group and gender. ```{r describe_by_age_group_and_sex, warning = FALSE, message = FALSE } tab_linelist(study_data_cleaned, age_group2, strata = sex, row_total = TRUE, col_total = TRUE, prop_total = TRUE) %>% ## select and rename column names appropriately select("Age" = "value", "Female cases (n)" = "female n", "% F" = "female proportion", "Male cases (n)" = "male n", "% M" = "male proportion", "Total") %>% kable(digits = 1) ``` There were `r fmt_count(study_data_cleaned, is.na(sex))` cases missing information on sex and `r fmt_count(study_data_cleaned, is.na(age_group))` missing age group. ### Unweighted age and gender distribution of household population covered by the survey (population pyramid). ```{r age_pyramid, warning=FALSE} study_data_cleaned %>% ## make sure the variables are factors mutate(age_group_10 = factor(age_group_10), sex = factor(sex)) %>% age_pyramid( age_group = "age_group_10", split_by = "sex", proportion = TRUE)+ labs(y = "Proportion", x = "Age group (years)") + # change axis labels theme(legend.position = "bottom", # move legend to bottom legend.title = element_blank(), # remove title text = element_text(size = 18) # change text size ) ``` ### Unweighted age distribution of study target population by month and gender. ```{r describe_by_age_category_and_sex} tab_linelist(study_data_target, age_group_mon, strata = sex, row_total = TRUE, col_total = TRUE, prop_total = TRUE) %>% ## select and rename column names appropriately select("Age" = "value", "Female cases (n)" = "female n", "%." = "female proportion", "Male cases (n)" = "male n", "%" = "male proportion", "Total") %>% kable(digits = 1) ``` ### Unweighted age distribution of study target population by month and area. ```{r describe_by_age_category_and_area} tab_linelist(study_data_target, age_group_mon, strata = health_district, row_total = TRUE, col_total = TRUE, prop_total = TRUE) %>% ## select and rename column names appropriately select("Age" = "value", "Camp (n)" = "camp n", "%." = "camp proportion", "Resident (n)" = "resident n", "%" = "resident proportion", "Total") %>% kable(digits = 1) ``` ## Vaccination coverage Children younger than two months were excluded from the analysis as first DTP dose is at 6 weeks. In all tables for DTP (ever or at least one dose) vaccination coverage children less than two months are excluded. In all tables for coverage of DTP by number of doses children who are younger than 4 months were are excluded (as the last dose of DPT should be given at 14 weeks) When no specified otherwise the vaccination coverage tables present results accepting equal validity from self-reported and card-reported vaccination status. Tables presenting results for vaccination coverage for DTP (ever vaccinated with DTP)refer to the percentage of children who have ever received at least one dose of DTP. ### Percentage of children that has never had any vaccination ```{r ever_vac} survey_design %>% # use the survey object (weighted) ## calculate weighted counts and weighted proportions tab_survey(ever_vac, deff = FALSE) %>% ## select and rename appropriate columns select("Ever vaccinated-any antigen" = value, "Children (n)" = n, "% (95% CI)" = ci )%>% #"Design effect" = deff) %>% --> I will remove DEFF everywhere as it does not apply kable(digits = 1) ``` ### Unweighted vaccination coverage for DTP (ever vaccinated with DTP) We present unweigthed results because the main indicators (DTP vaccine coverage were the same in both locations) ```{r vaccination_coverage_overall} survey_design_simple %>% # use the survey object (weighted) ## calculate weighted counts and weighted proportions filter(age_months>1) %>% tab_survey(dtp_vaccination_status_simple, deff = FALSE) %>% ## select and rename appropriate columns select("Vaccination status" = value, "Children (n)" = n, "% (95% CI)" = ci )%>% #"Design effect" = deff) %>% --> I will remove DEFF everywhere as it does not apply kable(digits = 1) ``` ### Weighted vaccination coverage by number of doses ```{r vaccination_coverage_overall_dtp_n} survey_design_simple %>% # use the survey object (weighted) ## calculate weighted counts and weighted proportions filter(age_months>3) %>% tab_survey(dtp_doses, deff = FALSE) %>% ## select and rename appropriate columns select("Vaccination status" = value, "Children (n)" = n, "% (95% CI)" = ci )%>% #"Design effect" = deff) %>% --> I will remove DEFF everywhere as it does not apply kable(digits = 1) ``` ### Unweighted DTP (ever DTP) vaccination coverage by area (IDP or resident) Results by area will be presented unweighted as the different areas samples are not relevant and the age group strata are almost identical ```{r vaccination_coverage_strata} survey_design_simple %>% # use the survey object (weighted) filter(age_months>1) %>% tab_survey(dtp_vaccination_status_simple, strata = health_district) %>% select(-variable) %>% rename("Vaccination status" = value) %>% augment_redundant(" (n)" = " n") %>% # wrap all "n" in braces (note space before n) rename_redundant("% (95% CI)" = " ci") %>% # relabel all columns containing "ci" kable(digits = 1) ``` ### Unweighted DTP vaccination coverage by number of doses and area Including only children 4 to 23 months ```{r dtp_doses_strata} survey_design_simple %>% # use the survey object (weighted) filter (age_months>3) %>% tab_survey(dtp_doses, strata = health_district) %>% select(-variable) %>% rename("Vaccination status" = value) %>% augment_redundant(" (n)" = " n") %>% # wrap all "n" in braces (note space before n) rename_redundant("% (95% CI)" = " ci") %>% # relabel all columns containing "ci" kable(digits = 1) ``` ### Unweighted DTP vaccination coverage by number of doses and age group ```{r dtp_doses_strata_age} survey_design_simple %>% # use the survey object (weighted) #filter (age_months>3) %>% tab_survey(dtp_doses, strata = age_group_mon2) %>% select(-variable) %>% rename("Vaccination status" = value) %>% augment_redundant(" (n)" = " n") %>% # wrap all "n" in braces (note space before n) rename_redundant("% (95% CI)" = " ci") %>% # relabel all columns containing "ci" kable(digits = 1) ``` ### Unweighted DTP (ever) vaccination coverage by sex ```{r vaccination_coverage_sex} survey_design_simple %>% # use the survey object (weighted) ## calculate weighted counts and weighted proportions - stratified by sex filter(age_months>1) %>% tab_survey(dtp_vaccination_status_simple, strata = sex, deff = FALSE) %>% select(-variable) %>% rename("Vaccination status" = value) %>% augment_redundant(" (n)" = " n") %>% # wrap all "n" in braces (note space before n) rename_redundant("% (95% CI)" = " ci") %>% # relabel all columns containing "ci" kable(digits = 1) ``` ### Unweighted DTP (ever) vaccination coverage by age group In the below table we did include children who are less than two months old, because results are anyway stratified by age group and this can help understanding better the data. We present unweigthed results because the main indicators (DTP vaccine coverage) were the same in both locations. ```{r vaccination_coverage_age_group} survey_design_simple %>% tab_survey(dtp_vaccination_status_simple, strata = age_group_mon2, deff = FALSE, digits = 1, drop = "Missing", transpose = "value") %>% rename("Age group (months)" = age_group_mon2) %>% augment_redundant(" (n)" = " n") %>% # wrap all "n" in braces (note space before n) rename_redundant("% (95% CI)" = " ci") %>% # relabel all columns containing "ci" kable(digits = 1) ``` ## INTERPREATION OF RESULTS ```{r interpretations} # Target population for resident areas # AIRES DE SANTE # Aire de Sante POP 2024 # Buhimba 20040 # Mugunga 49199 total_population_res<-(20040+49199)*0.3+(20040+49199) #create tab vaccine coverage ever DTp (for less than 4 m) #Create table age groups poprop<-tab_linelist(study_data_cleaned, age_group_mon, strata = health_district, row_total = FALSE, col_total = TRUE, prop_total = FALSE) #extract values from table less_4m_idp <- poprop %>% filter(value == "0<4m") %>% pull(`camp proportion`) four_11_idp<- poprop %>% filter(value == "4<11m") %>% pull(`camp proportion`) twelve_23_idp<- poprop %>% filter(value == "12-23m") %>% pull(`camp proportion`) twentythree_59_idp<- poprop %>% filter(value == "24-59m") %>% pull(`camp proportion`) less_4m_res <- poprop %>% filter(value == "0<4m") %>% pull(`resident proportion`) four_11_res<- poprop %>% filter(value == "4<11m") %>% pull(`resident proportion`) twelve_23_res<- poprop %>% filter(value == "12-23m") %>% pull(`resident proportion`) twentythree_59_res<- poprop %>% filter(value == "24-59m") %>% pull(`resident proportion`) #Calculate populations points estimates less_4m_idp_p<-less_4m_idp/100*total_population m4to11_idp_p<-(four_11_idp/100)*total_population m12to23_idp_p<-twelve_23_idp/100*total_population m23to59_idp_p<-(twentythree_59_idp/100)*total_population less_4m_res_p<-less_4m_res/100*total_population_res m4to11_res_p<-four_11_res/100*total_population_res m12to23_res_p<-twelve_23_res/100*total_population_res m23to59_res_p<-twentythree_59_res/100*total_population_res #Calculate populations points estimates (with 95%CI, for IDPs) less_4m_idp_h<-less_4m_idp/100*CIb m4to11_idp_h<-(four_11_idp/100)*CIb m12to23_idp_h<-twelve_23_idp/100*CIb m23to59_idp_h<-(twentythree_59_idp/100)*CIb #Vaccination coverage estimates (num,bers to be updated or extratced from tables). Using same estimates for IDPs and residents #Never vaccinated VCever0to3<-0.618 VCever4to11<-0.234 VCever12to23<-0.199 #Partially vaccinated adding those with 1 dose and those with 2 doses) VCpart0to3<-0.263+0.105 VCpart4to11<-0.271+0.286 VCpart12to23<-0.078+0.265 #Chuldren needing all doses in IPDs #I consider higher 95%CIs for pop estimates all_d0_3_i<-VCever0to3*less_4m_idp_h all_d4_11_i<-VCever4to11*m4to11_idp_h all_d12_23_i<-VCever12to23*m12to23_idp_h all_d23_59_i<-VCever12to23*m23to59_idp_h #Consider same VC as 12 to 23 m #Children needing some doses in IPD #I also consider higher 95% CI and 1 and 2 doses of VC part_d0_3_i<-VCpart0to3*less_4m_idp_h part_d4_11_i<-VCpart4to11*m4to11_idp_h part_d12_23_i<-VCpart12to23*m12to23_idp_h part_d23_59_i<-VCpart12to23*m23to59_idp_h #I consider higher 95%CIs for pop estimates IDPs tot_all_idp<-round(all_d0_3_i+all_d4_11_i+all_d12_23_i+all_d23_59_i,0) #I also consider higher 95% CI and 1 and 2 doses of VC IDPs tot_part_idp<-round(part_d0_3_i+part_d4_11_i+part_d12_23_i+part_d23_59_i,0) #Chuldren needing all doses Residents #I consider higher 95%CIs for pop estimates all_d0_3_r<-VCever0to3*less_4m_res_p all_d4_11_r<-VCever4to11*m4to11_res_p all_d12_23_r<-VCever12to23*m12to23_res_p all_d23_59_r<-VCever12to23*m23to59_res_p #Consider same VC as 12 to 23 m #Children needing some doses residents #I also consider higher 95% CI and 1 and 2 doses of VC part_d0_3_r<-VCpart0to3*less_4m_res_p part_d4_11_r<-VCpart4to11*m4to11_res_p part_d12_23_r<-VCpart12to23*m12to23_res_p part_d23_59_r<-VCpart12to23*m23to59_res_p #I consider higher 95%CIs for pop estimates tot_all_r<-round(all_d0_3_r+all_d4_11_r+all_d12_23_r+all_d23_59_r,0 ) #I also consider higher 95% CI and 1 and 2 doses of VC tot_part_r<-round(part_d0_3_r+part_d4_11_r+part_d12_23_r+part_d23_59_r,0) ## totals tot_all<-tot_all_idp + tot_all_r tot_part<- round(tot_part_idp+tot_part_r,0) ##Formatting for report output tot_all_idp <- format(tot_all_idp, scientific = FALSE, big.mark = ",", nsmall = 0) tot_part_idp <- format(tot_part_idp, scientific = FALSE, big.mark = ",", nsmall = 0) tot_part_r <- format(tot_part_r, scientific = FALSE, big.mark = ",", nsmall = 0) tot_all_r<- format(tot_all_r, scientific = FALSE, big.mark = ",", nsmall = 0) tot_all<-format(tot_all, scientific = FALSE, big.mark = ",", nsmall = 0) tot_part<- format(tot_part, scientific = FALSE, big.mark = ",", nsmall = 0) ``` For the calculation in needs of vaccines we consider two sets of indicators: - Vaccination coverage estimates for the different age groups in the different areas (using the results presented in this report, and using overall coverage for both IDPs and residents, as results were similar) - Percentage of population (for IDPs we calculated this using satellite imagery and the survey and for resident areas we use MoH population estimates) #### Distribution of target population by age group (all children, inlcuding those vaccinated) (In IDPs we consider the higher 95%CI of the estimates for population estimates) ``` {r pop_inter, warning = FALSE, message= FALSE} ## Create table for target populations # Create a data frame with the given structure age_groups <- c("0-3 m", "4-11m", "12-23m", "24-59m", "Total") # Combine the vectors into a data frame table_data <- data.frame( `Age group` = age_groups, IDP = c(less_4m_idp_h, m4to11_idp_h, m12to23_idp_h, m23to59_idp_h, sum(less_4m_idp_h, m4to11_idp_h, m12to23_idp_h, m23to59_idp_h)), Residents = c(less_4m_res_p, m4to11_res_p, m12to23_res_p, m23to59_res_p, sum(less_4m_res_p, m4to11_res_p, m12to23_res_p, m23to59_res_p)), Total = c( sum(less_4m_idp_h, less_4m_res_p), sum(m4to11_idp_h, m4to11_res_p), sum(m12to23_idp_h, m12to23_res_p), sum(m23to59_idp_h, m23to59_res_p), sum(less_4m_idp_h, m4to11_idp_h, m12to23_idp_h, m23to59_idp_h, less_4m_res_p, m4to11_res_p, m12to23_res_p, m23to59_res_p) ) ) kable(table_data) ``` ### Calculations of target population considering vaccination status #### Children needing all doses The estimated number of children needing ALL doses in IDP camps is: `r tot_all_idp` The estimated number of children needing ALL doses in reesident areas is: `r tot_all_r` **The total of children needing ALL doses would be `r tot_all`** #### Children needing partial doses The estimated number of children needing SOME doses in IDP camps is: `r tot_part_idp` The estimated number of children needing SOME doses in resident areas is: `r tot_part_r` **The total of children needing SOME doses would be `r tot_part`** #### Assumptions - We consider that >24 # Annexes ### Unweighted age and gender distribution, stratified by health district, of household population covered by the survey (population pyramid). ```{r age_pyramid_strata, warning=FALSE} study_data_cleaned %>% ## make sure the variables are factors mutate(age_group_10 = factor(age_group_10), sex = factor(sex)) %>% age_pyramid( age_group = "age_group_10", split_by = "sex", stack_by = "health_district", proportion = TRUE)+ labs(y = "Proportion", x = "Age group (years)") + # change axis labels theme(legend.position = "bottom", # move legend to bottom legend.title = element_blank(), # remove title text = element_text(size = 18) # change text size ) ``` ### Unweighted distribution of participants by age groups and area ```{r describe_by_age_category_and_area2} tab_linelist(study_data_cleaned, age_group_mon, strata = health_district, row_total = FALSE, col_total = TRUE, prop_total = FALSE) %>% ## select and rename column names appropriately select("Age" = "value", "Camp (n)" = "camp n", "%." = "camp proportion", "Resident (n)" = "resident n", "%" = "resident proportion") %>% kable(digits = 1) ``` ```{r describe_by_age_group_and_area, warning = FALSE, message = FALSE } tab_linelist(study_data_cleaned, age_group2, strata = health_district, row_total = FALSE, col_total = TRUE, prop_total = FALSE) %>% ## select and rename column names appropriately select("Age group" = "value", "Camp (n)" = "camp n", "% camp" = "camp proportion", "Resident (n)" = "resident n", "% Resident" = "resident proportion") %>% kable(digits = 1) ``` ### Weighted DTP ever vaccination coverage; distinguishing between vaccination cards and verbal confirmation ```{r vaccination_coverage_overall_detail} survey_design %>% # use the survey object (weighted) ## calculate weighted counts and weighted proportions filter(age_months>1) %>% tab_survey(dtp_vaccination_status) %>% select("Vaccination status" = value, "Children (n)" = n, "% (95% CI)" = ci) %>% kable(digits = 1) ``` ### Details of shelter average size, household average size and propoportion of IDPs living in resident areas The mean shelter size in the IDP camps was: `r hut_mean`, 95%CI:`r hut_meanCI` Points that were considered as zero for the calculation of the average shelter size for population estimates: - In `r nrow(not_sleep)` of the visited points the structure identified by satellite images and the mapathon was not a shelter used for sleeping or was not a shelter anymore. - In `r nrow(empty_shelters)` the structures were or had been used for sleeping but were empty (nobody had slept there the night before). Points that were discarded (considered NA) for the average shelter size calculations: - There were `r nrow(dropped)` points that were inhabited but the owner refused participation. - There were `r nrow(not_know_empty)` points for which the onwers were not found and the team could not establish if somebody had slept in the shelter. #### Median and mean household sizes by location ```{r median_mean_hh} # Create a data frame with the desired structure table_data <- data.frame( Location = c("Residents", "IDPs", "All"), Median = c(median_hh_res, median_hh_idp, median_hh), Median_IQR = c(iqr_hh_resid, iqr_hh_idp, iqr_hh), Mean = c(mean_hh_res, mean_hh_idp, mean_hh), Mean_95_CI = c(mean_hhCI_res, mean_hhCI_idp, mean_hhCI) ) # Use gt to create a styled table mean_med_hh <- table_data %>% gt() %>% cols_label( Median_IQR = "Median-IQR", Mean_95_CI = "Mean-95%CI" ) %>% tab_style( style = list( cell_text(weight = "bold") ), locations = cells_column_labels(everything()) ) # Print the gt table mean_med_hh ``` ### Distribution of households by month of arrival ```{r hh_month_arrival} # Create the histogram ggplot(study_data_cleaned, aes(x = month_hh_arriv)) + geom_bar() + xlab("Month-Year") + ylab("Count") + ggtitle("Distribution of Households by month of arrival") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) ``` ### Analysis of potential sampling bias The p-value for certain groups is less than 0.05, but the percentage of children sampled by age group it´s less than 1% different to what we expected (in all age groups). We consider this to be a very succesful sample despite the low sample size we aimed for. ```{r descriptive_sampling_bias, warning = FALSE} ## counts and props of the study population ag <- tab_linelist(study_data_cleaned, "age_group") %>% mutate(n_total = sum(n)) %>% select(-variable) %>% rename(age_group = value) %>% mutate(age_group = fct_inorder(age_group)) ## counts and props of the source population propcount <- group_by(population_data_age, age_group) %>% tally(population) %>% mutate(proportion = n / sum(n)) ## bind together the columns of two tables, group by age, and perform a ## binomial test to see if n/total is significantly different from population ## proportion. ## suffix here adds to text to the end of columns in each of the two datasets left_join(ag, propcount, by = "age_group", suffix = c("", "_pop")) %>% group_by(age_group) %>% ## broom::tidy(binom.test()) makes a data frame out of the binomial test and ## will add the variables p.value, parameter, conf.low, conf.high, method, and ## alternative. We will only use p.value here. You can include other ## columns if you want to report confidence intervals mutate(binom = list(broom::tidy(binom.test(n, n_total, proportion_pop)))) %>% unnest(cols = c(binom)) %>% # important for expanding the binom.test data frame mutate(proportion_pop = proportion_pop * 100) %>% ## Adjusting the p-values to correct for false positives ## (because testing multiple age groups). This will only make ## a difference if you have many age categories mutate(p.value = p.adjust(p.value, method = "holm")) %>% select(age_group, n, proportion, n_pop, proportion_pop, p.value) %>% ## Only show p-values over 0.001 (those under report as <0.001) mutate(p.value = ifelse(p.value < 0.001, "<0.001", as.character(round(p.value, 3)))) %>% ## rename the columns appropriatley rename( "Age group" = age_group, "Study population (n)" = n, "% study pop" = proportion, "Source population (n)" = n_pop, "% source pop" = proportion_pop, "P-value" = p.value ) %>% kable(digits = 1) ```