COVID-19:
#'
#' Load COVID-19 spread: infected, recovered, and fatal cases
#' Source: https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series
#'
#' Downloads the global "confirmed", "recovered" and "deaths" time series,
#' sums the numeric columns per country and joins the three series on
#' (country, date).
#'
#' @return A table with one row per country and date and the columns
#'   `country`, `date`, `confirmed_n`, `recovered_n` and `deaths_n`.
#'   Stops with an error when the joined result has no rows.
#'
load_covid_spread <- function() {
  # library() rather than require(): a missing package must fail right here
  # instead of surfacing later as "could not find function".
  # The packages stay attached on purpose: the script code following this
  # function uses `%>%` and dplyr verbs unqualified.
  library(dplyr)
  library(data.table)
  library(purrr)
  library(tidyr)

  # Download one global series ("confirmed", "recovered" or "deaths") and
  # reshape it to long format: one row per (country, date) with the count `n`.
  load_time_series <- function(.case_type) {
    path_pattern <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_%s_global.csv"

    fread(sprintf(path_pattern, .case_type)) %>%
      rename(country = `Country/Region`) %>%
      select(-c(`Province/State`, Lat, Long)) %>%
      # collapse province-level rows into a single row per country
      group_by(country) %>%
      summarise_if(is.numeric, sum) %>%
      ungroup() %>%
      # the remaining columns are one per date; turn them into rows
      gather(key = "date", value = "n", -country) %>%
      # namespace-qualified: lubridate is not attached by this function
      mutate(date = lubridate::mdy(date))
  }

  join_keys <- c("country", "date")

  dt <- load_time_series("confirmed") %>%
    rename(confirmed_n = n) %>%
    inner_join(
      load_time_series("recovered") %>% rename(recovered_n = n),
      by = join_keys
    ) %>%
    inner_join(
      load_time_series("deaths") %>% rename(deaths_n = n),
      by = join_keys
    )

  stopifnot(nrow(dt) > 0)

  dt
}
# Download the per-country spread series once and keep it for the steps below.
spread_raw <- load_covid_spread()
# Preview ten randomly chosen rows.
sample_n(spread_raw, 10)
:
#'
#' Load countries stats
#' Source: https://ods.ai/competitions/sberbank-covid19-forecast
#'
#' Reads the countries table from the project repository and drops the
#' `iso_alpha2`, `iso_numeric`, `name` and `official_name` columns.
#'
#' @return The countries table without the dropped columns. Stops with an
#'   error when the table has no rows.
#'
load_countries_stats <- function() {
  # library() rather than require(): a missing package must fail right here.
  # magrittr stays attached on purpose: later script code uses `%<>%`.
  library(dplyr)
  library(magrittr)

  # Namespace-qualified so the function does not depend on data.table having
  # been attached by an earlier call elsewhere in the script.
  dt <- data.table::fread("https://raw.githubusercontent.com/codez0mb1e/covid-19/master/data/countries.csv")

  dt <- dt %>%
    select(-c(iso_alpha2, iso_numeric, name, official_name))

  stopifnot(nrow(dt) > 0)

  dt
}
# Download the country-level statistics once and keep them for the join below.
countries_raw <- load_countries_stats()
# Preview ten randomly chosen rows.
sample_n(countries_raw, 10)
:
# Combine the spread series with country attributes and derive extra measures.
data <- spread_raw %>%
  # attach ISO code and population; rows whose country has no ISO code or no
  # matching `ccse_name` are removed by the inner join
  inner_join(
    countries_raw %>%
      filter(!is.na(iso_alpha3)) %>%
      select(ccse_name, country_iso = iso_alpha3, population),
    by = c(country = "ccse_name")
  ) %>%
  # active cases = confirmed minus recovered minus deaths
  mutate(active_n = confirmed_n - recovered_n - deaths_n) %>%
  # for every "*_n" column add a "*_n_per_1M" column (cases per 1M population)
  mutate_at(
    vars(ends_with("_n")),
    list(per_1M = ~ . / population * 1e6)
  )
## Calculate number of days since...
# For each country, find the earliest date on which the column named by
# `.case_type` is strictly greater than `.n`.
#   dt         - table with `country`, `date` and the `.case_type` column
#   .case_type - name of the column to test, as a string
#   .n         - threshold the column value must exceed
# Returns one row per country with `country` and `since_date`; a country with
# no row above the threshold does not appear in the result.
get_date_since <- function(dt, .case_type, .n) {
  by_country <- group_by(dt, country)
  above_threshold <- filter_at(by_country, vars(.case_type), ~ . > .n)
  summarise(above_threshold, since_date = min(date))
}
# Attach three per-country milestone dates to every row, then turn each
# milestone into a "days since" counter.
# NOTE(review): these are inner joins, so a country that never crosses one of
# the thresholds has no milestone row and is removed from `data` entirely -
# confirm this is intended.
data <- data %>%
  # first date with more than 0 confirmed cases
  inner_join(
    rename(
      get_date_since(data, "confirmed_n", 0),
      since_1st_confirmed_date = since_date
    ),
    by = "country"
  ) %>%
  # first date with more than 1 confirmed case per 1M population
  inner_join(
    rename(
      get_date_since(data, "confirmed_n_per_1M", 1),
      since_1_confirmed_per_1M_date = since_date
    ),
    by = "country"
  ) %>%
  # first date with more than 0.1 deaths per 1M population
  inner_join(
    rename(
      get_date_since(data, "deaths_n_per_1M", .1),
      since_dot1_deaths_per_1M_date = since_date
    ),
    by = "country"
  ) %>%
  # for every "since_*" column add "<column>_n_days": days from the milestone
  # to the row's date
  mutate_at(
    vars(starts_with("since_")),
    list(n_days = ~ as.numeric(difftime(date, ., units = "days")))
  ) %>%
  # keep only strictly positive offsets; the milestone day itself and earlier
  # days become NA
  mutate_at(
    vars(ends_with("n_days")),
    list(~ if_else(. > 0, ., NA_real_))
  )
:
# Make the minimal theme the default for the plots that follow.
theme_set(theme_minimal())

# Caption text: data attribution, the most recent date present in `data`,
# and a pointer to the source code.
lab_caption <- paste0(
  "Data source: Novel Coronavirus (COVID-19) Cases provided by Johns Hopkins University Center for Systems Science. \n",
  sprintf(
    "Last updated: %s. ",
    format(max(data[["date"]]), "%d %B, %Y")
  ),
  "Source code: github.com/codez0mb1e/covid-19"
)
#'
#' Keep only the rows of the selected countries
#'
#' @param dt Table with a `country_iso` column.
#' @param .countries Character vector of codes matched against `country_iso`.
#'   Defaults to the five codes this helper was originally hard-coded to, so
#'   existing one-argument calls behave as before.
#' @return The rows of `dt` whose `country_iso` is one of `.countries`.
#'
filter_countries <- function(dt, .countries = c("KOR", "ITA", "RUS", "CHN", "USA")) {
  dt %>% filter(country_iso %in% .countries)
}