library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dbplyr)
##
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
library(bikeHelpR)
library(recipes)
##
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
##
## step
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# ---- Connections ----
# Open the ODBC connection for the "Content DB" data source and point at the
# modelling table; rows are only brought into R later, when collect() is called.
con <- DBI::dbConnect(
  odbc::odbc(),
  "Content DB"
)
df <- dplyr::tbl(con, "bike_model_data")

# Register the RStudio Connect board used for pinning; the API key is read
# from the environment so it never appears in the script.
pins::board_register_rsconnect(
  key = Sys.getenv("RSTUDIOCONNECT_API_KEY"),
  server = "https://colorado.rstudio.com/rsc"
)
# ---- Train/test date split ----
# Hold out the most recent `n_days_test` distinct dates for testing and train
# on the `months_train` months that end at the split date (the newest date
# that is NOT held out).
n_days_test <- 2
months_train <- 6

# The newest n_days_test + 1 distinct dates in the table, newest first.
# count(date) yields one row per distinct date.
dates <- df %>%
  count(date) %>%
  arrange(desc(date)) %>%
  head(n_days_test + 1) %>%
  pull(date) %>%
  as.Date()

# Fail early with a clear message: with too few dates, the split date below
# would be NA and every later filter would quietly misbehave.
if (length(dates) < n_days_test + 1) {
  stop(
    "Need at least ", n_days_test + 1,
    " distinct dates in the source table, found ", length(dates), ".",
    call. = FALSE
  )
}

split_date <- dates[n_days_test + 1]
# %m-% is lubridate's month-aware subtraction.
start_train_date <- split_date %m-% months(months_train)
# seq_len() selects nothing when n_days_test is 0, whereas 1:0 would have
# selected the split date itself.
test_dates <- dates[seq_len(n_days_test)]
test_dates_str <- paste(test_dates, collapse = " and ")

# Report the real training cutoff (split_date); the earliest test date is not
# part of the training window.
print(glue::glue(
  "Using data on or before {split_date} as training, data from {test_dates_str} to test."
))
## Using data on or before 2020-12-27 as training, data from 2020-12-29 and 2020-12-28 to test.
print(glue::glue("Using data between {start_train_date} and {split_date} for training."))
## Using data between 2020-06-27 and 2020-12-27 for training.
# ---- Training data ----
# Restrict the remote table to the training window (both bounds inclusive)
# and only then pull the rows into local memory.
train_dat <- df %>%
  dplyr::filter(
    date >= start_train_date,
    date <= split_date
  ) %>%
  dplyr::collect()

# ---- Preprocessing recipe ----
# Model n_bikes on every other column, dummy-encode `dow`, and estimate the
# recipe on the training data. retain = FALSE means the prepped recipe does
# not keep a copy of the processed training set.
recipe <- recipes::recipe(n_bikes ~ ., data = train_dat) %>%
  recipes::step_dummy(dow) %>%
  recipes::prep(training = train_dat, retain = FALSE)
# ---- Publish parameters ----
# Pin the split metadata and the prepped recipe to the "rsconnect" board under
# the name "bike_model_params".
pins::pin(
  x = list(
    train_date = Sys.Date(),
    split_date = split_date,
    train_start = start_train_date,
    recipe = recipe
  ),
  name = "bike_model_params",
  description = "Parameters for Creating Training Dataset",
  board = "rsconnect"
)

# Release the database connection once everything has been published.
DBI::dbDisconnect(conn = con)