This report runs after the new data has been cleansed. The report pulls the cleaned data and loads the latest trained model, and then scores the data with the new model, over-writing any previous model scores.
library(magrittr)
library(bikeHelpR)
library(dbplyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:dbplyr':
##
## ident, sql
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(xgboost)
##
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
##
## slice
library(yardstick)
## For binary classification, the first factor level is assumed to be the event.
## Set the global option `yardstick.event_first` to `FALSE` to change this.
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.0 ──
## ✔ broom 0.5.6 ✔ recipes 0.1.13
## ✔ dials 0.0.7 ✔ rsample 0.0.7
## ✔ ggplot2 3.3.2 ✔ tibble 3.0.1
## ✔ infer 0.5.2 ✔ tune 0.1.0
## ✔ parsnip 0.1.1 ✔ workflows 0.1.1
## ✔ purrr 0.3.4
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::ident() masks dbplyr::ident()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ✖ xgboost::slice() masks dplyr::slice()
## ✖ dplyr::sql() masks dbplyr::sql()
## ✖ recipes::step() masks stats::step()
library(RPostgreSQL)
## Loading required package: DBI
con <- DBI::dbConnect(odbc::odbc(), "Content DB")
pins::board_register_rsconnect(server = "https://colorado.rstudio.com/rsc",
key = Sys.getenv("RSTUDIOCONNECT_API_KEY"))
model_details <- pins::pin_get("bike_model_rxgb", board = "rsconnect")
model <- model_details$model
recipe <- model_details$recipe
train_date <- model_details$train_date
split_date <- model_details$split_date
all_days <- tbl(con, "bike_model_data") %>%
collect()
# prep and predict
predictions <- all_days %>%
bake(recipe, .) %>%
select(-id, -date, -n_bikes) %>%
as.matrix() %>%
predict(model, .)
## [10:01:46] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
results <- all_days %>%
transmute(
id = id,
hour = hour,
date = date,
preds = predictions,
residuals = n_bikes - preds,
actual = n_bikes,
upload_time = Sys.time(),
train_date = train_date,
model = "rxgb"
)
db_drop_table(con, "bike_pred_data", force = TRUE)
## [1] 0
drv <- dbDriver("PostgreSQL")
con2 <- RPostgreSQL::dbConnect(drv,
host = "soleng-content-rds1.cloiraotshw4.us-east-1.rds.amazonaws.com",
dbname = "rds",
user = Sys.getenv("CONTENT_DB_USER"),
password = Sys.getenv("CONTENT_DB_PWD")
)
RPostgreSQL::dbWriteTable(con2, "bike_pred_data",results)
## [1] TRUE
We summarize the model results based on three windows: the model’s original training window (stored alongside the model in the model_details
pin), the new “test” data that has arrived after the training window, the latest data from the last time this report ran.
train_res <- results %>%
filter(date < split_date)
oos_metrics(train_res$actual, train_res$preds)
## # A tibble: 1 x 4
## rmse mae ccc r2
## <dbl> <dbl> <dbl> <dbl>
## 1 4.41 3.28 0.595 0.357
test_res <- results %>%
filter(date >= split_date)
oos_metrics(test_res$actual, test_res$preds)
## # A tibble: 1 x 4
## rmse mae ccc r2
## <dbl> <dbl> <dbl> <dbl>
## 1 3.25 2.40 0.755 0.583
latest_day <- results %>%
slice_max(order_by = date, n=1, with_ties = FALSE) %>%
pull(date)
latest_res <- results %>%
filter(date == latest_day)
oos_metrics(latest_res$actual, latest_res$preds)
## # A tibble: 1 x 4
## rmse mae ccc r2
## <dbl> <dbl> <dbl> <dbl>
## 1 3.58 2.72 0.717 0.543
DBI::dbDisconnect(con)
RPostgreSQL::dbDisconnect(con2)
## [1] TRUE