This report runs after the new data has been cleansed. The report pulls the cleaned data and loads the latest trained model, and then scores the data with the new model, over-writing any previous model scores.

library(magrittr)
library(bikeHelpR)
library(dbplyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:dbplyr':
## 
##     ident, sql
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(xgboost)
## 
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
## 
##     slice
library(yardstick)
## For binary classification, the first factor level is assumed to be the event.
## Set the global option `yardstick.event_first` to `FALSE` to change this.
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.0 ──
## ✔ broom     0.5.6      ✔ recipes   0.1.13
## ✔ dials     0.0.7      ✔ rsample   0.0.7 
## ✔ ggplot2   3.3.2      ✔ tibble    3.0.1 
## ✔ infer     0.5.2      ✔ tune      0.1.0 
## ✔ parsnip   0.1.1      ✔ workflows 0.1.1 
## ✔ purrr     0.3.4
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard()   masks scales::discard()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::ident()     masks dbplyr::ident()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ✖ xgboost::slice()   masks dplyr::slice()
## ✖ dplyr::sql()       masks dbplyr::sql()
## ✖ recipes::step()    masks stats::step()
library(RPostgreSQL)
## Loading required package: DBI
con <- DBI::dbConnect(odbc::odbc(), "Content DB")
pins::board_register_rsconnect(server = "https://colorado.rstudio.com/rsc",
                               key = Sys.getenv("RSTUDIOCONNECT_API_KEY"))

Load model

model_details <- pins::pin_get("bike_model_rxgb", board = "rsconnect")
model <- model_details$model
recipe <- model_details$recipe
train_date <- model_details$train_date
split_date <- model_details$split_date

Score all data with model

all_days <- tbl(con, "bike_model_data") %>% 
  collect()

# prep and predict
predictions <- all_days %>% 
  bake(recipe, .) %>% 
  select(-id, -date, -n_bikes) %>% 
  as.matrix() %>% 
  predict(model, .)
## [10:01:46] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
results <- all_days %>% 
  transmute(
         id = id,
         hour = hour,
         date = date,
         preds = predictions,
         residuals = n_bikes - preds,
         actual = n_bikes,
         upload_time = Sys.time(),
         train_date = train_date,
         model = "rxgb"
  )

Write predictions to database

db_drop_table(con, "bike_pred_data", force = TRUE)
## [1] 0
drv <- dbDriver("PostgreSQL")
con2 <- RPostgreSQL::dbConnect(drv, 
  host = "soleng-content-rds1.cloiraotshw4.us-east-1.rds.amazonaws.com",
  dbname = "rds",
  user = Sys.getenv("CONTENT_DB_USER"),
  password = Sys.getenv("CONTENT_DB_PWD")
)
RPostgreSQL::dbWriteTable(con2, "bike_pred_data",results)
## [1] TRUE

Summarize model accuracy

We summarize the model results based on three windows: the model’s original training window (stored alongside the model in the model_details pin), the new “test” data that has arrived after the training window, the latest data from the last time this report ran.

Training

train_res <- results %>% 
  filter(date < split_date) 
oos_metrics(train_res$actual, train_res$preds)
## # A tibble: 1 x 4
##    rmse   mae   ccc    r2
##   <dbl> <dbl> <dbl> <dbl>
## 1  4.41  3.28 0.595 0.357

Test

test_res <- results %>% 
  filter(date >= split_date) 
oos_metrics(test_res$actual, test_res$preds)
## # A tibble: 1 x 4
##    rmse   mae   ccc    r2
##   <dbl> <dbl> <dbl> <dbl>
## 1  3.25  2.40 0.755 0.583

Latest

latest_day <- results %>% 
    slice_max(order_by = date, n=1, with_ties = FALSE) %>% 
    pull(date)
latest_res <- results %>% 
  filter(date == latest_day) 
oos_metrics(latest_res$actual, latest_res$preds)
## # A tibble: 1 x 4
##    rmse   mae   ccc    r2
##   <dbl> <dbl> <dbl> <dbl>
## 1  3.58  2.72 0.717 0.543
DBI::dbDisconnect(con)
RPostgreSQL::dbDisconnect(con2)
## [1] TRUE