Set up packages and connect to RStudio Connect board

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.2.1          ✔ purrr   0.3.4     
## ✔ tibble  3.0.1          ✔ dplyr   1.0.0     
## ✔ tidyr   1.0.2.9000     ✔ stringr 1.4.0     
## ✔ readr   1.3.1          ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(odbc)

con <- dbConnect(odbc::odbc(), "Content DB", timeout = 10)
pins::board_register_rsconnect(server = "https://colorado.rstudio.com/rsc",
                               key = Sys.getenv("RSTUDIOCONNECT_API_KEY"))

Connect to Database Data

DBI::dbWriteTable(con, "bike_station_info", pins::pin_get("alex.gold/bike_station_info"), overwrite = TRUE)

df_con <- dplyr::tbl(con, "bike_raw_data")
stats <- dplyr::tbl(con, "bike_station_info")

Clean data

db_drop_table(con, "bike_model_data", force = TRUE)
## [1] 0
query <- df_con %>% 
    group_by(
        id = station_id, 
        hour = hour(time), 
        date = date(time), 
        month = month(time), 
        dow = TRIM(to_char(time, "Day"))
    ) %>%
    summarize(
        n_bikes = mean(num_bikes_available, na.rm = TRUE)
    ) %>%
    inner_join(
        select(stats, id = station_id, lat, lon)
    ) %>%
    dbplyr::sql_render() %>%
    stringr::str_replace("SELECT", "CREATE TABLE bike_model_data AS SELECT")
## Joining, by = "id"
dbSendQuery(con, query)
## <OdbcResult>
##   SQL  CREATE TABLE bike_model_data AS SELECT "LHS"."id" AS "id", "LHS"."hour" AS "hour", "LHS"."date" AS "date", "LHS"."month" AS "month", "LHS"."dow" AS "dow", "LHS"."n_bikes" AS "n_bikes", "RHS"."lat" AS "lat", "RHS"."lon" AS "lon"
## FROM (SELECT "id", "hour", "date", "month", "dow", AVG("num_bikes_available") AS "n_bikes"
## FROM (SELECT "station_id", "num_bikes_available", "num_ebikes_available", "num_bikes_disabled", "num_docks_available", "num_docks_disabled", "is_installed", "is_renting", "is_returning", "last_reported", "eightd_has_available_keys", "time", "station_id" AS "id", EXTRACT(hour FROM "time") AS "hour", date("time") AS "date", EXTRACT(MONTH FROM "time") AS "month", TRIM(to_char("time", 'Day')) AS "dow"
## FROM "bike_raw_data") "dbplyr_001"
## GROUP BY "id", "hour", "date", "month", "dow") "LHS"
## INNER JOIN (SELECT "station_id" AS "id", "lat", "lon"
## FROM "bike_station_info") "RHS"
## ON ("LHS"."id" = "RHS"."id")
## 
##   ROWS Fetched: 0 [complete]
##        Changed: 5180406
tbl(con, "bike_model_data")
## # Source:   table<bike_model_data> [?? x 8]
## # Database: postgres [content@localhost:/rds]
##    id     hour date       month dow       n_bikes   lat   lon
##    <chr> <dbl> <date>     <dbl> <chr>       <dbl> <dbl> <dbl>
##  1 1         0 2019-09-21     9 Saturday     4.33  38.9 -77.1
##  2 1         0 2019-09-22     9 Sunday       3     38.9 -77.1
##  3 1         0 2019-09-23     9 Monday      11.3   38.9 -77.1
##  4 1         0 2019-09-24     9 Tuesday      9     38.9 -77.1
##  5 1         0 2019-09-25     9 Wednesday    8     38.9 -77.1
##  6 1         0 2019-09-26     9 Thursday     5     38.9 -77.1
##  7 1         0 2019-09-27     9 Friday      10     38.9 -77.1
##  8 1         0 2019-09-28     9 Saturday    14     38.9 -77.1
##  9 1         0 2019-09-29     9 Sunday       0     38.9 -77.1
## 10 1         0 2019-09-30     9 Monday       9     38.9 -77.1
## # … with more rows