Set up packages and connect to RStudio Connect board
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.3
## ✔ tibble 2.1.3 ✔ dplyr 0.8.4
## ✔ tidyr 1.0.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(odbc)
con <- dbConnect(odbc::odbc(), "Content DB", timeout = 10)
pins::board_register_rsconnect(server = Sys.getenv("CONNECT_SERVER"),
key = Sys.getenv("CONNECT_API_KEY"))
Connect to Database Data
DBI::dbWriteTable(con, "bike_station_info", pins::pin_get("alex.gold/bike_station_info"), overwrite = TRUE)
df_con <- dplyr::tbl(con, "bike_raw_data")
## Found more than one class "ident" in cache; using the first, from namespace 'dplyr'
## Also defined by 'dbplyr'
## Found more than one class "ident" in cache; using the first, from namespace 'dplyr'
## Also defined by 'dbplyr'
stats <- dplyr::tbl(con, "bike_station_info")
Clean data
db_drop_table(con, "bike_model_data", force = TRUE)
## [1] 0
query <- df_con %>%
group_by(
id = station_id,
hour = hour(time),
date = date(time),
month = month(time),
dow = TRIM(to_char(time, "Day"))
) %>%
summarize(
n_bikes = mean(num_bikes_available, na.rm = TRUE)
) %>%
inner_join(
select(stats, id = station_id, lat, lon)
) %>%
dbplyr::sql_render() %>%
stringr::str_replace("SELECT", "CREATE TABLE bike_model_data AS SELECT")
## Joining, by = "id"
dbSendQuery(con, query)
## <OdbcResult>
## SQL CREATE TABLE bike_model_data AS SELECT "LHS"."id" AS "id", "LHS"."hour" AS "hour", "LHS"."date" AS "date", "LHS"."month" AS "month", "LHS"."dow" AS "dow", "LHS"."n_bikes" AS "n_bikes", "RHS"."lat" AS "lat", "RHS"."lon" AS "lon"
## FROM (SELECT "id", "hour", "date", "month", "dow", AVG("num_bikes_available") AS "n_bikes"
## FROM (SELECT "station_id", "num_bikes_available", "num_ebikes_available", "num_bikes_disabled", "num_docks_available", "num_docks_disabled", "is_installed", "is_renting", "is_returning", "last_reported", "eightd_has_available_keys", "time", "station_id" AS "id", EXTRACT(hour FROM "time") AS "hour", date("time") AS "date", EXTRACT(MONTH FROM "time") AS "month", TRIM(to_char("time", 'Day')) AS "dow"
## FROM "bike_raw_data") "dbplyr_001"
## GROUP BY "id", "hour", "date", "month", "dow") "LHS"
## INNER JOIN (SELECT "station_id" AS "id", "lat", "lon"
## FROM "bike_station_info") "RHS"
## ON ("LHS"."id" = "RHS"."id")
##
## ROWS Fetched: 0 [complete]
## Changed: 15060921
tbl(con, "bike_model_data")
## # Source: table<bike_model_data> [?? x 8]
## # Database: postgres [soleng@localhost:/rds]
## id hour date month dow n_bikes lat lon
## <chr> <dbl> <date> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1 0 2019-09-21 9 Saturday 4.33 38.9 -77.1
## 2 1 0 2019-09-22 9 Sunday 3 38.9 -77.1
## 3 1 0 2019-09-23 9 Monday 11.3 38.9 -77.1
## 4 1 0 2019-09-24 9 Tuesday 9 38.9 -77.1
## 5 1 0 2019-09-25 9 Wednesday 8 38.9 -77.1
## 6 1 0 2019-09-26 9 Thursday 5 38.9 -77.1
## 7 1 0 2019-09-27 9 Friday 10 38.9 -77.1
## 8 1 0 2019-09-28 9 Saturday 14 38.9 -77.1
## 9 1 0 2019-09-29 9 Sunday 0 38.9 -77.1
## 10 1 0 2019-09-30 9 Monday 9 38.9 -77.1
## # … with more rows
dbDisconnect(con)