---
title: "SALURBAL Google Analytics Ingestions"
author:
- name: Ran Li (Maintainer)
orcid: 0000-0002-4699-4755
date: last-modified
format:
html:
page-layout: full
toc: true
self-contained: true
code-fold: true
df-print: kable
code-tools: true
comments:
hypothesis:
theme: clean
editor: source
execute:
warning: false
message: false
eval: true
editor_options:
chunk_output_type: console
---
```{r}
#| code-summary: "Setup Code"
#| echo: false
library(pacman)
p_load(tidyverse, arrow, googleAnalyticsR, lubridate, reactable, janitor)
## local parametrs
start_date = "2022-10-01"
end_date = 'today'
```
This notebook ingests google analytics data for our datawarehouse we will leverage the `googleAnalyticsR` package.
# Properties
```{r}
#| code-summary: "Connect to Google Analytics"
#| echo: false
ga_auth(email="ranli2011@gmail.com")
# ga_auth()
# account_df <- ga_accounts()
# account_df$id
```
```{r}
#| code-summary: "Authenticate if needd"
#| echo: false
#| eval: false
ga_auth()
account_df <- ga_accounts()
account_df$id
```
Lets get a list of all of our google analytics accounts - focusing on SALURBAL
```{r}
#| code-summary: "Get SALURBAL GA properties"
#| eval: false
## All SALURBAL Properties
salurbal_ga = ga_account_list("ga4") %>%
filter(str_detect(property_name, "salurbal"))
## Portal Property
ga_portal = salurbal_ga %>%
filter(property_name == 'salurbal-data-portal')
property_id = ga_portal$propertyId
salurbal_ga
```
Here is a table of available dimensions and metrics available for us to analyze.
```{r}
#| code-summary: "Google Analytics codebook"
df_metrics_available = ga_meta(
version = c("universal", "data"),
propertyId = property_id,
cached = TRUE,
no_api = FALSE
) %>%
as_tibble()%>%
filter(status != "DEPRECATED")
df_metrics_available %>%
select(type, name = uiName, description) %>%
arrange(desc(type), name) %>%
reactable(searchable = TRUE)
```
# Portal
Lets get some basic monthly information about SALURABL portal. Lets see what is available for this property.
## Trends
### Download
Let's just get basic metrics.
```{r}
#| code-summary: "Get basic metrics"
if (!file.exists('metadata_cache/df_daily.parquet')){
df_daily = ga_data(
property_id,
metrics = c(
"sessions",
"newUsers",
"activeUsers",
"screenPageViews",
"bounceRate",
"averageSessionDuration",
"eventsPerSession"
),
dimensions = c('date'),
date_range = c(start_date, end_date),
limit = -1
) %>%
arrange(desc(date))
df_daily %>%
clean_names()%>%
write_parquet("metadata_cache/df_daily.parquet")
} else {
df_daily = read_parquet("metadata_cache/df_daily.parquet")
}
df_daily %>%
reactable()
```
### Metrics
#### New users by Month
This is the one metric we have on the usage dashboard. Let's summarize new users by month.
```{r}
df_monthly_new_users = df_daily %>%
select(date, new_users ) %>%
mutate(year = year(date),
month = month(date),
year_month_str = glue::glue("{year}-{month}")) %>%
group_by(year_month_str) %>%
summarise(new_users = sum(new_users)) %>%
ungroup() %>%
mutate(date = ymd(glue::glue("{year_month_str}-01"))) %>%
## format date as MM dd, YYYY
mutate(Name = format(date, "%B %d, %Y")) %>%
arrange(desc(date))%>%
select(Name, `New Users` = new_users, Date = date)
df_monthly_new_users %>%
write_csv("metadata_cache/df_monthly_new_users.csv")
df_monthly_new_users %>%
jsonlite::write_json("metadata_cache/df_monthly_new_users.json", auto_unbox = TRUE, pretty = T)
```
Let's preview this
```{r}
df_monthly_new_users %>%
reactable()
```
## Stratafied trends
```{r}
if (!file.exists('metadata_cache/df_daily_stratafied.parquet')){
df_daily_stratafied = ga_data(
property_id,
metrics = c(
"sessions",
"newUsers",
"activeUsers",
"screenPageViews",
"bounceRate",
"averageSessionDuration",
"eventsPerSession"
),
dimensions = c(
"date",
"newVsReturning",
"deviceCategory",
"sessionSource",
"sessionMedium"
),
date_range = c(start_date, end_date),
limit = -1
) %>%
arrange(desc(date))
df_daily_stratafied %>%
write_parquet("metadata_cache/df_daily_stratafied.parquet")
} else {
df_daily_stratafied = read_parquet("metadata_cache/df_daily_stratafied.parquet")
}
df_daily_stratafied %>%
reactable()
```
## Cohort Analysis
Chekc auth
```{r}
```
Get cohorsts data
```{r}
ga_meta(
version = c("data"),
propertyId = property_id,
cached = TRUE,
no_api = FALSE
) %>% View()
ga_data(
property_id,
metrics = c(
"cohortActiveUsers",
"cohortTotalUsers"
),
dimensions = c('cohort','cohortNthMonth'),
date_range = c(start_date, end_date),
limit = -1
) %>%
arrange(desc(date))
google_analytics(
viewId = property_id,
date_range = c(start_date, end_date),
metrics = c("sessions"),
)
```