From 7233f80ca88af6b0b3a8dd9e41fc8caf2e78a4ef Mon Sep 17 00:00:00 2001 From: stevenc81 Date: Wed, 5 Nov 2025 15:28:42 +0800 Subject: [PATCH 1/2] uses buffer-data project backends --- tools/cloud_functions/bq_table_snapshots/.gitignore | 11 +++++++++++ .../bq_table_snapshots/terraform/backend.tf | 5 ++--- .../bq_table_snapshots/terraform/function.tf | 5 +++-- .../bq_table_snapshots/terraform/terraform.tfvars | 10 +++++----- 4 files changed, 21 insertions(+), 10 deletions(-) create mode 100644 tools/cloud_functions/bq_table_snapshots/.gitignore diff --git a/tools/cloud_functions/bq_table_snapshots/.gitignore b/tools/cloud_functions/bq_table_snapshots/.gitignore new file mode 100644 index 000000000..49085668f --- /dev/null +++ b/tools/cloud_functions/bq_table_snapshots/.gitignore @@ -0,0 +1,11 @@ +# Cache files +.channels_cache_v2.json +.users_cache.json +*_cache*.json + +# Terraform files +.terraform/ +.terraform.lock.hcl +*.tfstate +*.tfstate.* +terraform.tfvars.backup diff --git a/tools/cloud_functions/bq_table_snapshots/terraform/backend.tf b/tools/cloud_functions/bq_table_snapshots/terraform/backend.tf index fbfee8eda..276ac7025 100644 --- a/tools/cloud_functions/bq_table_snapshots/terraform/backend.tf +++ b/tools/cloud_functions/bq_table_snapshots/terraform/backend.tf @@ -14,8 +14,7 @@ terraform { backend "gcs" { - bucket = "YOUR_TF_STATE_CLOUD_STORAGE_BUCKET" - prefix = "terraform/state" + bucket = "terraform-state-backend-for-project-buffer-data" + prefix = "terraform/state/bq-snapshots" } } - diff --git a/tools/cloud_functions/bq_table_snapshots/terraform/function.tf b/tools/cloud_functions/bq_table_snapshots/terraform/function.tf index 2ef205d8c..6d8ac6438 100644 --- a/tools/cloud_functions/bq_table_snapshots/terraform/function.tf +++ b/tools/cloud_functions/bq_table_snapshots/terraform/function.tf @@ -29,8 +29,9 @@ resource "random_id" "bucket_prefix" { # BQ Target Dataset # ########################################## resource "google_bigquery_dataset" 
"dataset" { - project = var.storage_project_id - dataset_id = var.target_dataset_name + project = var.storage_project_id + dataset_id = var.target_dataset_name + delete_contents_on_destroy = true } ########################################## diff --git a/tools/cloud_functions/bq_table_snapshots/terraform/terraform.tfvars b/tools/cloud_functions/bq_table_snapshots/terraform/terraform.tfvars index b9fe2ad09..1b1093161 100644 --- a/tools/cloud_functions/bq_table_snapshots/terraform/terraform.tfvars +++ b/tools/cloud_functions/bq_table_snapshots/terraform/terraform.tfvars @@ -1,6 +1,6 @@ -project_id = "YOUR_CLOUD_FUNCTIONS_PROJECT_ID" -storage_project_id = "YOUR_BIGQUERY_PROJECT_ID" -source_dataset_name = "YOUR_BIGQUERY_DATASET_ID" -target_dataset_name = "YOUR_SNAPSHOT_DATASET_ID" -crontab_format = "10 * * * *" +project_id = "buffer-data" +storage_project_id = "buffer-data" +source_dataset_name = "dbt_steven" +target_dataset_name = "dbt_steven_snapshots" +crontab_format = "15 * * * *" seconds_before_expiration = 604800 From 75163cb92d190d4e02dc6b4891f0b904e1941fe6 Mon Sep 17 00:00:00 2001 From: stevenc81 Date: Thu, 6 Nov 2025 13:02:28 +0800 Subject: [PATCH 2/2] use a separate service account with more restrictive permission levels --- .../bq_table_snapshots/terraform/function.tf | 13 ++ .../bq_table_snapshots/terraform/iam.tf | 132 ++++++++++++++++++ .../terraform/terraform.tfvars | 8 +- 3 files changed, 149 insertions(+), 4 deletions(-) create mode 100644 tools/cloud_functions/bq_table_snapshots/terraform/iam.tf diff --git a/tools/cloud_functions/bq_table_snapshots/terraform/function.tf b/tools/cloud_functions/bq_table_snapshots/terraform/function.tf index 6d8ac6438..a7c8f84fb 100644 --- a/tools/cloud_functions/bq_table_snapshots/terraform/function.tf +++ b/tools/cloud_functions/bq_table_snapshots/terraform/function.tf @@ -91,6 +91,7 @@ resource "google_cloudfunctions_function" "bq_backup_fetch_tables_names" { entry_point = "main" source_archive_bucket =
google_storage_bucket.bucket.name source_archive_object = google_storage_bucket_object.bq_backup_fetch_tables_names.name + service_account_email = google_service_account.fetcher.email environment_variables = { DATA_PROJECT_ID = var.storage_project_id @@ -102,6 +103,11 @@ resource "google_cloudfunctions_function" "bq_backup_fetch_tables_names" { event_type = "providers/cloud.pubsub/eventTypes/topic.publish" resource = google_pubsub_topic.snapshot_dataset_topic.id } + + depends_on = [ + google_bigquery_dataset_iam_member.fetcher_source_dataset, + google_pubsub_topic_iam_member.fetcher_pubsub + ] } ########################################## @@ -128,6 +134,7 @@ resource "google_cloudfunctions_function" "bq_backup_create_snapshots" { entry_point = "main" source_archive_bucket = google_storage_bucket.bucket.name source_archive_object = google_storage_bucket_object.bq_backup_create_snapshots.name + service_account_email = google_service_account.creator.email environment_variables = { BQ_DATA_PROJECT_ID = var.storage_project_id @@ -138,5 +145,11 @@ resource "google_cloudfunctions_function" "bq_backup_create_snapshots" { event_type = "providers/cloud.pubsub/eventTypes/topic.publish" resource = google_pubsub_topic.bq_snapshot_create_snapshot_topic.id } + + depends_on = [ + google_project_iam_member.creator_job_user, + google_bigquery_dataset_iam_member.creator_source_dataset, + google_bigquery_dataset_iam_member.creator_target_dataset + ] } diff --git a/tools/cloud_functions/bq_table_snapshots/terraform/iam.tf b/tools/cloud_functions/bq_table_snapshots/terraform/iam.tf new file mode 100644 index 000000000..251b7141e --- /dev/null +++ b/tools/cloud_functions/bq_table_snapshots/terraform/iam.tf @@ -0,0 +1,132 @@ +# Service Accounts for BigQuery Snapshot Functions +# +# This configuration implements least privilege access by creating dedicated +# service accounts with minimal permissions for each Cloud Function. 
+# +# Architecture: +# - sa-bq-snap-fetcher: Lists tables in source dataset, publishes to Pub/Sub +# - sa-bq-snap-creator: Creates snapshots from source to target dataset + +# ============================================================================= +# Service Accounts +# ============================================================================= + +resource "google_service_account" "fetcher" { + account_id = "sa-bq-snap-fetcher" + display_name = "BigQuery Snapshot Fetcher Service Account" + description = "Service account for listing tables in source dataset and triggering snapshot creation" + project = var.project_id +} + +resource "google_service_account" "creator" { + account_id = "sa-bq-snap-creator" + display_name = "BigQuery Snapshot Creator Service Account" + description = "Service account for creating BigQuery table snapshots from source to target dataset" + project = var.project_id +} + +# ============================================================================= +# Custom IAM Roles +# ============================================================================= + +# Custom role for fetcher function +# Permissions: list tables and get dataset metadata from source dataset +resource "google_project_iam_custom_role" "bq_snapshot_fetcher" { + project = var.project_id + role_id = "bqSnapshotFetcher" + title = "BigQuery Snapshot Fetcher" + description = "Minimal permissions to list tables in a dataset for snapshot processing" + permissions = [ + "bigquery.tables.list", + "bigquery.datasets.get" + ] +} + +# NOTE: Using predefined BigQuery roles instead of custom roles. +# Per Google Cloud documentation, only specific predefined roles (dataOwner, admin, +# studioAdmin) can create snapshots with expiration times. Custom roles cannot work +# for this use case due to BigQuery API limitations. 
+ +# ============================================================================= +# IAM Bindings - Fetcher Service Account +# ============================================================================= + +# Grant fetcher SA permissions to list tables in source dataset +resource "google_bigquery_dataset_iam_member" "fetcher_source_dataset" { + project = var.storage_project_id + dataset_id = var.source_dataset_name + role = google_project_iam_custom_role.bq_snapshot_fetcher.id + member = "serviceAccount:${google_service_account.fetcher.email}" +} + +# Grant fetcher SA permissions to publish messages to snapshot trigger topic +resource "google_pubsub_topic_iam_member" "fetcher_pubsub" { + project = var.project_id + topic = google_pubsub_topic.bq_snapshot_create_snapshot_topic.name + role = "roles/pubsub.publisher" + member = "serviceAccount:${google_service_account.fetcher.email}" +} + +# ============================================================================= +# IAM Bindings - Creator Service Account +# ============================================================================= + +# Grant creator SA read permissions on source dataset +# Using predefined dataViewer role which includes: +# - bigquery.tables.get (read metadata) +# - bigquery.tables.getData (read data for time-travel) +# - bigquery.tables.createSnapshot (create snapshot from source) +# - bigquery.datasets.get (access dataset) +resource "google_bigquery_dataset_iam_member" "creator_source_dataset" { + project = var.storage_project_id + dataset_id = var.source_dataset_name + role = "roles/bigquery.dataViewer" + member = "serviceAccount:${google_service_account.creator.email}" +} + +# Grant creator SA write permissions on target dataset +# Using predefined dataOwner role which includes: +# - bigquery.tables.create (create snapshot tables) +# - bigquery.tables.createSnapshot (snapshot operation) +# - bigquery.tables.deleteSnapshot (REQUIRED for expiration) +# - bigquery.tables.updateData (write 
snapshot data) +# - bigquery.tables.update (update table metadata) +# - bigquery.tables.delete (cleanup old snapshots) +# - bigquery.datasets.get (access dataset) +# - bigquery.tables.setIamPolicy (manage table permissions) +# +# IMPORTANT: Per Google Cloud documentation, ONLY bigquery.dataOwner, +# bigquery.admin, and bigquery.studioAdmin can create snapshots with +# expiration times. This is a BigQuery API limitation - dataEditor lacks +# bigquery.tables.deleteSnapshot which is required for setting expiration. +# dataOwner is the least privileged role that supports snapshot expiration. +resource "google_bigquery_dataset_iam_member" "creator_target_dataset" { + project = var.storage_project_id + dataset_id = google_bigquery_dataset.dataset.dataset_id + role = "roles/bigquery.dataOwner" + member = "serviceAccount:${google_service_account.creator.email}" +} + +# Grant creator SA permissions to create BigQuery jobs at project level +# Note: BigQuery jobs are project-scoped resources, so this must be project-level. +# This is a known limitation - the SA can create any BigQuery job type, but this +# is the minimum required for snapshot operations. 
+resource "google_project_iam_member" "creator_job_user" { + project = var.project_id + role = "roles/bigquery.jobUser" + member = "serviceAccount:${google_service_account.creator.email}" +} + +# ============================================================================= +# Outputs +# ============================================================================= + +output "fetcher_service_account_email" { + description = "Email of the fetcher service account" + value = google_service_account.fetcher.email +} + +output "creator_service_account_email" { + description = "Email of the creator service account" + value = google_service_account.creator.email +} diff --git a/tools/cloud_functions/bq_table_snapshots/terraform/terraform.tfvars b/tools/cloud_functions/bq_table_snapshots/terraform/terraform.tfvars index 1b1093161..8ca30732e 100644 --- a/tools/cloud_functions/bq_table_snapshots/terraform/terraform.tfvars +++ b/tools/cloud_functions/bq_table_snapshots/terraform/terraform.tfvars @@ -1,6 +1,6 @@ project_id = "buffer-data" storage_project_id = "buffer-data" -source_dataset_name = "dbt_steven" -target_dataset_name = "dbt_steven_snapshots" -crontab_format = "15 * * * *" -seconds_before_expiration = 604800 +source_dataset_name = "dbt_buffer" +target_dataset_name = "dbt_buffer_snapshots" +crontab_format = "0 2 * * *" +seconds_before_expiration = 2419200