Use only parquet files for events
This commit is contained in:
52
app/jobs/bootstrap_parquet_export_job.rb
Normal file
52
app/jobs/bootstrap_parquet_export_job.rb
Normal file
@@ -0,0 +1,52 @@
# frozen_string_literal: true

# One-time job to bootstrap Parquet export system
# Exports all existing DuckDB data to weekly Parquet archives
# Run this once when setting up Parquet exports for the first time
#
# Usage:
#   BootstrapParquetExportJob.perform_now
#   # or via docker:
#   docker compose exec jobs bin/rails runner "BootstrapParquetExportJob.perform_now"
# Job that asks AnalyticsDuckdbService to export everything currently in
# DuckDB to Parquet, logging progress and a summary along the way.
class BootstrapParquetExportJob < ApplicationJob
  queue_as :default

  # Runs the bootstrap export.
  #
  # Logs the DuckDB event count and returns early (with a warning) when it is
  # zero. Otherwise it reports how many week archives already exist, calls
  # +export_all_to_parquet+ on the service, and logs the elapsed time and the
  # resulting number of week archives.
  #
  # @return [void]
  # @raise [StandardError] any error raised along the way is logged and then
  #   re-raised so the job is marked as failed
  def perform
    service = AnalyticsDuckdbService.instance

    # Check if DuckDB has any data
    event_count = service.event_count
    Rails.logger.info "[Parquet Bootstrap] DuckDB event count: #{event_count}"

    if event_count == 0
      Rails.logger.warn "[Parquet Bootstrap] No events in DuckDB. Run SyncEventsToDuckdbJob first."
      return
    end

    # Existing archives are only reported; the export still runs.
    existing_weeks = week_archive_count
    if existing_weeks.positive?
      Rails.logger.info "[Parquet Bootstrap] Found #{existing_weeks} existing week archives"
    end

    Rails.logger.info "[Parquet Bootstrap] Starting export of all DuckDB data to Parquet..."

    # Monotonic clock: the measured duration is unaffected by wall-clock
    # adjustments (NTP, DST) that may happen during a long export.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    # Run the bootstrap export
    service.export_all_to_parquet

    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    week_count = week_archive_count

    Rails.logger.info "[Parquet Bootstrap] Complete!"
    Rails.logger.info "[Parquet Bootstrap] - Time taken: #{duration.round(2)} seconds"
    Rails.logger.info "[Parquet Bootstrap] - Week archives: #{week_count}"
    Rails.logger.info "[Parquet Bootstrap] - Storage: #{AnalyticsDuckdbService::PARQUET_BASE_PATH}"
    Rails.logger.info "[Parquet Bootstrap] System is ready - jobs will maintain exports automatically"
  rescue StandardError => e
    Rails.logger.error "[Parquet Bootstrap] Job failed: #{e.message}"
    # Array() guards against a nil backtrace so logging can never mask the
    # original error with a NoMethodError.
    Rails.logger.error Array(e.backtrace).join("\n")
    raise # Re-raise to mark job as failed
  end

  private

  # Counts the "*.parquet" files directly under
  # AnalyticsDuckdbService::PARQUET_WEEKS_PATH.
  #
  # @return [Integer]
  def week_archive_count
    Dir.glob(AnalyticsDuckdbService::PARQUET_WEEKS_PATH.join("*.parquet")).size
  end
end
|
||||
Reference in New Issue
Block a user