# frozen_string_literal: true

# One-time job to bootstrap the Parquet export system.
# Exports all existing DuckDB data to weekly Parquet archives.
# Run this once when setting up Parquet exports for the first time.
#
# Usage:
#   BootstrapParquetExportJob.perform_now
#   # or via docker:
#   docker compose exec jobs bin/rails runner "BootstrapParquetExportJob.perform_now"
class BootstrapParquetExportJob < ApplicationJob
  queue_as :default

  # Exports everything currently in DuckDB to Parquet week archives and logs
  # a summary of the run.
  #
  # Steps, in order:
  #   1. Read the DuckDB event count; log a warning and return early when it is 0.
  #   2. Log how many `*.parquet` files already exist (informational only; the
  #      export still runs).
  #   3. Call `AnalyticsDuckdbService#export_all_to_parquet` and time it.
  #   4. Log the elapsed time, resulting archive count, and storage path.
  #
  # @return [void]
  # @raise [StandardError] any error raised during the run is logged and then
  #   re-raised so the job is marked as failed.
  def perform
    service = AnalyticsDuckdbService.instance

    # Check if DuckDB has any data
    event_count = service.event_count
    Rails.logger.info "[Parquet Bootstrap] DuckDB event count: #{event_count}"

    if event_count == 0
      Rails.logger.warn "[Parquet Bootstrap] No events in DuckDB. Run SyncEventsToDuckdbJob first."
      return
    end

    # Check if Parquet files already exist
    existing_weeks = parquet_week_count
    if existing_weeks.positive?
      Rails.logger.info "[Parquet Bootstrap] Found #{existing_weeks} existing week archives"
    end

    Rails.logger.info "[Parquet Bootstrap] Starting export of all DuckDB data to Parquet..."

    # Monotonic clock: elapsed time must not be skewed by wall-clock
    # adjustments (NTP, manual changes) that happen while the export runs.
    started_at = monotonic_now

    # Run the bootstrap export
    service.export_all_to_parquet

    duration = monotonic_now - started_at
    week_count = parquet_week_count

    Rails.logger.info "[Parquet Bootstrap] Complete!"
    Rails.logger.info "[Parquet Bootstrap] - Time taken: #{duration.round(2)} seconds"
    Rails.logger.info "[Parquet Bootstrap] - Week archives: #{week_count}"
    Rails.logger.info "[Parquet Bootstrap] - Storage: #{AnalyticsDuckdbService::PARQUET_BASE_PATH}"
    Rails.logger.info "[Parquet Bootstrap] System is ready - jobs will maintain exports automatically"
  rescue StandardError => e
    Rails.logger.error "[Parquet Bootstrap] Job failed: #{e.message}"
    # Array() guards against exceptions whose backtrace is nil.
    Rails.logger.error Array(e.backtrace).join("\n")
    raise # Re-raise to mark job as failed
  end

  private

  # Number of `*.parquet` files currently matching under
  # AnalyticsDuckdbService::PARQUET_WEEKS_PATH.
  #
  # @return [Integer]
  def parquet_week_count
    Dir.glob(AnalyticsDuckdbService::PARQUET_WEEKS_PATH.join("*.parquet")).size
  end

  # Current reading of the monotonic clock, in seconds.
  #
  # @return [Float]
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end