# frozen_string_literal: true

# Rake tasks for maintaining the DuckDB analytics database stored at
# storage/analytics.duckdb:
#
#   rake duckdb:rebuild  - delete the database and re-sync every event
#   rake duckdb:stats    - print bot/human breakdown, date range and file size
#   rake duckdb:sync     - run the sync job once against the existing database
#
# The tasks print their progress to stdout and exit with status 1 on failure.
namespace :duckdb do
  # Location of the DuckDB file. Resolved lazily so Rails.root is only read
  # once a task actually runs (after the :environment prerequisite).
  database_path = -> { Rails.root.join("storage", "analytics.duckdb") }

  # Size of the file at +path+ in megabytes, rounded to two decimals.
  size_in_mb = ->(path) { (File.size(path) / 1024.0 / 1024.0).round(2) }

  # Share of +part+ in +whole+ as a percentage with one decimal.
  # Returns 0.0 when +whole+ is zero instead of producing Infinity/NaN.
  percent_of = lambda do |part, whole|
    denominator = whole.to_f
    denominator.zero? ? 0.0 : (part.to_f / denominator * 100).round(1)
  end

  # Monotonic clock reading in seconds; unlike wall-clock time it cannot jump
  # backwards or forwards while a job is being timed.
  monotonic_now = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }

  # Reports +error+ under +label+ with a short backtrace excerpt, then
  # terminates the rake process with exit status 1.
  fail_with = lambda do |label, error|
    puts "❌ #{label}: #{error.message}"
    puts Array(error.backtrace).first(5).join("\n")
    exit 1
  end

  desc "Rebuild DuckDB analytics database from scratch"
  task rebuild: :environment do
    puts "=" * 80
    puts "DuckDB Rebuild"
    puts "=" * 80
    puts

    duckdb_path = database_path.call
    # DuckDB keeps its write-ahead log next to the database as "<file>.wal".
    wal_path = "#{duckdb_path}.wal"

    # Step 1: Remove the existing database, if any
    if File.exist?(duckdb_path)
      puts "🗑️ Deleting existing DuckDB database..."
      File.delete(duckdb_path)
      puts " ✅ Deleted: #{duckdb_path}"
    else
      puts "ℹ️ No existing DuckDB database found"
    end

    # A leftover WAL must go as well, otherwise the fresh database would be
    # opened next to a log that belongs to the deleted one.
    if File.exist?(wal_path)
      File.delete(wal_path)
      puts " ✅ Deleted: #{wal_path}"
    end
    puts

    # Step 2: Rebuild from PostgreSQL
    puts "🔨 Rebuilding DuckDB from PostgreSQL events..."
    puts

    started_at = monotonic_now.call

    begin
      SyncEventsToDuckdbJob.perform_now
      duration = monotonic_now.call - started_at

      # Step 3: Verify the rebuild
      event_count = AnalyticsDuckdbService.instance.event_count
      bot_count = AnalyticsDuckdbService.instance.with_connection do |conn|
        result = conn.query("SELECT COUNT(*) FROM events WHERE is_bot = true")
        result.first&.first || 0
      end
      human_count = event_count - bot_count

      puts "=" * 80
      puts "✅ DuckDB Rebuild Complete!"
      puts "=" * 80
      puts " Duration: #{duration.round(2)}s"
      puts " Total events synced: #{event_count}"
      # The breakdown is only meaningful when at least one event was synced.
      if event_count.positive?
        puts " Bot events: #{bot_count} (#{percent_of.call(bot_count, event_count)}%)"
        puts " Human events: #{human_count} (#{percent_of.call(human_count, event_count)}%)"
      end
      puts
      puts "📂 Database location: #{duckdb_path}"
      puts "📊 Database size: #{size_in_mb.call(duckdb_path)}MB"
      puts
    rescue => e
      fail_with.call("Error rebuilding DuckDB", e)
    end
  end

  desc "Show DuckDB statistics"
  task stats: :environment do
    duckdb_path = database_path.call

    unless File.exist?(duckdb_path)
      puts "❌ DuckDB database not found at: #{duckdb_path}"
      exit 1
    end

    puts "=" * 80
    puts "DuckDB Statistics"
    puts "=" * 80
    puts

    total = AnalyticsDuckdbService.instance.event_count

    AnalyticsDuckdbService.instance.with_connection do |conn|
      # Bot breakdown
      result = conn.query(<<~SQL)
        SELECT
          is_bot,
          COUNT(*) as event_count,
          COUNT(DISTINCT ip_address) as unique_ips
        FROM events
        GROUP BY is_bot
      SQL

      puts "📊 Bot Traffic Breakdown:"
      result.each do |row|
        # Row layout follows the SELECT list: is_bot, event_count, unique_ips.
        type = row[0] ? "🤖 Bots" : "👤 Humans"
        count = row[1]
        ips = row[2]
        puts " #{type}: #{count} events (#{percent_of.call(count, total)}%) from #{ips} unique IPs"
      end
      puts

      # Date range
      range_result = conn.query("SELECT MIN(timestamp), MAX(timestamp) FROM events")
      min_ts, max_ts = range_result.first
      puts "📅 Date Range:"
      puts " Oldest event: #{min_ts}"
      puts " Newest event: #{max_ts}"
      puts

      # Database info
      puts "💾 Database Info:"
      puts " Location: #{duckdb_path}"
      puts " Size: #{size_in_mb.call(duckdb_path)}MB"
      puts " Total events: #{total}"
      puts
    end
  end

  desc "Sync new events from PostgreSQL to DuckDB"
  task sync: :environment do
    puts "🔄 Syncing events from PostgreSQL to DuckDB..."
    started_at = monotonic_now.call

    begin
      SyncEventsToDuckdbJob.perform_now
      duration = monotonic_now.call - started_at
      puts "✅ Sync complete in #{duration.round(2)}s"
    rescue => e
      fail_with.call("Error syncing", e)
    end
  end
end