128 lines
3.7 KiB
Ruby
128 lines
3.7 KiB
Ruby
# frozen_string_literal: true
|
||
|
||
namespace :duckdb do
|
||
# Deletes the on-disk DuckDB analytics database (and its write-ahead log, if
# one was left behind), runs SyncEventsToDuckdbJob inline to repopulate it,
# then prints a summary: duration, total/bot/human event counts, and the
# resulting file's location and size.
#
# Exits with status 1 if the sync job or any verification query raises.
desc "Rebuild DuckDB analytics database from scratch"
task rebuild: :environment do
  banner = "=" * 80
  puts banner
  puts "DuckDB Rebuild"
  puts banner
  puts

  duckdb_path = Rails.root.join("storage", "analytics.duckdb")
  # DuckDB keeps its write-ahead log next to the database file as "<file>.wal".
  wal_path = "#{duckdb_path}.wal"

  # Step 1: Remove any existing database file
  if File.exist?(duckdb_path)
    puts "🗑️ Deleting existing DuckDB database..."
    File.delete(duckdb_path)
    puts " ✅ Deleted: #{duckdb_path}"
    puts
  else
    puts "ℹ️ No existing DuckDB database found"
    puts
  end

  # A WAL that outlives its database must not be picked up by the fresh
  # database created below, so drop it as part of the "from scratch" reset.
  File.delete(wal_path) if File.exist?(wal_path)

  # Step 2: Rebuild from PostgreSQL
  puts "🔨 Rebuilding DuckDB from PostgreSQL events..."
  puts

  # Monotonic clock: unaffected by wall-clock adjustments while the job runs.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  begin
    SyncEventsToDuckdbJob.perform_now
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    # Step 3: Verify the rebuild
    event_count = AnalyticsDuckdbService.instance.event_count
    bot_count = AnalyticsDuckdbService.instance.with_connection do |conn|
      result = conn.query("SELECT COUNT(*) FROM events WHERE is_bot = true")
      result.first&.first || 0
    end

    puts banner
    puts "✅ DuckDB Rebuild Complete!"
    puts banner
    puts " Duration: #{duration.round(2)}s"
    puts " Total events synced: #{event_count}"

    # Percentages are only meaningful (and division only safe) with events.
    if event_count.positive?
      human_count = event_count - bot_count
      bot_pct = (bot_count.to_f / event_count * 100).round(1)
      human_pct = (human_count.to_f / event_count * 100).round(1)
      puts " Bot events: #{bot_count} (#{bot_pct}%)"
      puts " Human events: #{human_count} (#{human_pct}%)"
    end

    # Rounded to two decimals, matching the size shown by `duckdb:stats`.
    size_mb = (File.size(duckdb_path) / 1024.0 / 1024.0).round(2)
    puts
    puts "📂 Database location: #{duckdb_path}"
    puts "📊 Database size: #{size_mb}MB"
    puts
  rescue StandardError => e
    puts "❌ Error rebuilding DuckDB: #{e.message}"
    puts Array(e.backtrace).first(5).join("\n")
    exit 1
  end
end
|
||
|
||
# Prints a report on the DuckDB analytics database: bot vs. human event
# counts with unique IP counts, the oldest and newest event timestamps, and
# the database file's location, size, and total event count.
#
# Exits with status 1 if the database file does not exist.
desc "Show DuckDB statistics"
task stats: :environment do
  duckdb_path = Rails.root.join("storage", "analytics.duckdb")

  unless File.exist?(duckdb_path)
    puts "❌ DuckDB database not found at: #{duckdb_path}"
    exit 1
  end

  banner = "=" * 80
  puts banner
  puts "DuckDB Statistics"
  puts banner
  puts

  total = AnalyticsDuckdbService.instance.event_count

  AnalyticsDuckdbService.instance.with_connection do |conn|
    # Bot breakdown: one row per distinct is_bot value
    breakdown = conn.query(<<~SQL)
      SELECT
        is_bot,
        COUNT(*) as event_count,
        COUNT(DISTINCT ip_address) as unique_ips
      FROM events
      GROUP BY is_bot
    SQL

    puts "📊 Bot Traffic Breakdown:"
    breakdown.each do |row|
      type = row[0] ? "🤖 Bots" : "👤 Humans"
      count = row[1]
      ips = row[2]
      # `total` is fetched by a separate call from this grouped query, so
      # guard the division instead of printing "Infinity%" or raising when
      # it comes back zero or nil.
      percentage = total.to_i.positive? ? (count.to_f / total * 100).round(1) : 0.0
      puts " #{type}: #{count} events (#{percentage}%) from #{ips} unique IPs"
    end
    puts

    # Date range
    range_result = conn.query("SELECT MIN(timestamp), MAX(timestamp) FROM events")
    min_ts, max_ts = range_result.first
    puts "📅 Date Range:"
    puts " Oldest event: #{min_ts}"
    puts " Newest event: #{max_ts}"
    puts

    # Database info
    size_mb = (File.size(duckdb_path) / 1024.0 / 1024.0).round(2)
    puts "💾 Database Info:"
    puts " Location: #{duckdb_path}"
    puts " Size: #{size_mb}MB"
    puts " Total events: #{total}"
    puts
  end
end
|
||
|
||
# Runs SyncEventsToDuckdbJob inline and reports how long it took.
#
# On failure, prints the error message and the first five backtrace lines
# (the same diagnostics `duckdb:rebuild` prints), then exits with status 1.
desc "Sync new events from PostgreSQL to DuckDB"
task sync: :environment do
  puts "🔄 Syncing events from PostgreSQL to DuckDB..."

  # Monotonic clock: unaffected by wall-clock adjustments while the job runs.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  begin
    SyncEventsToDuckdbJob.perform_now
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    puts "✅ Sync complete in #{duration.round(2)}s"
  rescue StandardError => e
    puts "❌ Error syncing: #{e.message}"
    puts Array(e.backtrace).first(5).join("\n")
    exit 1
  end
end
|
||
end
|