Drop add_headers - headers can now be added to meta[] to be applied for any action. Consolidate Tagging in a service

This commit is contained in:
Dan Milne
2025-11-30 13:18:17 +11:00
parent de2eb43e2b
commit 179563022e
9 changed files with 157 additions and 97 deletions

127
lib/tasks/duckdb.rake Normal file
View File

@@ -0,0 +1,127 @@
# frozen_string_literal: true
namespace :duckdb do
# Rebuilds the DuckDB analytics database from scratch.
#
# Deletes the existing database file (if present), runs SyncEventsToDuckdbJob
# via perform_now, then prints a summary: duration, total event count, the
# bot/human split, and the database location and size.
# Exits with status 1 if the sync or any verification step raises.
desc "Rebuild DuckDB analytics database from scratch"
task rebuild: :environment do
  banner = "=" * 80
  puts banner
  puts "DuckDB Rebuild"
  puts banner
  puts

  duckdb_path = Rails.root.join("storage", "analytics.duckdb")

  # Step 1: Remove the existing database file, if any
  if File.exist?(duckdb_path)
    puts "🗑️ Deleting existing DuckDB database..."
    File.delete(duckdb_path)
    puts " ✅ Deleted: #{duckdb_path}"
  else
    puts " No existing DuckDB database found"
  end
  puts

  # Step 2: Rebuild from PostgreSQL
  puts "🔨 Rebuilding DuckDB from PostgreSQL events..."
  puts
  # Monotonic clock: the elapsed time is not affected by wall-clock adjustments.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  begin
    SyncEventsToDuckdbJob.perform_now
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    # Step 3: Verify the rebuild. Everything that can fail is gathered before
    # the success banner so a failure is never reported after "Complete!".
    service = AnalyticsDuckdbService.instance
    event_count = service.event_count
    bot_count = service.with_connection do |conn|
      result = conn.query("SELECT COUNT(*) FROM events WHERE is_bot = true")
      result.first&.first || 0
    end
    # Rounded to two decimals, matching the output of the stats task.
    size_mb = (File.size(duckdb_path) / 1024.0 / 1024.0).round(2)

    puts banner
    puts "✅ DuckDB Rebuild Complete!"
    puts banner
    puts " Duration: #{duration.round(2)}s"
    puts " Total events synced: #{event_count}"
    # Percentages are only meaningful (and division only safe) with events present.
    if event_count > 0
      human_count = event_count - bot_count
      bot_pct = (bot_count.to_f / event_count * 100).round(1)
      human_pct = (human_count.to_f / event_count * 100).round(1)
      puts " Bot events: #{bot_count} (#{bot_pct}%)"
      puts " Human events: #{human_count} (#{human_pct}%)"
    end
    puts
    puts "📂 Database location: #{duckdb_path}"
    puts "📊 Database size: #{size_mb}MB"
    puts
  rescue => e
    puts "❌ Error rebuilding DuckDB: #{e.message}"
    puts Array(e.backtrace).first(5).join("\n")
    exit 1
  end
end
# Prints statistics about the DuckDB analytics database: a bot/human traffic
# breakdown (event counts, share of total, unique IPs), the oldest and newest
# event timestamps, and the database location, size and total event count.
# Exits with status 1 when the database file does not exist.
desc "Show DuckDB statistics"
task stats: :environment do
  duckdb_path = Rails.root.join("storage", "analytics.duckdb")
  unless File.exist?(duckdb_path)
    puts "❌ DuckDB database not found at: #{duckdb_path}"
    exit 1
  end

  banner = "=" * 80
  puts banner
  puts "DuckDB Statistics"
  puts banner
  puts

  service = AnalyticsDuckdbService.instance
  total = service.event_count

  service.with_connection do |conn|
    # Bot breakdown
    breakdown = conn.query(<<~SQL).to_a
      SELECT
        is_bot,
        COUNT(*) as event_count,
        COUNT(DISTINCT ip_address) as unique_ips
      FROM events
      GROUP BY is_bot
    SQL

    puts "📊 Bot Traffic Breakdown:"
    # An empty events table yields no groups; say so instead of a bare header.
    puts " No events recorded" if breakdown.empty?
    breakdown.each do |is_bot, count, ips|
      type = is_bot ? "🤖 Bots" : "👤 Humans"
      # `total` comes from a separate call; guard the division so a zero total
      # never produces a non-finite percentage.
      percentage = total.positive? ? (count.to_f / total * 100).round(1) : 0.0
      puts " #{type}: #{count} events (#{percentage}%) from #{ips} unique IPs"
    end
    puts

    # Date range (MIN/MAX are NULL when the table has no rows)
    min_ts, max_ts = conn.query("SELECT MIN(timestamp), MAX(timestamp) FROM events").first
    puts "📅 Date Range:"
    puts " Oldest event: #{min_ts || 'n/a'}"
    puts " Newest event: #{max_ts || 'n/a'}"
    puts

    # Database info
    puts "💾 Database Info:"
    puts " Location: #{duckdb_path}"
    puts " Size: #{(File.size(duckdb_path) / 1024.0 / 1024.0).round(2)}MB"
    puts " Total events: #{total}"
    puts
  end
end
# Runs SyncEventsToDuckdbJob via perform_now and reports how long it took.
# On failure prints the error message plus the first five backtrace lines
# (the same error report the rebuild task produces) and exits with status 1.
desc "Sync new events from PostgreSQL to DuckDB"
task sync: :environment do
  puts "🔄 Syncing events from PostgreSQL to DuckDB..."
  # Monotonic clock: the elapsed time is not affected by wall-clock adjustments.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  begin
    SyncEventsToDuckdbJob.perform_now
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    puts "✅ Sync complete in #{duration.round(2)}s"
  rescue => e
    puts "❌ Error syncing: #{e.message}"
    puts Array(e.backtrace).first(5).join("\n")
    exit 1
  end
end
end