Drop add_headers - headers can now be added to meta[] to be applied for any action. Consolidate tagging in a service
This commit is contained in:
127
lib/tasks/duckdb.rake
Normal file
127
lib/tasks/duckdb.rake
Normal file
@@ -0,0 +1,127 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
namespace :duckdb do
|
||||
# Rebuilds the DuckDB analytics database from scratch.
#
# Deletes storage/analytics.duckdb when it exists, runs SyncEventsToDuckdbJob
# inline, then prints a summary: elapsed time, total events, the bot/human
# split (only when there is at least one event), and the database file's
# location and size. Any StandardError raised by the sync or the verification
# queries is reported with the first five backtrace lines and the task exits
# with status 1.
desc "Rebuild DuckDB analytics database from scratch"
task rebuild: :environment do
  banner = "=" * 80
  puts banner
  puts "DuckDB Rebuild"
  puts banner
  puts

  duckdb_path = Rails.root.join("storage", "analytics.duckdb")

  # Step 1: Remove any existing database file so the sync starts from empty
  if File.exist?(duckdb_path)
    puts "🗑️ Deleting existing DuckDB database..."
    File.delete(duckdb_path)
    puts " ✅ Deleted: #{duckdb_path}"
  else
    puts "ℹ️ No existing DuckDB database found"
  end
  puts

  # Step 2: Rebuild from PostgreSQL
  puts "🔨 Rebuilding DuckDB from PostgreSQL events..."
  puts

  # Monotonic clock: the measured duration is unaffected by wall-clock
  # adjustments that may happen while a long sync is running.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  begin
    SyncEventsToDuckdbJob.perform_now
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    # Step 3: Verify the rebuild
    event_count = AnalyticsDuckdbService.instance.event_count
    bot_count = AnalyticsDuckdbService.instance.with_connection do |conn|
      result = conn.query("SELECT COUNT(*) FROM events WHERE is_bot = true")
      # Fall back to 0 when the query yields no row / a NULL count
      result.first&.first || 0
    end

    puts banner
    puts "✅ DuckDB Rebuild Complete!"
    puts banner
    puts " Duration: #{duration.round(2)}s"
    puts " Total events synced: #{event_count}"

    # Percentages are only meaningful (and only computable) with events present
    if event_count > 0
      human_count = event_count - bot_count
      percent_of_total = ->(part) { (part.to_f / event_count * 100).round(1) }
      puts " Bot events: #{bot_count} (#{percent_of_total.call(bot_count)}%)"
      puts " Human events: #{human_count} (#{percent_of_total.call(human_count)}%)"
    end

    puts
    puts "📂 Database location: #{duckdb_path}"
    # Rounded to two decimals, matching the size shown by the stats task
    size_mb = (File.size(duckdb_path) / 1024.0 / 1024.0).round(2)
    puts "📊 Database size: #{size_mb}MB"
    puts
  rescue => e
    puts "❌ Error rebuilding DuckDB: #{e.message}"
    puts e.backtrace.first(5).join("\n")
    exit 1
  end
end
|
||||
|
||||
# Prints a report on the existing DuckDB database: event and unique-IP counts
# grouped by is_bot, the oldest and newest event timestamps, and the database
# file's location, size and total event count. Exits with status 1 when the
# database file is missing.
desc "Show DuckDB statistics"
task stats: :environment do
  duckdb_path = Rails.root.join("storage", "analytics.duckdb")

  if !File.exist?(duckdb_path)
    puts "❌ DuckDB database not found at: #{duckdb_path}"
    exit 1
  end

  puts "=" * 80, "DuckDB Statistics", "=" * 80
  puts

  total = AnalyticsDuckdbService.instance.event_count

  AnalyticsDuckdbService.instance.with_connection do |conn|
    # Bot breakdown: one row per is_bot value
    breakdown = conn.query(<<~SQL)
      SELECT
      is_bot,
      COUNT(*) as event_count,
      COUNT(DISTINCT ip_address) as unique_ips
      FROM events
      GROUP BY is_bot
    SQL

    puts "📊 Bot Traffic Breakdown:"
    breakdown.each do |row|
      label = row[0] ? "🤖 Bots" : "👤 Humans"
      events = row[1]
      unique_ips = row[2]
      share = (events.to_f / total * 100).round(1)
      puts " #{label}: #{events} events (#{share}%) from #{unique_ips} unique IPs"
    end
    puts

    # Date range: a single row holding the earliest and latest timestamps
    oldest, newest = conn.query("SELECT MIN(timestamp), MAX(timestamp) FROM events").first
    puts "📅 Date Range:"
    puts " Oldest event: #{oldest}"
    puts " Newest event: #{newest}"
    puts

    # Database info
    puts "💾 Database Info:"
    puts " Location: #{duckdb_path}"
    size_mb = File.size(duckdb_path).fdiv(1024).fdiv(1024).round(2)
    puts " Size: #{size_mb}MB"
    puts " Total events: #{total}"
    puts
  end
end
|
||||
|
||||
# Runs SyncEventsToDuckdbJob inline and prints how long it took. If the job
# raises a StandardError, prints the error message and exits with status 1.
desc "Sync new events from PostgreSQL to DuckDB"
task sync: :environment do
  puts "🔄 Syncing events from PostgreSQL to DuckDB..."
  began_at = Time.current

  begin
    SyncEventsToDuckdbJob.perform_now
    elapsed = Time.current - began_at
    puts "✅ Sync complete in #{elapsed.round(2)}s"
  rescue => error
    puts "❌ Error syncing: #{error.message}"
    exit 1
  end
end
|
||||
end
|
||||
Reference in New Issue
Block a user