From 179563022e59f84f6507786ef2825d59ec5798b1 Mon Sep 17 00:00:00 2001 From: Dan Milne Date: Sun, 30 Nov 2025 13:18:17 +1100 Subject: [PATCH] Drop add_headers - headers can now be added to meta[] to be applied for any action. Consilidate Tagging in a service --- app/controllers/rules_controller.rb | 6 -- app/models/event.rb | 18 +++- app/models/event_ddb.rb | 6 +- app/models/rule.rb | 28 +----- app/models/waf_policy.rb | 40 +------- app/views/events/index.html.erb | 2 +- app/views/rules/new.html.erb | 21 ---- config/initializers/device_detector.rb | 6 ++ lib/tasks/duckdb.rake | 127 +++++++++++++++++++++++++ 9 files changed, 157 insertions(+), 97 deletions(-) create mode 100644 config/initializers/device_detector.rb create mode 100644 lib/tasks/duckdb.rake diff --git a/app/controllers/rules_controller.rb b/app/controllers/rules_controller.rb index 277fa21..df89040 100644 --- a/app/controllers/rules_controller.rb +++ b/app/controllers/rules_controller.rb @@ -261,12 +261,6 @@ def process_quick_create_parameters # Ensure metadata is a hash @rule.metadata = {} unless @rule.metadata.is_a?(Hash) - # Handle add_header fields - use provided params or existing metadata values - if @rule.add_header_action? && (params[:header_name].present? || params[:header_value].present?) - @rule.metadata['header_name'] = params[:header_name].presence || @rule.metadata['header_name'] || 'X-Bot-Agent' - @rule.metadata['header_value'] = params[:header_value].presence || @rule.metadata['header_value'] || 'Unknown' - end - # Handle expires_at parsing for text input if params.dig(:rule, :expires_at).present? expires_at_str = params[:rule][:expires_at].strip diff --git a/app/models/event.rb b/app/models/event.rb index 8349a8c..4f0ada8 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -226,8 +226,8 @@ class Event < ApplicationRecord # Normalize headers in payload during import phase normalized_payload = normalize_payload_headers(payload) - # Create the WAF request event - create!( + # Create the WAF request event with agent-provided tags + event = create!( request_id: request_id, timestamp: parse_timestamp(normalized_payload["timestamp"]), payload: normalized_payload, @@ -250,11 +250,18 @@ class Event < ApplicationRecord server_name: normalized_payload["server_name"], environment: normalized_payload["environment"], - + # Tags: start with agent-provided tags only + tags: normalized_payload["tags"] || [], + # WAF agent info agent_version: normalized_payload.dig("agent", "version"), agent_name: normalized_payload.dig("agent", "name") ) + + # Apply rule tags using EventTagger service + EventTagger.tag_event(event) + + event end # Normalize headers in payload to lower case during import phase @@ -347,7 +354,10 @@ class Event < ApplicationRecord def tags # Use the dedicated tags column (array), fallback to payload during transition - super.presence || (payload&.dig("tags") || []) + # Ensure we always return an Array, even if payload has malformed data (e.g., {} instead of []) + result = super.presence || payload&.dig("tags") + return [] if result.nil? + result.is_a?(Array) ? result : [] end def headers diff --git a/app/models/event_ddb.rb b/app/models/event_ddb.rb index c9df10f..3758159 100644 --- a/app/models/event_ddb.rb +++ b/app/models/event_ddb.rb @@ -34,8 +34,10 @@ class EventDdb SQL # Convert to hash like ActiveRecord .group.count returns - # DuckDB returns arrays: [waf_action, count] - result.to_a.to_h { |row| [row[0], row[1]] } + # DuckDB returns integer enum values, map to string names + # 0=deny, 1=allow, 2=redirect, 3=challenge, 4=log + action_map = { 0 => "deny", 1 => "allow", 2 => "redirect", 3 => "challenge", 4 => "log" } + result.to_a.to_h { |row| [action_map[row[0]] || "unknown", row[1]] } end rescue StandardError => e Rails.logger.error "[EventDdb] Error in breakdown_by_action: #{e.message}" diff --git a/app/models/rule.rb b/app/models/rule.rb index ffa04d8..00f5453 100644 --- a/app/models/rule.rb +++ b/app/models/rule.rb @@ -7,7 +7,8 @@ class Rule < ApplicationRecord # Rule enums (prefix needed to avoid rate_limit collision) # Canonical WAF action order - aligned with Agent and Event models - enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4, add_header: 5 }, prefix: :action + # Note: allow and log actions can include headers/tags in metadata for automatic injection + enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4 }, prefix: :action enum :waf_rule_type, { network: 0, rate_limit: 1, path_pattern: 2 }, prefix: :type SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default manual:surgical_block manual:surgical_exception policy].freeze @@ -120,10 +121,6 @@ class Rule < ApplicationRecord action_challenge? end - def add_header_action? - action_add_header? - end - # Redirect/challenge convenience methods def redirect_url metadata_hash['redirect_url'] @@ -141,14 +138,6 @@ class Rule < ApplicationRecord metadata&.dig('challenge_message') end - def header_name - metadata&.dig('header_name') - end - - def header_value - metadata&.dig('header_value') - end - # Tag-related methods def tags metadata_hash['tags'] || [] @@ -469,12 +458,6 @@ class Rule < ApplicationRecord if source&.start_with?('auto:') || source == 'default' self.user ||= User.find_by(role: 1) # admin role end - - # Set default header values for add_header action - if add_header_action? - self.metadata['header_name'] ||= 'X-Bot-Agent' - self.metadata['header_value'] ||= 'Unknown' - end end def calculate_priority_for_network_rules @@ -558,13 +541,6 @@ class Rule < ApplicationRecord if challenge_type_value && !%w[captcha javascript proof_of_work].include?(challenge_type_value) errors.add(:metadata, "challenge_type must be one of: captcha, javascript, proof_of_work") end - when "add_header" - unless metadata&.dig("header_name").present? - errors.add(:metadata, "must include 'header_name' for add_header action") - end - unless metadata&.dig("header_value").present? - errors.add(:metadata, "must include 'header_value' for add_header action") - end end end diff --git a/app/models/waf_policy.rb b/app/models/waf_policy.rb index b5f4017..c061ed6 100644 --- a/app/models/waf_policy.rb +++ b/app/models/waf_policy.rb @@ -9,7 +9,7 @@ class WafPolicy < ApplicationRecord POLICY_TYPES = %w[country asn company network_type path_pattern].freeze # Actions - what to do when traffic matches this policy - ACTIONS = %w[allow deny redirect challenge add_header].freeze + ACTIONS = %w[allow deny redirect challenge log].freeze # Associations belongs_to :user @@ -25,7 +25,6 @@ validate :targets_must_be_array validate :validate_targets_by_type validate :validate_redirect_configuration, if: :redirect_policy_action? validate :validate_challenge_configuration, if: :challenge_policy_action? - validate :validate_add_header_configuration, if: :add_header_policy_action? # Scopes scope :enabled, -> { where(enabled: true) } @@ -96,10 +95,6 @@ validate :targets_must_be_array policy_action == 'challenge' end - def add_header_policy_action? - policy_action == 'add_header' - end - # Lifecycle methods def active? enabled? && !expired? @@ -168,7 +163,7 @@ validate :targets_must_be_array priority: network_range.prefix_length ) - # Handle redirect/challenge/add_header specific data + # Handle redirect/challenge specific data if redirect_action? && additional_data['redirect_url'] rule.update!( metadata: rule.metadata.merge( @@ -183,13 +178,6 @@ validate :targets_must_be_array challenge_message: additional_data['challenge_message'] ) ) - elsif add_header_action? - rule.update!( - metadata: rule.metadata.merge( - header_name: additional_data['header_name'], - header_value: additional_data['header_value'] - ) - ) end rule @@ -224,7 +212,7 @@ validate :targets_must_be_array priority: 50 # Default priority for path rules ) - # Handle redirect/challenge/add_header specific data + # Handle redirect/challenge specific data if redirect_action? && additional_data['redirect_url'] rule.update!( metadata: rule.metadata.merge( @@ -239,13 +227,6 @@ validate :targets_must_be_array challenge_message: additional_data['challenge_message'] ) ) - elsif add_header_action? - rule.update!( - metadata: rule.metadata.merge( - header_name: additional_data['header_name'], - header_value: additional_data['header_value'] - ) - ) end rule @@ -365,12 +346,6 @@ validate :targets_must_be_array self.targets ||= [] self.additional_data ||= {} self.enabled = true if enabled.nil? - - # Set default header values for add_header action - if add_header_policy_action? - self.additional_data['header_name'] ||= 'X-Bot-Agent' - self.additional_data['header_value'] ||= 'Unknown' - end end def targets_must_be_array @@ -455,15 +430,6 @@ validate :targets_must_be_array end end - def validate_add_header_configuration - if additional_data['header_name'].blank? - errors.add(:additional_data, "must include 'header_name' for add_header action") - end - if additional_data['header_value'].blank? - errors.add(:additional_data, "must include 'header_value' for add_header action") - end - end - # Matching logic for different policy types def matches_country?(network_range) country = network_range.country || network_range.inherited_intelligence[:country] diff --git a/app/views/events/index.html.erb b/app/views/events/index.html.erb index e23c12b..01c1bc3 100644 --- a/app/views/events/index.html.erb +++ b/app/views/events/index.html.erb @@ -25,7 +25,7 @@
<%= form.label :waf_action, "Action", class: "block text-sm font-medium text-gray-700" %> <%= form.select :waf_action, - options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge'], ['Add Header', 'add_header']], params[:waf_action]), + options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge'], ['Log', 'log']], params[:waf_action]), { }, { class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %>
diff --git a/app/views/rules/new.html.erb b/app/views/rules/new.html.erb index 2327ecf..52d2b59 100644 --- a/app/views/rules/new.html.erb +++ b/app/views/rules/new.html.erb @@ -159,27 +159,6 @@
- - -
<%= form.label :metadata, "Metadata", class: "block text-sm font-medium text-gray-700" %> diff --git a/config/initializers/device_detector.rb b/config/initializers/device_detector.rb new file mode 100644 index 0000000..0341bae --- /dev/null +++ b/config/initializers/device_detector.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +# Configure DeviceDetector cache +# Default is 5,000 entries - we increase to 10,000 for better hit rate +# Memory usage: ~1-2MB for 10k cached user agents +DeviceDetector.config.max_cache_keys = 10_000 diff --git a/lib/tasks/duckdb.rake b/lib/tasks/duckdb.rake new file mode 100644 index 0000000..f11dc79 --- /dev/null +++ b/lib/tasks/duckdb.rake @@ -0,0 +1,127 @@ +# frozen_string_literal: true + +namespace :duckdb do + desc "Rebuild DuckDB analytics database from scratch" + task rebuild: :environment do + puts "=" * 80 + puts "DuckDB Rebuild" + puts "=" * 80 + puts + + duckdb_path = Rails.root.join("storage", "analytics.duckdb") + + # Step 1: Check if DuckDB exists + if File.exist?(duckdb_path) + puts "đŸ—‘ī¸ Deleting existing DuckDB database..." + File.delete(duckdb_path) + puts " ✅ Deleted: #{duckdb_path}" + puts + else + puts "â„šī¸ No existing DuckDB database found" + puts + end + + # Step 2: Rebuild from PostgreSQL + puts "🔨 Rebuilding DuckDB from PostgreSQL events..." + puts + + start_time = Time.current + begin + SyncEventsToDuckdbJob.perform_now + duration = Time.current - start_time + + # Step 3: Verify the rebuild + event_count = AnalyticsDuckdbService.instance.event_count + bot_count = AnalyticsDuckdbService.instance.with_connection do |conn| + result = conn.query("SELECT COUNT(*) FROM events WHERE is_bot = true") + result.first&.first || 0 + end + + puts "=" * 80 + puts "✅ DuckDB Rebuild Complete!" + puts "=" * 80 + puts " Duration: #{duration.round(2)}s" + puts " Total events synced: #{event_count}" + puts " Bot events: #{bot_count} (#{(bot_count.to_f / event_count * 100).round(1)}%)" if event_count > 0 + puts " Human events: #{event_count - bot_count} (#{((event_count - bot_count).to_f / event_count * 100).round(1)}%)" if event_count > 0 + puts + puts "📂 Database location: #{duckdb_path}" + puts "📊 Database size: #{File.size(duckdb_path) / 1024.0 / 1024.0}MB" + puts + rescue => e + puts "❌ Error rebuilding DuckDB: #{e.message}" + puts e.backtrace.first(5).join("\n") + exit 1 + end + end + + desc "Show DuckDB statistics" + task stats: :environment do + duckdb_path = Rails.root.join("storage", "analytics.duckdb") + + unless File.exist?(duckdb_path) + puts "❌ DuckDB database not found at: #{duckdb_path}" + exit 1 + end + + puts "=" * 80 + puts "DuckDB Statistics" + puts "=" * 80 + puts + + total = AnalyticsDuckdbService.instance.event_count + + AnalyticsDuckdbService.instance.with_connection do |conn| + # Bot breakdown + result = conn.query(<<~SQL) + SELECT + is_bot, + COUNT(*) as event_count, + COUNT(DISTINCT ip_address) as unique_ips + FROM events + GROUP BY is_bot + SQL + + puts "📊 Bot Traffic Breakdown:" + result.each do |row| + type = row[0] ? "🤖 Bots" : "👤 Humans" + count = row[1] + ips = row[2] + percentage = (count.to_f / total * 100).round(1) + puts " #{type}: #{count} events (#{percentage}%) from #{ips} unique IPs" + end + puts + + # Date range + range_result = conn.query("SELECT MIN(timestamp), MAX(timestamp) FROM events") + min_ts, max_ts = range_result.first + puts "📅 Date Range:" + puts " Oldest event: #{min_ts}" + puts " Newest event: #{max_ts}" + puts + + # Database info + puts "💾 Database Info:" + puts " Location: #{duckdb_path}" + puts " Size: #{(File.size(duckdb_path) / 1024.0 / 1024.0).round(2)}MB" + puts " Total events: #{total}" + puts + end + end + + desc "Sync new events from PostgreSQL to DuckDB" + task sync: :environment do + puts "🔄 Syncing events from PostgreSQL to DuckDB..." + start_time = Time.current + + begin + SyncEventsToDuckdbJob.perform_now + duration = Time.current - start_time + + puts "✅ Sync complete in #{duration.round(2)}s" + rescue => e + puts "❌ Error syncing: #{e.message}" + exit 1 + end + end +end