Drop add_headers - headers can now be added to meta[] to be applied for any action. Consilidate Tagging in a service
This commit is contained in:
@@ -261,12 +261,6 @@ def process_quick_create_parameters
|
||||
# Ensure metadata is a hash
|
||||
@rule.metadata = {} unless @rule.metadata.is_a?(Hash)
|
||||
|
||||
# Handle add_header fields - use provided params or existing metadata values
|
||||
if @rule.add_header_action? && (params[:header_name].present? || params[:header_value].present?)
|
||||
@rule.metadata['header_name'] = params[:header_name].presence || @rule.metadata['header_name'] || 'X-Bot-Agent'
|
||||
@rule.metadata['header_value'] = params[:header_value].presence || @rule.metadata['header_value'] || 'Unknown'
|
||||
end
|
||||
|
||||
# Handle expires_at parsing for text input
|
||||
if params.dig(:rule, :expires_at).present?
|
||||
expires_at_str = params[:rule][:expires_at].strip
|
||||
|
||||
@@ -226,8 +226,8 @@ class Event < ApplicationRecord
|
||||
# Normalize headers in payload during import phase
|
||||
normalized_payload = normalize_payload_headers(payload)
|
||||
|
||||
# Create the WAF request event
|
||||
create!(
|
||||
# Create the WAF request event with agent-provided tags
|
||||
event = create!(
|
||||
request_id: request_id,
|
||||
timestamp: parse_timestamp(normalized_payload["timestamp"]),
|
||||
payload: normalized_payload,
|
||||
@@ -250,11 +250,18 @@ class Event < ApplicationRecord
|
||||
server_name: normalized_payload["server_name"],
|
||||
environment: normalized_payload["environment"],
|
||||
|
||||
# Tags: start with agent-provided tags only
|
||||
tags: normalized_payload["tags"] || [],
|
||||
|
||||
# WAF agent info
|
||||
agent_version: normalized_payload.dig("agent", "version"),
|
||||
agent_name: normalized_payload.dig("agent", "name")
|
||||
)
|
||||
|
||||
# Apply rule tags using EventTagger service
|
||||
EventTagger.tag_event(event)
|
||||
|
||||
event
|
||||
end
|
||||
|
||||
# Normalize headers in payload to lower case during import phase
|
||||
@@ -347,7 +354,10 @@ class Event < ApplicationRecord
|
||||
|
||||
def tags
|
||||
# Use the dedicated tags column (array), fallback to payload during transition
|
||||
super.presence || (payload&.dig("tags") || [])
|
||||
# Ensure we always return an Array, even if payload has malformed data (e.g., {} instead of [])
|
||||
result = super.presence || payload&.dig("tags")
|
||||
return [] if result.nil?
|
||||
result.is_a?(Array) ? result : []
|
||||
end
|
||||
|
||||
def headers
|
||||
|
||||
@@ -34,8 +34,10 @@ class EventDdb
|
||||
SQL
|
||||
|
||||
# Convert to hash like ActiveRecord .group.count returns
|
||||
# DuckDB returns arrays: [waf_action, count]
|
||||
result.to_a.to_h { |row| [row[0], row[1]] }
|
||||
# DuckDB returns integer enum values, map to string names
|
||||
# 0=deny, 1=allow, 2=redirect, 3=challenge, 4=log
|
||||
action_map = { 0 => "deny", 1 => "allow", 2 => "redirect", 3 => "challenge", 4 => "log" }
|
||||
result.to_a.to_h { |row| [action_map[row[0]] || "unknown", row[1]] }
|
||||
end
|
||||
rescue StandardError => e
|
||||
Rails.logger.error "[EventDdb] Error in breakdown_by_action: #{e.message}"
|
||||
|
||||
@@ -7,7 +7,8 @@
|
||||
class Rule < ApplicationRecord
|
||||
# Rule enums (prefix needed to avoid rate_limit collision)
|
||||
# Canonical WAF action order - aligned with Agent and Event models
|
||||
enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4, add_header: 5 }, prefix: :action
|
||||
# Note: allow and log actions can include headers/tags in metadata for automatic injection
|
||||
enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4 }, prefix: :action
|
||||
enum :waf_rule_type, { network: 0, rate_limit: 1, path_pattern: 2 }, prefix: :type
|
||||
|
||||
SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default manual:surgical_block manual:surgical_exception policy].freeze
|
||||
@@ -120,10 +121,6 @@ class Rule < ApplicationRecord
|
||||
action_challenge?
|
||||
end
|
||||
|
||||
def add_header_action?
|
||||
action_add_header?
|
||||
end
|
||||
|
||||
# Redirect/challenge convenience methods
|
||||
def redirect_url
|
||||
metadata_hash['redirect_url']
|
||||
@@ -141,14 +138,6 @@ class Rule < ApplicationRecord
|
||||
metadata&.dig('challenge_message')
|
||||
end
|
||||
|
||||
def header_name
|
||||
metadata&.dig('header_name')
|
||||
end
|
||||
|
||||
def header_value
|
||||
metadata&.dig('header_value')
|
||||
end
|
||||
|
||||
# Tag-related methods
|
||||
def tags
|
||||
metadata_hash['tags'] || []
|
||||
@@ -469,12 +458,6 @@ class Rule < ApplicationRecord
|
||||
if source&.start_with?('auto:') || source == 'default'
|
||||
self.user ||= User.find_by(role: 1) # admin role
|
||||
end
|
||||
|
||||
# Set default header values for add_header action
|
||||
if add_header_action?
|
||||
self.metadata['header_name'] ||= 'X-Bot-Agent'
|
||||
self.metadata['header_value'] ||= 'Unknown'
|
||||
end
|
||||
end
|
||||
|
||||
def calculate_priority_for_network_rules
|
||||
@@ -558,13 +541,6 @@ class Rule < ApplicationRecord
|
||||
if challenge_type_value && !%w[captcha javascript proof_of_work].include?(challenge_type_value)
|
||||
errors.add(:metadata, "challenge_type must be one of: captcha, javascript, proof_of_work")
|
||||
end
|
||||
when "add_header"
|
||||
unless metadata&.dig("header_name").present?
|
||||
errors.add(:metadata, "must include 'header_name' for add_header action")
|
||||
end
|
||||
unless metadata&.dig("header_value").present?
|
||||
errors.add(:metadata, "must include 'header_value' for add_header action")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ class WafPolicy < ApplicationRecord
|
||||
POLICY_TYPES = %w[country asn company network_type path_pattern].freeze
|
||||
|
||||
# Actions - what to do when traffic matches this policy
|
||||
ACTIONS = %w[allow deny redirect challenge add_header].freeze
|
||||
ACTIONS = %w[allow deny redirect challenge log].freeze
|
||||
|
||||
# Associations
|
||||
belongs_to :user
|
||||
@@ -25,7 +25,6 @@ validate :targets_must_be_array
|
||||
validate :validate_targets_by_type
|
||||
validate :validate_redirect_configuration, if: :redirect_policy_action?
|
||||
validate :validate_challenge_configuration, if: :challenge_policy_action?
|
||||
validate :validate_add_header_configuration, if: :add_header_policy_action?
|
||||
|
||||
# Scopes
|
||||
scope :enabled, -> { where(enabled: true) }
|
||||
@@ -96,10 +95,6 @@ validate :targets_must_be_array
|
||||
policy_action == 'challenge'
|
||||
end
|
||||
|
||||
def add_header_policy_action?
|
||||
policy_action == 'add_header'
|
||||
end
|
||||
|
||||
# Lifecycle methods
|
||||
def active?
|
||||
enabled? && !expired?
|
||||
@@ -168,7 +163,7 @@ validate :targets_must_be_array
|
||||
priority: network_range.prefix_length
|
||||
)
|
||||
|
||||
# Handle redirect/challenge/add_header specific data
|
||||
# Handle redirect/challenge specific data
|
||||
if redirect_action? && additional_data['redirect_url']
|
||||
rule.update!(
|
||||
metadata: rule.metadata.merge(
|
||||
@@ -183,13 +178,6 @@ validate :targets_must_be_array
|
||||
challenge_message: additional_data['challenge_message']
|
||||
)
|
||||
)
|
||||
elsif add_header_action?
|
||||
rule.update!(
|
||||
metadata: rule.metadata.merge(
|
||||
header_name: additional_data['header_name'],
|
||||
header_value: additional_data['header_value']
|
||||
)
|
||||
)
|
||||
end
|
||||
|
||||
rule
|
||||
@@ -224,7 +212,7 @@ validate :targets_must_be_array
|
||||
priority: 50 # Default priority for path rules
|
||||
)
|
||||
|
||||
# Handle redirect/challenge/add_header specific data
|
||||
# Handle redirect/challenge specific data
|
||||
if redirect_action? && additional_data['redirect_url']
|
||||
rule.update!(
|
||||
metadata: rule.metadata.merge(
|
||||
@@ -239,13 +227,6 @@ validate :targets_must_be_array
|
||||
challenge_message: additional_data['challenge_message']
|
||||
)
|
||||
)
|
||||
elsif add_header_action?
|
||||
rule.update!(
|
||||
metadata: rule.metadata.merge(
|
||||
header_name: additional_data['header_name'],
|
||||
header_value: additional_data['header_value']
|
||||
)
|
||||
)
|
||||
end
|
||||
|
||||
rule
|
||||
@@ -365,12 +346,6 @@ validate :targets_must_be_array
|
||||
self.targets ||= []
|
||||
self.additional_data ||= {}
|
||||
self.enabled = true if enabled.nil?
|
||||
|
||||
# Set default header values for add_header action
|
||||
if add_header_policy_action?
|
||||
self.additional_data['header_name'] ||= 'X-Bot-Agent'
|
||||
self.additional_data['header_value'] ||= 'Unknown'
|
||||
end
|
||||
end
|
||||
|
||||
def targets_must_be_array
|
||||
@@ -455,15 +430,6 @@ validate :targets_must_be_array
|
||||
end
|
||||
end
|
||||
|
||||
def validate_add_header_configuration
|
||||
if additional_data['header_name'].blank?
|
||||
errors.add(:additional_data, "must include 'header_name' for add_header action")
|
||||
end
|
||||
if additional_data['header_value'].blank?
|
||||
errors.add(:additional_data, "must include 'header_value' for add_header action")
|
||||
end
|
||||
end
|
||||
|
||||
# Matching logic for different policy types
|
||||
def matches_country?(network_range)
|
||||
country = network_range.country || network_range.inherited_intelligence[:country]
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
<div>
|
||||
<%= form.label :waf_action, "Action", class: "block text-sm font-medium text-gray-700" %>
|
||||
<%= form.select :waf_action,
|
||||
options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge'], ['Add Header', 'add_header']], params[:waf_action]),
|
||||
options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge'], ['Log', 'log']], params[:waf_action]),
|
||||
{ }, { class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %>
|
||||
</div>
|
||||
<div>
|
||||
|
||||
@@ -159,27 +159,6 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Add Header Fields (shown for add_header action) -->
|
||||
<div id="add_header_section" class="hidden space-y-4" data-rule-form-target="addHeaderSection">
|
||||
<div>
|
||||
<%= label_tag :header_name, "Header Name", class: "block text-sm font-medium text-gray-700" %>
|
||||
<%= text_field_tag :header_name, "",
|
||||
class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
|
||||
placeholder: "X-Bot-Agent",
|
||||
id: "header_name_input" %>
|
||||
<p class="mt-2 text-sm text-gray-500">The HTTP header name to add (e.g., X-Bot-Agent, X-Network-Type)</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<%= label_tag :header_value, "Header Value", class: "block text-sm font-medium text-gray-700" %>
|
||||
<%= text_field_tag :header_value, "",
|
||||
class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
|
||||
placeholder: "BingBot",
|
||||
id: "header_value_input" %>
|
||||
<p class="mt-2 text-sm text-gray-500">The value for the header (e.g., BingBot, GoogleBot, Unknown)</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Metadata -->
|
||||
<div data-controller="json-validator" data-json-validator-valid-class="json-valid" data-json-validator-invalid-class="json-invalid" data-json-validator-valid-status-class="json-valid-status" data-json-validator-invalid-status-class="json-invalid-status">
|
||||
<%= form.label :metadata, "Metadata", class: "block text-sm font-medium text-gray-700" %>
|
||||
|
||||
6
config/initializers/device_detector.rb
Normal file
6
config/initializers/device_detector.rb
Normal file
@@ -0,0 +1,6 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Configure DeviceDetector cache
|
||||
# Default is 5,000 entries - we increase to 10,000 for better hit rate
|
||||
# Memory usage: ~1-2MB for 10k cached user agents
|
||||
DeviceDetector.config.max_cache_keys = 10_000
|
||||
127
lib/tasks/duckdb.rake
Normal file
127
lib/tasks/duckdb.rake
Normal file
@@ -0,0 +1,127 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
namespace :duckdb do
|
||||
desc "Rebuild DuckDB analytics database from scratch"
|
||||
task rebuild: :environment do
|
||||
puts "=" * 80
|
||||
puts "DuckDB Rebuild"
|
||||
puts "=" * 80
|
||||
puts
|
||||
|
||||
duckdb_path = Rails.root.join("storage", "analytics.duckdb")
|
||||
|
||||
# Step 1: Check if DuckDB exists
|
||||
if File.exist?(duckdb_path)
|
||||
puts "🗑️ Deleting existing DuckDB database..."
|
||||
File.delete(duckdb_path)
|
||||
puts " ✅ Deleted: #{duckdb_path}"
|
||||
puts
|
||||
else
|
||||
puts "ℹ️ No existing DuckDB database found"
|
||||
puts
|
||||
end
|
||||
|
||||
# Step 2: Rebuild from PostgreSQL
|
||||
puts "🔨 Rebuilding DuckDB from PostgreSQL events..."
|
||||
puts
|
||||
|
||||
start_time = Time.current
|
||||
begin
|
||||
SyncEventsToDuckdbJob.perform_now
|
||||
duration = Time.current - start_time
|
||||
|
||||
# Step 3: Verify the rebuild
|
||||
event_count = AnalyticsDuckdbService.instance.event_count
|
||||
bot_count = AnalyticsDuckdbService.instance.with_connection do |conn|
|
||||
result = conn.query("SELECT COUNT(*) FROM events WHERE is_bot = true")
|
||||
result.first&.first || 0
|
||||
end
|
||||
|
||||
puts "=" * 80
|
||||
puts "✅ DuckDB Rebuild Complete!"
|
||||
puts "=" * 80
|
||||
puts " Duration: #{duration.round(2)}s"
|
||||
puts " Total events synced: #{event_count}"
|
||||
puts " Bot events: #{bot_count} (#{(bot_count.to_f / event_count * 100).round(1)}%)" if event_count > 0
|
||||
puts " Human events: #{event_count - bot_count} (#{((event_count - bot_count).to_f / event_count * 100).round(1)}%)" if event_count > 0
|
||||
puts
|
||||
puts "📂 Database location: #{duckdb_path}"
|
||||
puts "📊 Database size: #{File.size(duckdb_path) / 1024.0 / 1024.0}MB"
|
||||
puts
|
||||
rescue => e
|
||||
puts "❌ Error rebuilding DuckDB: #{e.message}"
|
||||
puts e.backtrace.first(5).join("\n")
|
||||
exit 1
|
||||
end
|
||||
end
|
||||
|
||||
desc "Show DuckDB statistics"
|
||||
task stats: :environment do
|
||||
duckdb_path = Rails.root.join("storage", "analytics.duckdb")
|
||||
|
||||
unless File.exist?(duckdb_path)
|
||||
puts "❌ DuckDB database not found at: #{duckdb_path}"
|
||||
exit 1
|
||||
end
|
||||
|
||||
puts "=" * 80
|
||||
puts "DuckDB Statistics"
|
||||
puts "=" * 80
|
||||
puts
|
||||
|
||||
total = AnalyticsDuckdbService.instance.event_count
|
||||
|
||||
AnalyticsDuckdbService.instance.with_connection do |conn|
|
||||
# Bot breakdown
|
||||
result = conn.query(<<~SQL)
|
||||
SELECT
|
||||
is_bot,
|
||||
COUNT(*) as event_count,
|
||||
COUNT(DISTINCT ip_address) as unique_ips
|
||||
FROM events
|
||||
GROUP BY is_bot
|
||||
SQL
|
||||
|
||||
puts "📊 Bot Traffic Breakdown:"
|
||||
result.each do |row|
|
||||
type = row[0] ? "🤖 Bots" : "👤 Humans"
|
||||
count = row[1]
|
||||
ips = row[2]
|
||||
percentage = (count.to_f / total * 100).round(1)
|
||||
puts " #{type}: #{count} events (#{percentage}%) from #{ips} unique IPs"
|
||||
end
|
||||
puts
|
||||
|
||||
# Date range
|
||||
range_result = conn.query("SELECT MIN(timestamp), MAX(timestamp) FROM events")
|
||||
min_ts, max_ts = range_result.first
|
||||
puts "📅 Date Range:"
|
||||
puts " Oldest event: #{min_ts}"
|
||||
puts " Newest event: #{max_ts}"
|
||||
puts
|
||||
|
||||
# Database info
|
||||
puts "💾 Database Info:"
|
||||
puts " Location: #{duckdb_path}"
|
||||
puts " Size: #{(File.size(duckdb_path) / 1024.0 / 1024.0).round(2)}MB"
|
||||
puts " Total events: #{total}"
|
||||
puts
|
||||
end
|
||||
end
|
||||
|
||||
desc "Sync new events from PostgreSQL to DuckDB"
|
||||
task sync: :environment do
|
||||
puts "🔄 Syncing events from PostgreSQL to DuckDB..."
|
||||
start_time = Time.current
|
||||
|
||||
begin
|
||||
SyncEventsToDuckdbJob.perform_now
|
||||
duration = Time.current - start_time
|
||||
|
||||
puts "✅ Sync complete in #{duration.round(2)}s"
|
||||
rescue => e
|
||||
puts "❌ Error syncing: #{e.message}"
|
||||
exit 1
|
||||
end
|
||||
end
|
||||
end
|
||||
Reference in New Issue
Block a user