Fix some blocked/allow laggards left over from the waf_action enum migration. Add DuckDB for outstanding analytics performance. Start adding an importer for all bot networks

This commit is contained in:
Dan Milne
2025-11-18 16:40:05 +11:00
parent ef56779584
commit 3f274c842c
37 changed files with 3522 additions and 151 deletions

View File

@@ -23,9 +23,10 @@ class AnalyticsController < ApplicationController
# Cache key includes period and start_time (hour-aligned for consistency)
cache_key_base = "analytics/#{@time_period}/#{@start_time.to_i}"
# Core statistics - cached
# Core statistics - cached (uses DuckDB if available)
@total_events = Rails.cache.fetch("#{cache_key_base}/total_events", expires_in: cache_ttl) do
Event.where("timestamp >= ?", @start_time).count
with_duckdb_fallback { EventDdb.count_since(@start_time) } ||
Event.where("timestamp >= ?", @start_time).count
end
@total_rules = Rails.cache.fetch("analytics/total_rules", expires_in: 5.minutes) do
@@ -40,31 +41,33 @@ class AnalyticsController < ApplicationController
NetworkRange.count
end
# Event breakdown by action - cached
# Event breakdown by action - cached (uses DuckDB if available)
@event_breakdown = Rails.cache.fetch("#{cache_key_base}/event_breakdown", expires_in: cache_ttl) do
Event.where("timestamp >= ?", @start_time)
.group(:waf_action)
.count
# Keys are already strings ("allow", "deny", etc.) from the enum
with_duckdb_fallback { EventDdb.breakdown_by_action(@start_time) } ||
Event.where("timestamp >= ?", @start_time)
.group(:waf_action)
.count
end
# Top countries by event count - cached (now uses denormalized country column)
# Top countries by event count - cached (uses DuckDB if available)
@top_countries = Rails.cache.fetch("#{cache_key_base}/top_countries", expires_in: cache_ttl) do
Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
.group(:country)
.count
.sort_by { |_, count| -count }
.first(10)
with_duckdb_fallback { EventDdb.top_countries(@start_time, 10) } ||
Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
.group(:country)
.count
.sort_by { |_, count| -count }
.first(10)
end
# Top blocked IPs - cached
# Top blocked IPs - cached (uses DuckDB if available)
@top_blocked_ips = Rails.cache.fetch("#{cache_key_base}/top_blocked_ips", expires_in: cache_ttl) do
Event.where("timestamp >= ?", @start_time)
.where(waf_action: 1) # deny action in enum
.group(:ip_address)
.count
.sort_by { |_, count| -count }
.first(10)
with_duckdb_fallback { EventDdb.top_blocked_ips(@start_time, 10) } ||
Event.where("timestamp >= ?", @start_time)
.where(waf_action: 0) # deny action in enum
.group(:ip_address)
.count
.sort_by { |_, count| -count }
.first(10)
end
# Network range intelligence breakdown - cached
@@ -92,7 +95,7 @@ class AnalyticsController < ApplicationController
total_users: User.count,
active_rules: Rule.enabled.count,
disabled_rules: Rule.where(enabled: false).count,
recent_errors: Event.where("timestamp >= ? AND waf_action = ?", @start_time, 1).count # 1 = deny
recent_errors: Event.where("timestamp >= ? AND waf_action = ?", @start_time, 0).count # 0 = deny
}
end
@@ -117,38 +120,90 @@ class AnalyticsController < ApplicationController
@time_period = params[:period]&.to_sym || :day
@start_time = calculate_start_time(@time_period)
# Top networks by request volume (using denormalized network_range_id)
# Use a subquery approach to avoid PostgreSQL GROUP BY issues with network_ranges.*
event_stats = Event.where("timestamp >= ?", @start_time)
.where.not(network_range_id: nil)
.group(:network_range_id)
.select("network_range_id, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
# Top networks by request volume - use DuckDB if available
network_stats = with_duckdb_fallback { EventDdb.top_networks(@start_time, 50) }
# Join the stats back to NetworkRange to get full network details
@top_networks = NetworkRange.joins("INNER JOIN (#{event_stats.to_sql}) stats ON stats.network_range_id = network_ranges.id")
.select("network_ranges.*, stats.event_count, stats.unique_ips")
.order("stats.event_count DESC")
.limit(50)
if network_stats
# DuckDB path: array format [network_range_id, event_count, unique_ips]
network_ids = network_stats.map { |row| row[0] }
stats_by_id = network_stats.to_h { |row| [row[0], { event_count: row[1], unique_ips: row[2] }] }
@top_networks = NetworkRange.where(id: network_ids)
.to_a
.map do |network|
stats = stats_by_id[network.id]
network.define_singleton_method(:event_count) { stats[:event_count] }
network.define_singleton_method(:unique_ips) { stats[:unique_ips] }
# Add inherited intelligence support
intelligence = network.inherited_intelligence
if intelligence[:inherited]
network.define_singleton_method(:display_company) { intelligence[:company] }
network.define_singleton_method(:display_country) { intelligence[:country] }
network.define_singleton_method(:inherited_from) { intelligence[:parent_cidr] }
network.define_singleton_method(:has_inherited_data?) { true }
else
network.define_singleton_method(:display_company) { network.company }
network.define_singleton_method(:display_country) { network.country }
network.define_singleton_method(:inherited_from) { nil }
network.define_singleton_method(:has_inherited_data?) { false }
end
network
end
.sort_by { |n| -n.event_count }
else
# PostgreSQL fallback
event_stats = Event.where("timestamp >= ?", @start_time)
.where.not(network_range_id: nil)
.group(:network_range_id)
.select("network_range_id, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
@top_networks = NetworkRange.joins("INNER JOIN (#{event_stats.to_sql}) stats ON stats.network_range_id = network_ranges.id")
.select("network_ranges.*, stats.event_count, stats.unique_ips")
.order("stats.event_count DESC")
.limit(50)
# Add inherited intelligence support for PostgreSQL fallback
@top_networks = @top_networks.to_a.map do |network|
intelligence = network.inherited_intelligence
if intelligence[:inherited]
network.define_singleton_method(:display_company) { intelligence[:company] }
network.define_singleton_method(:display_country) { intelligence[:country] }
network.define_singleton_method(:inherited_from) { intelligence[:parent_cidr] }
network.define_singleton_method(:has_inherited_data?) { true }
else
network.define_singleton_method(:display_company) { network.company }
network.define_singleton_method(:display_country) { network.country }
network.define_singleton_method(:inherited_from) { nil }
network.define_singleton_method(:has_inherited_data?) { false }
end
network
end
end
# Network type breakdown with traffic stats
@network_breakdown = calculate_network_type_stats(@start_time)
# Company breakdown for top traffic sources (using denormalized company column)
@top_companies = Event.where("timestamp >= ? AND company IS NOT NULL", @start_time)
# Company breakdown for top traffic sources - use DuckDB if available
@top_companies = with_duckdb_fallback { EventDdb.top_companies(@start_time, 20) } ||
Event.where("timestamp >= ? AND company IS NOT NULL", @start_time)
.group(:company)
.select("company, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
.order("event_count DESC")
.limit(20)
# ASN breakdown (using denormalized asn columns)
@top_asns = Event.where("timestamp >= ? AND asn IS NOT NULL", @start_time)
# ASN breakdown - use DuckDB if available
@top_asns = with_duckdb_fallback { EventDdb.top_asns(@start_time, 15) } ||
Event.where("timestamp >= ? AND asn IS NOT NULL", @start_time)
.group(:asn, :asn_org)
.select("asn, asn_org, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count")
.order("event_count DESC")
.limit(15)
# Geographic breakdown (using denormalized country column)
@top_countries = Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
# Geographic breakdown - use DuckDB if available
@top_countries = with_duckdb_fallback { EventDdb.top_countries_with_stats(@start_time, 15) } ||
Event.where("timestamp >= ? AND country IS NOT NULL", @start_time)
.group(:country)
.select("country, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips")
.order("event_count DESC")
@@ -191,12 +246,15 @@ class AnalyticsController < ApplicationController
# Historical hours are cached for full TTL, current hour cached briefly for freshness
# Cache historical hours (1-23 hours ago) - these are complete and won't change
# No expiration - will stick around until evicted by cache store
# No expiration - will stick around until evicted by cache store (uses DuckDB if available)
historical_timeline = Rails.cache.fetch("#{cache_key_base}/chart_historical") do
historical_start = 23.hours.ago.beginning_of_hour
events_by_hour = Event.where("timestamp >= ? AND timestamp < ?", historical_start, Time.current.beginning_of_hour)
.group("DATE_TRUNC('hour', timestamp)")
.count
current_hour_start = Time.current.beginning_of_hour
events_by_hour = with_duckdb_fallback { EventDdb.hourly_timeline(historical_start, current_hour_start) } ||
Event.where("timestamp >= ? AND timestamp < ?", historical_start, current_hour_start)
.group("DATE_TRUNC('hour', timestamp)")
.count
(1..23).map do |hour_ago|
hour_time = hour_ago.hours.ago.beginning_of_hour
@@ -209,6 +267,7 @@ class AnalyticsController < ApplicationController
end
# Current hour (0 hours ago) - cache very briefly since it's actively accumulating
# ALWAYS use PostgreSQL for current hour to get real-time data (DuckDB syncs every minute)
current_hour_data = Rails.cache.fetch("#{cache_key_base}/chart_current_hour", expires_in: 1.minute) do
hour_time = Time.current.beginning_of_hour
count = Event.where("timestamp >= ?", hour_time).count
@@ -290,6 +349,12 @@ class AnalyticsController < ApplicationController
end
def calculate_network_type_stats(start_time)
# Try DuckDB first, fallback to PostgreSQL
duckdb_stats = with_duckdb_fallback { EventDdb.network_type_stats(start_time) }
return duckdb_stats if duckdb_stats
# PostgreSQL fallback
# Get all network types with their traffic statistics using denormalized columns
network_types = [
{ type: 'datacenter', label: 'Datacenter', column: :is_datacenter },
@@ -333,6 +398,12 @@ class AnalyticsController < ApplicationController
end
def calculate_suspicious_patterns(start_time)
# Try DuckDB first, fallback to PostgreSQL
duckdb_patterns = with_duckdb_fallback { EventDdb.suspicious_patterns(start_time) }
return duckdb_patterns if duckdb_patterns
# PostgreSQL fallback
patterns = {}
# High volume networks (top 1% by request count) - using denormalized network_range_id
@@ -358,9 +429,9 @@ class AnalyticsController < ApplicationController
high_deny_networks = Event.where("timestamp >= ? AND network_range_id IS NOT NULL", start_time)
.group(:network_range_id)
.select("network_range_id,
COUNT(CASE WHEN waf_action = 1 THEN 1 END) as denied_count,
COUNT(CASE WHEN waf_action = 0 THEN 1 END) as denied_count,
COUNT(*) as total_count")
.having("COUNT(CASE WHEN waf_action = 1 THEN 1 END)::float / COUNT(*) > 0.5")
.having("COUNT(CASE WHEN waf_action = 0 THEN 1 END)::float / COUNT(*) > 0.5")
.having("COUNT(*) >= 10") # minimum threshold
patterns[:high_deny_rate] = {
@@ -392,12 +463,14 @@ class AnalyticsController < ApplicationController
{
id: network.id,
cidr: network.cidr,
company: network.company,
company: network.display_company,
asn: network.asn,
country: network.country,
country: network.display_country,
network_type: network.network_type,
event_count: network.event_count,
unique_ips: network.unique_ips
unique_ips: network.unique_ips,
has_inherited_data: network.has_inherited_data?,
inherited_from: network.inherited_from
}
},
network_breakdown: @network_breakdown,
@@ -449,4 +522,27 @@ class AnalyticsController < ApplicationController
}
end
end
# Helper method to try DuckDB first, fall back to PostgreSQL
def with_duckdb_fallback
yield # a nil result triggers the PostgreSQL fallback
rescue StandardError => e
Rails.logger.warn "[Analytics] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
nil # Return nil to trigger fallback
end
# Check if DuckDB has recent data (within last 2 minutes)
# Returns true if DuckDB is up-to-date, false if potentially stale
def duckdb_is_fresh?
newest = AnalyticsDuckdbService.instance.newest_event_timestamp
return false if newest.nil?
# Consider fresh if newest event is within 2 minutes
# (sync job runs every 1 minute, so 2 minutes allows for some lag)
newest >= 2.minutes.ago
rescue StandardError => e
Rails.logger.warn "[Analytics] Error checking DuckDB freshness: #{e.message}"
false
end
end
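A note on how the fallback composes: only nil and false are falsy in Ruby, so a legitimate zero count returned by DuckDB still short-circuits the ||, and the PostgreSQL query never runs. A minimal sketch of the pattern used throughout this controller:

  with_duckdb_fallback { EventDdb.count_since(1.day.ago) } ||
    Event.where("timestamp >= ?", 1.day.ago).count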

View File

@@ -0,0 +1,126 @@
# frozen_string_literal: true
class BotNetworkRangesController < ApplicationController
before_action :authenticate_user!
before_action :require_admin
def index
@bot_sources = BotNetworkRangeImporter::BOT_SOURCES
@recent_imports = DataImport.where(import_type: 'bot_network_ranges').order(created_at: :desc).limit(10)
@bot_network_ranges = NetworkRange.where("source LIKE 'bot_import_%'").order(created_at: :desc).limit(50)
end
def import
source_key = params[:source]
options = import_options
if source_key.present?
# Perform import synchronously for immediate feedback
begin
result = BotNetworkRangeImporter.import_from_source(source_key, options)
# Create a data import record
DataImport.create!(
import_type: 'bot_network_ranges',
source: source_key.to_s,
status: 'completed',
records_processed: result[:imported],
notes: "Imported from #{result[:source]}: #{result[:note] || 'Success'}"
)
flash[:notice] = "Successfully imported #{result[:imported]} ranges from #{result[:source]}"
rescue => e
flash[:alert] = "Failed to import from #{source_key}: #{e.message}"
end
else
flash[:alert] = "Please select a source to import from"
end
redirect_to bot_network_ranges_path
end
def import_async
source_key = params[:source]
options = import_options
if source_key.present?
# Create a data import record for tracking
data_import = DataImport.create!(
import_type: 'bot_network_ranges',
source: source_key.to_s,
status: 'pending',
records_processed: 0,
notes: "Import job queued for #{source_key}"
)
# Queue the background job
ImportBotNetworkRangesJob.perform_later(source_key, options.merge(data_import_id: data_import.id))
flash[:notice] = "Import job queued for #{source_key}. You'll be notified when it's complete."
else
flash[:alert] = "Please select a source to import from"
end
redirect_to bot_network_ranges_path
end
def import_all
options = import_options
# Create a data import record for batch import
data_import = DataImport.create!(
import_type: 'bot_network_ranges',
source: 'all_sources',
status: 'pending',
records_processed: 0,
notes: "Batch import job queued for all available sources"
)
# Queue the batch import job
ImportAllBotNetworkRangesJob.perform_later(options.merge(data_import_id: data_import.id))
flash[:notice] = "Batch import job queued for all sources. This may take several minutes."
redirect_to bot_network_ranges_path
end
def show
@network_ranges = NetworkRange.where("source LIKE ?", "bot_import_#{NetworkRange.sanitize_sql_like(params[:source].to_s)}%")
.order(created_at: :desc)
.page(params[:page])
.per(50)
@source_name = BotNetworkRangeImporter::BOT_SOURCES[params[:source].to_sym]&.dig(:name) || params[:source]
@import_stats = NetworkRange.where("source LIKE ?", "bot_import_#{NetworkRange.sanitize_sql_like(params[:source].to_s)}%")
.group(:source)
.count
end
def destroy
source = params[:source]
deleted_count = NetworkRange.where("source LIKE ?", "bot_import_#{NetworkRange.sanitize_sql_like(source.to_s)}%").delete_all
flash[:notice] = "Deleted #{deleted_count} network ranges from #{source}"
redirect_to bot_network_ranges_path
end
private
def require_admin
redirect_to root_path, alert: 'Admin access required' unless current_user&.admin?
end
def import_options
options = {}
# AWS-specific options
if params[:aws_services].present?
options[:aws_services] = params[:aws_services].split(',').map(&:strip)
end
# Batch size control
options[:batch_size] = params[:batch_size].to_i if params[:batch_size].present?
options[:batch_size] = 1000 if options[:batch_size].to_i.zero? # default when absent or zero
options
end
end
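The controller assumes BotNetworkRangeImporter::BOT_SOURCES is a hash keyed by symbol with at least a :name entry per source (see the &.dig(:name) lookup in show). A hypothetical sketch of that shape, for illustration only — the real registry lives in the importer:

  BOT_SOURCES = {
    googlebot: { name: "Googlebot" },
    bingbot:   { name: "Bingbot" }
  }.freeze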

View File

@@ -46,8 +46,10 @@ class NetworkRangesController < ApplicationController
authorize @network_range
if @network_range.persisted?
# Real network - use direct IP containment for consistency with stats
events_scope = Event.where("ip_address <<= ?", @network_range.cidr).recent
# Real network - use indexed network_range_id for much better performance
# Include child network ranges to capture all traffic within this network block
network_ids = [@network_range.id] + @network_range.child_ranges.pluck(:id)
events_scope = Event.where(network_range_id: network_ids).recent
else
# Virtual network - find events by IP range containment
events_scope = Event.where("ip_address <<= ?::inet", @network_range.to_s).recent
@@ -58,22 +60,24 @@ class NetworkRangesController < ApplicationController
@child_ranges = @network_range.child_ranges.limit(20)
@parent_ranges = @network_range.parent_ranges.limit(10)
@associated_rules = @network_range.persisted? ? @network_range.rules.includes(:user).order(created_at: :desc) : []
@associated_rules = @network_range.persisted? ? @network_range.rules.includes(:user, :network_range, :waf_policy).order(created_at: :desc) : []
# Load rules from supernets and subnets
@supernet_rules = @network_range.persisted? ? @network_range.supernet_rules.includes(:network_range, :user).limit(10) : []
@subnet_rules = @network_range.persisted? ? @network_range.child_rules.includes(:network_range, :user).limit(20) : []
@supernet_rules = @network_range.persisted? ? @network_range.supernet_rules.includes(:network_range, :user, :waf_policy).limit(10) : []
@subnet_rules = @network_range.persisted? ? @network_range.child_rules.includes(:network_range, :user, :waf_policy).limit(20) : []
# Traffic analytics (if we have events)
@traffic_stats = calculate_traffic_stats(@network_range)
# Check if we have IPAPI data (or if parent has it)
# Check if we have IPAPI data (or if parent has it) - cache expensive parent lookup
@has_ipapi_data = @network_range.has_network_data_from?(:ipapi)
@parent_with_ipapi = nil
unless @has_ipapi_data
# Check if parent has IPAPI data
parent = @network_range.parent_with_intelligence
# Cache expensive parent intelligence lookup
parent = Rails.cache.fetch("network_parent_intel:#{@network_range.cache_key}", expires_in: 1.hour) do
@network_range.parent_with_intelligence
end
if parent&.has_network_data_from?(:ipapi)
@parent_with_ipapi = parent
@has_ipapi_data = true
@@ -194,6 +198,15 @@ class NetworkRangesController < ApplicationController
private
# Helper method to try DuckDB first, fall back to PostgreSQL
def with_duckdb_fallback
yield # a nil result triggers the PostgreSQL fallback
rescue StandardError => e
Rails.logger.warn "[NetworkRanges] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
nil # Return nil to trigger fallback
end
def set_network_range
# Handle CIDR slugs (e.g., "40.77.167.100_32" -> "40.77.167.100/32")
cidr = params[:id].gsub('_', '/')
@@ -248,27 +261,37 @@ class NetworkRangesController < ApplicationController
# Use indexed network_range_id for much better performance instead of expensive CIDR operator
# Include child network ranges to capture all traffic within this network block
network_ids = [network_range.id] + network_range.child_ranges.pluck(:id)
base_query = Event.where(network_range_id: network_ids)
# Use separate queries: one for grouping (without ordering), one for recent activity (with ordering)
events_for_grouping = base_query.limit(1000)
events_for_activity = base_query.recent.limit(20)
# Try DuckDB first for stats (much faster)
duckdb_stats = with_duckdb_fallback { EventDdb.network_traffic_stats(network_ids) }
duckdb_top_paths = with_duckdb_fallback { EventDdb.network_top_paths(network_ids, 10) }
duckdb_top_agents = with_duckdb_fallback { EventDdb.network_top_user_agents(network_ids, 5) }
# Calculate counts properly - use consistent base_query for all counts
total_requests = base_query.count
unique_ips = base_query.except(:order).distinct.count(:ip_address)
blocked_requests = base_query.blocked.count
allowed_requests = base_query.allowed.count
if duckdb_stats
# DuckDB success - use fast aggregated stats
stats = duckdb_stats.merge(
top_paths: duckdb_top_paths&.to_h || {},
top_user_agents: duckdb_top_agents&.to_h || {},
recent_activity: Event.where(network_range_id: network_ids).recent.limit(20)
)
else
# PostgreSQL fallback
base_query = Event.where(network_range_id: network_ids)
events_for_grouping = base_query.limit(1000)
events_for_activity = base_query.recent.limit(20)
{
total_requests: total_requests,
unique_ips: unique_ips,
blocked_requests: blocked_requests,
allowed_requests: allowed_requests,
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10),
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5),
recent_activity: events_for_activity
}
stats = {
total_requests: base_query.count,
unique_ips: base_query.except(:order).distinct.count(:ip_address),
blocked_requests: base_query.blocked.count,
allowed_requests: base_query.allowed.count,
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10).to_h,
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5).to_h,
recent_activity: events_for_activity
}
end
stats
else
# No events - return empty stats
{
@@ -296,8 +319,8 @@ class NetworkRangesController < ApplicationController
unique_ips: base_query.except(:order).distinct.count(:ip_address),
blocked_requests: base_query.blocked.count,
allowed_requests: base_query.allowed.count,
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10),
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5),
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10).to_h,
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5).to_h,
recent_activity: events_for_activity
}
else

View File

@@ -46,12 +46,9 @@ class RulesController < ApplicationController
process_quick_create_parameters
# Handle network range creation if CIDR is provided
if params[:cidr].present? && @rule.network_rule?
network_range = NetworkRange.find_or_create_by(cidr: params[:cidr]) do |range|
range.user = Current.user
range.source = 'manual'
range.creation_reason = "Created for rule ##{@rule.id}"
end
cidr_param = params[:new_cidr].presence || params[:cidr].presence
if cidr_param.present? && @rule.network_rule?
network_range = NetworkRange.find_or_create_by_cidr(cidr_param, user: Current.user, source: 'manual')
@rule.network_range = network_range
end
@@ -132,7 +129,9 @@ class RulesController < ApplicationController
:expires_at,
:enabled,
:source,
:network_range_id
:network_range_id,
:header_name,
:header_value
]
# Only include conditions for non-network rules
@@ -250,15 +249,24 @@ def process_quick_create_parameters
})
end
# Parse metadata if it's a string that looks like JSON
if @rule.metadata.is_a?(String) && @rule.metadata.starts_with?('{')
# Parse metadata textarea first if it's JSON
if @rule.metadata.is_a?(String) && @rule.metadata.present? && @rule.metadata.starts_with?('{')
begin
@rule.metadata = JSON.parse(@rule.metadata)
rescue JSON::ParserError
# Keep as string if not valid JSON
# Keep as string if not valid JSON - will be caught by validation
end
end
# Ensure metadata is a hash
@rule.metadata = {} unless @rule.metadata.is_a?(Hash)
# Handle add_header fields - use provided params or existing metadata values
if @rule.add_header_action? && (params[:header_name].present? || params[:header_value].present?)
@rule.metadata['header_name'] = params[:header_name].presence || @rule.metadata['header_name'] || 'X-Bot-Agent'
@rule.metadata['header_value'] = params[:header_value].presence || @rule.metadata['header_value'] || 'Unknown'
end
# Handle expires_at parsing for text input
if params.dig(:rule, :expires_at).present?
expires_at_str = params[:rule][:expires_at].strip

View File

@@ -0,0 +1,36 @@
import { Controller } from "@hotwired/stimulus"
export default class RuleFormController extends Controller {
static targets = ["actionSelect", "addHeaderSection", "expirationCheckbox", "expirationField"]
connect() {
this.updateActionSections()
}
updateActionSections() {
const selectedAction = this.actionSelectTarget.value
// Hide all action-specific sections
this.addHeaderSectionTarget.classList.add('hidden')
// Show relevant section based on action
switch(selectedAction) {
case 'add_header':
this.addHeaderSectionTarget.classList.remove('hidden')
break
}
}
toggleExpiration() {
if (this.expirationCheckboxTarget.checked) {
this.expirationFieldTarget.classList.remove('hidden')
} else {
this.expirationFieldTarget.classList.add('hidden')
// Clear the datetime field when unchecked
const datetimeInput = this.expirationFieldTarget.querySelector('input[type="datetime-local"]')
if (datetimeInput) {
datetimeInput.value = ''
}
}
}
}

View File

@@ -0,0 +1,52 @@
# frozen_string_literal: true
# CleanupOldEventsJob - Removes events older than the configured retention period
#
# This job runs periodically (hourly) to clean up old events based on the
# event_retention_days setting. This helps keep the database size manageable
# and improves query performance.
#
# The retention period is configurable via the 'event_retention_days' setting
# (default: 90 days). This allows administrators to balance between historical
# data retention and database performance.
#
# Schedule: Every hour (configured in config/recurring.yml)
class CleanupOldEventsJob < ApplicationJob
queue_as :background
def perform
retention_days = Setting.event_retention_days
# Don't delete if retention is set to 0 or negative (disabled)
if retention_days <= 0
Rails.logger.info "CleanupOldEventsJob: Event retention disabled (retention_days: #{retention_days})"
return 0
end
cutoff_date = retention_days.days.ago
# Count events to be deleted
old_events = Event.where('timestamp < ?', cutoff_date)
count = old_events.count
if count.zero?
Rails.logger.info "CleanupOldEventsJob: No events older than #{retention_days} days found"
return 0
end
Rails.logger.info "CleanupOldEventsJob: Deleting #{count} events older than #{retention_days} days (before #{cutoff_date})"
# Delete in batches to avoid long-running transactions
deleted_count = 0
batch_size = 10_000
old_events.in_batches(of: batch_size) do |batch|
batch_count = batch.delete_all
deleted_count += batch_count
Rails.logger.info "CleanupOldEventsJob: Deleted batch of #{batch_count} events (total: #{deleted_count}/#{count})"
end
Rails.logger.info "CleanupOldEventsJob: Successfully deleted #{deleted_count} events"
deleted_count
end
end

View File

@@ -15,31 +15,14 @@ class FetchIpapiDataJob < ApplicationJob
ipapi_data = Ipapi.lookup(sample_ip)
if ipapi_data.present? && !ipapi_data.key?('error')
# Check if IPAPI returned a different route than our tracking network
ipapi_route = ipapi_data.dig('asn', 'route')
target_network = tracking_network
# Process IPAPI data and create network ranges
result = Ipapi.process_ipapi_data(ipapi_data, tracking_network)
if ipapi_route.present? && ipapi_route != tracking_network.cidr
# IPAPI returned a different CIDR - find or create that network range
Rails.logger.info "IPAPI returned different route: #{ipapi_route} (requested: #{tracking_network.cidr})"
# Mark the tracking network as having been queried
# Use the broadest CIDR returned for deduplication
tracking_network.mark_ipapi_queried!(result[:broadest_cidr])
target_network = NetworkRange.find_or_create_by(network: ipapi_route) do |nr|
nr.source = 'api_imported'
nr.creation_reason = "Created from IPAPI lookup for #{tracking_network.cidr}"
end
Rails.logger.info "Storing IPAPI data on correct network: #{target_network.cidr}"
end
# Store data on the target network (wherever IPAPI said it belongs)
target_network.set_network_data(:ipapi, ipapi_data)
target_network.last_api_fetch = Time.current
target_network.save!
# Mark the tracking network as having been queried, with the CIDR that was returned
tracking_network.mark_ipapi_queried!(target_network.cidr)
Rails.logger.info "Successfully fetched IPAPI data for #{tracking_network.cidr} (stored on #{target_network.cidr})"
Rails.logger.info "Successfully fetched IPAPI data for #{tracking_network.cidr} (created #{result[:networks].length} networks)"
# Broadcast to the tracking network
broadcast_ipapi_update(tracking_network, ipapi_data)

View File

@@ -0,0 +1,26 @@
# frozen_string_literal: true
# ImportAllBotNetworkRangesJob - Background job for importing from all bot sources
class ImportAllBotNetworkRangesJob < ApplicationJob
queue_as :default
def perform(options = {})
Rails.logger.info "Starting batch import of all bot network ranges"
results = BotNetworkRangeImporter.import_all_sources(options)
# Send completion summary
Rails.logger.info "Batch import completed. Summary: #{results}"
# Broadcast summary to clients
ActionCable.server.broadcast(
"bot_imports",
{
type: 'batch_summary',
status: 'completed',
results: results,
message: "Batch import completed for all sources"
}
)
end
end

View File

@@ -0,0 +1,47 @@
# frozen_string_literal: true
# ImportBotNetworkRangesJob - Background job for importing bot network ranges
#
# Imports network ranges from official bot provider sources.
# Runs asynchronously to avoid blocking the web interface.
class ImportBotNetworkRangesJob < ApplicationJob
queue_as :default
def perform(source_key, options = {})
Rails.logger.info "Starting bot network range import for source: #{source_key}"
begin
result = BotNetworkRangeImporter.import_from_source(source_key, options)
# Send notification or log completion
Rails.logger.info "Successfully imported #{result[:imported]} ranges from #{result[:source]}"
# Optionally broadcast via Turbo Streams for real-time updates
ActionCable.server.broadcast(
"bot_imports",
{
source: source_key,
status: 'completed',
imported: result[:imported],
message: "Successfully imported #{result[:imported]} ranges from #{result[:source]}"
}
)
rescue => e
Rails.logger.error "Bot network range import failed for #{source_key}: #{e.message}"
# Broadcast error notification
ActionCable.server.broadcast(
"bot_imports",
{
source: source_key,
status: 'error',
error: e.message,
message: "Failed to import from #{source_key}: #{e.message}"
}
)
raise e
end
end
end

View File

@@ -53,16 +53,15 @@ class ProcessWafEventJob < ApplicationJob
# Queue IPAPI enrichment based on /24 tracking
# The tracking network is the /24 that stores ipapi_queried_at
if NetworkRange.should_fetch_ipapi_for_ip?(event.ip_address)
# Use tracking network for fetch status to avoid race conditions
if tracking_network.is_fetching_api_data?(:ipapi)
Rails.logger.info "Skipping IPAPI fetch for #{tracking_network.cidr} - already being fetched"
else
tracking_network.mark_as_fetching_api_data!(:ipapi)
# Atomically mark as fetching - this prevents duplicate jobs via database lock
if tracking_network.mark_as_fetching_api_data!(:ipapi)
Rails.logger.info "Queueing IPAPI fetch for IP #{event.ip_address} (tracking network: #{tracking_network.cidr})"
FetchIpapiDataJob.perform_later(network_range_id: tracking_network.id)
else
Rails.logger.info "Skipping IPAPI fetch for #{tracking_network.cidr} - another job already started"
end
else
Rails.logger.debug "Skipping IPAPI fetch for IP #{event.ip_address} - already queried recently"
Rails.logger.debug "Skipping IPAPI fetch for IP #{event.ip_address} - already queried or being fetched"
end
# Evaluate WAF policies inline if needed (lazy evaluation)

View File

@@ -0,0 +1,89 @@
# frozen_string_literal: true
# Background job to sync events from PostgreSQL to DuckDB
# Runs every minute to keep the analytics database up-to-date
# Uses watermark tracking to only sync new events
class SyncEventsToDuckdbJob < ApplicationJob
queue_as :default
# Key for storing last sync timestamp in Rails cache
WATERMARK_CACHE_KEY = "duckdb_last_sync_time"
WATERMARK_TTL = 1.week
# Overlap window to catch late-arriving events
SYNC_OVERLAP = 1.minute
def perform
service = AnalyticsDuckdbService.instance
# Determine where to start syncing
from_timestamp = determine_sync_start_time(service)
Rails.logger.info "[DuckDB Sync] Starting sync from #{from_timestamp}"
# Sync new events using PostgreSQL cursor + DuckDB Appender
# (setup_schema is called internally within sync_new_events)
count = service.sync_new_events(from_timestamp)
# Update watermark if we synced any events
if count > 0
update_last_sync_time
Rails.logger.info "[DuckDB Sync] Successfully synced #{count} events"
else
Rails.logger.info "[DuckDB Sync] No new events to sync"
end
rescue StandardError => e
Rails.logger.error "[DuckDB Sync] Job failed: #{e.message}"
Rails.logger.error e.backtrace.join("\n")
raise # Re-raise to mark job as failed in Solid Queue
end
private
# Determine timestamp to start syncing from
# Strategy:
# 1. First run (DuckDB empty): sync from oldest PostgreSQL event
# 2. Subsequent runs: sync from last watermark with overlap
def determine_sync_start_time(service)
oldest_duckdb = service.oldest_event_timestamp
if oldest_duckdb.nil?
# DuckDB is empty - this is the first sync
# Start from oldest PostgreSQL event (or reasonable cutoff)
oldest_pg = Event.minimum(:timestamp)
if oldest_pg.nil?
# No events in PostgreSQL at all
Rails.logger.warn "[DuckDB Sync] No events found in PostgreSQL"
1.day.ago # Default to recent window
else
Rails.logger.info "[DuckDB Sync] First sync - starting from oldest event: #{oldest_pg}"
oldest_pg
end
else
# DuckDB has data - sync from last watermark with overlap
last_sync = Rails.cache.read(WATERMARK_CACHE_KEY)
if last_sync.nil?
# Watermark not in cache (maybe cache expired or restarted)
# Fall back to newest event in DuckDB
newest_duckdb = service.newest_event_timestamp
start_time = newest_duckdb ? newest_duckdb - SYNC_OVERLAP : oldest_duckdb
Rails.logger.info "[DuckDB Sync] Watermark not found, using newest DuckDB event: #{start_time}"
start_time
else
# Normal case: use watermark with overlap to catch late arrivals
start_time = last_sync - SYNC_OVERLAP
Rails.logger.debug "[DuckDB Sync] Using watermark: #{last_sync} (with #{SYNC_OVERLAP}s overlap)"
start_time
end
end
end
# Update last sync watermark in cache
def update_last_sync_time
now = Time.current
Rails.cache.write(WATERMARK_CACHE_KEY, now, expires_in: WATERMARK_TTL)
Rails.logger.debug "[DuckDB Sync] Updated watermark to #{now}"
end
end
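A worked example of the watermark plus overlap: if the last sync completed at 12:00:00, the next run starts reading at 11:59:00 (SYNC_OVERLAP of 1 minute). Re-read rows cannot be inserted twice, because sync_new_events also filters on id > max_id; the overlap exists only to catch events that committed late with slightly older timestamps:

  start_time = last_sync - SYNC_OVERLAP                         # 12:00:00 -> 11:59:00
  Event.where("timestamp >= ? AND id > ?", start_time, max_id)  # re-read rows are skipped by id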

View File

@@ -10,6 +10,17 @@ class Event < ApplicationRecord
# Enums for fixed value sets
# Canonical WAF action order - aligned with Rule and Agent models
#
# IMPORTANT: These values were swapped to match baffle-agent convention:
# - deny: 0 (blocked traffic)
# - allow: 1 (allowed traffic)
#
# When using raw integer values in queries:
# - waf_action = 0 -> denied/blocked requests
# - waf_action = 1 -> allowed requests
# - waf_action = 2 -> redirect requests
# - waf_action = 3 -> challenge requests
# - waf_action = 4 -> log-only requests
enum :waf_action, {
deny: 0, # deny/block
allow: 1, # allow/pass
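Since raw integers appear in several hand-written queries, one way to avoid another silent inversion if the mapping ever changes again is to resolve the value through the enum mapping instead of hardcoding it (a sketch, not part of this commit):

  Event.where("timestamp >= ? AND waf_action = ?", start_time, Event.waf_actions[:deny]) # resolves to 0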
@@ -341,11 +352,11 @@ class Event < ApplicationRecord
end
def blocked?
waf_action.in?(['block', 'deny'])
waf_action == 'deny' # deny = 0
end
def allowed?
waf_action.in?(['allow', 'pass'])
waf_action == 'allow' # allow = 1
end
def logged?

app/models/event_ddb.rb (new file, 499 lines)
View File

@@ -0,0 +1,499 @@
# frozen_string_literal: true
require 'ostruct'
# EventDdb - DuckDB-backed analytics queries for events
# Provides an ActiveRecord-like interface for querying DuckDB events table
# Falls back to PostgreSQL Event model if DuckDB is unavailable
class EventDdb
class << self
# Get DuckDB service
def service
AnalyticsDuckdbService.instance
end
# Total events since timestamp
def count_since(start_time)
service.with_connection do |conn|
result = conn.query("SELECT COUNT(*) as count FROM events WHERE timestamp >= ?", start_time)
result.first&.first || 0
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in count_since: #{e.message}"
nil # Fallback to PostgreSQL
end
# Event breakdown by WAF action
def breakdown_by_action(start_time)
service.with_connection do |conn|
result = conn.query(<<~SQL, start_time)
SELECT waf_action, COUNT(*) as count
FROM events
WHERE timestamp >= ?
GROUP BY waf_action
SQL
# Convert to hash like ActiveRecord .group.count returns
result.to_a.to_h { |row| [row["waf_action"], row["count"]] }
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in breakdown_by_action: #{e.message}"
nil
end
# Top countries with event counts
def top_countries(start_time, limit = 10)
service.with_connection do |conn|
result = conn.query(<<~SQL, start_time, limit)
SELECT country, COUNT(*) as count
FROM events
WHERE timestamp >= ? AND country IS NOT NULL
GROUP BY country
ORDER BY count DESC
LIMIT ?
SQL
# Return array of [country, count] tuples like ActiveRecord
result.to_a.map { |row| [row["country"], row["count"]] }
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in top_countries: #{e.message}"
nil
end
# Top blocked IPs
def top_blocked_ips(start_time, limit = 10)
service.with_connection do |conn|
result = conn.query(<<~SQL, start_time, limit)
SELECT ip_address, COUNT(*) as count
FROM events
WHERE timestamp >= ? AND waf_action = 0
GROUP BY ip_address
ORDER BY count DESC
LIMIT ?
SQL
result.to_a.map { |row| [row["ip_address"], row["count"]] }
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in top_blocked_ips: #{e.message}"
nil
end
# Hourly timeline aggregation
def hourly_timeline(start_time, end_time)
service.with_connection do |conn|
result = conn.query(<<~SQL, start_time, end_time)
SELECT
DATE_TRUNC('hour', timestamp) as hour,
COUNT(*) as count
FROM events
WHERE timestamp >= ? AND timestamp < ?
GROUP BY hour
ORDER BY hour
SQL
# Convert to hash with Time keys like ActiveRecord
result.to_a.to_h { |row| [row["hour"], row["count"]] }
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in hourly_timeline: #{e.message}"
nil
end
# Top networks by traffic volume
# Returns array of arrays: [network_range_id, event_count, unique_ips]
def top_networks(start_time, limit = 50)
service.with_connection do |conn|
result = conn.query(<<~SQL, start_time, limit)
SELECT
network_range_id,
COUNT(*) as event_count,
COUNT(DISTINCT ip_address) as unique_ips
FROM events
WHERE timestamp >= ? AND network_range_id IS NOT NULL
GROUP BY network_range_id
ORDER BY event_count DESC
LIMIT ?
SQL
result.to_a
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in top_networks: #{e.message}"
nil
end
# Top companies
# Returns array of OpenStruct objects with: company, event_count, unique_ips, network_count
def top_companies(start_time, limit = 20)
service.with_connection do |conn|
result = conn.query(<<~SQL, start_time, limit)
SELECT
company,
COUNT(*) as event_count,
COUNT(DISTINCT ip_address) as unique_ips,
COUNT(DISTINCT network_range_id) as network_count
FROM events
WHERE timestamp >= ? AND company IS NOT NULL
GROUP BY company
ORDER BY event_count DESC
LIMIT ?
SQL
# Convert arrays to OpenStruct for attribute access
result.to_a.map do |row|
OpenStruct.new(
company: row[0],
event_count: row[1],
unique_ips: row[2],
network_count: row[3]
)
end
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in top_companies: #{e.message}"
nil
end
# Top ASNs
# Returns array of OpenStruct objects with: asn, asn_org, event_count, unique_ips, network_count
def top_asns(start_time, limit = 15)
service.with_connection do |conn|
result = conn.query(<<~SQL, start_time, limit)
SELECT
asn,
asn_org,
COUNT(*) as event_count,
COUNT(DISTINCT ip_address) as unique_ips,
COUNT(DISTINCT network_range_id) as network_count
FROM events
WHERE timestamp >= ? AND asn IS NOT NULL
GROUP BY asn, asn_org
ORDER BY event_count DESC
LIMIT ?
SQL
# Convert arrays to OpenStruct for attribute access
result.to_a.map do |row|
OpenStruct.new(
asn: row[0],
asn_org: row[1],
event_count: row[2],
unique_ips: row[3],
network_count: row[4]
)
end
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in top_asns: #{e.message}"
nil
end
# Network type breakdown (datacenter, VPN, proxy, standard)
# Returns hash with network_type as key and hash of stats as value
def network_type_breakdown(start_time)
service.with_connection do |conn|
result = conn.query(<<~SQL, start_time)
SELECT
CASE
WHEN is_datacenter THEN 'datacenter'
WHEN is_vpn THEN 'vpn'
WHEN is_proxy THEN 'proxy'
ELSE 'standard'
END as network_type,
COUNT(*) as event_count,
COUNT(DISTINCT ip_address) as unique_ips,
COUNT(DISTINCT network_range_id) as network_count
FROM events
WHERE timestamp >= ?
GROUP BY network_type
SQL
# Convert arrays to hash: network_type => { event_count, unique_ips, network_count }
result.to_a.to_h do |row|
[
row[0], # network_type
{
"event_count" => row[1],
"unique_ips" => row[2],
"network_count" => row[3]
}
]
end
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in network_type_breakdown: #{e.message}"
nil
end
# Top countries with detailed stats (event count and unique IPs)
# Returns array of OpenStruct objects with: country, event_count, unique_ips
def top_countries_with_stats(start_time, limit = 15)
service.with_connection do |conn|
result = conn.query(<<~SQL, start_time, limit)
SELECT
country,
COUNT(*) as event_count,
COUNT(DISTINCT ip_address) as unique_ips
FROM events
WHERE timestamp >= ? AND country IS NOT NULL
GROUP BY country
ORDER BY event_count DESC
LIMIT ?
SQL
# Convert arrays to OpenStruct for attribute access
result.to_a.map do |row|
OpenStruct.new(
country: row[0],
event_count: row[1],
unique_ips: row[2]
)
end
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in top_countries_with_stats: #{e.message}"
nil
end
# Network type stats with formatted output matching controller expectations
# Returns hash with type keys containing label, networks, events, unique_ips, percentage
def network_type_stats(start_time)
service.with_connection do |conn|
# Get total events for percentage calculation
total_result = conn.query("SELECT COUNT(*) as total FROM events WHERE timestamp >= ?", start_time)
total_events = total_result.first&.first || 0
# Get breakdown by network type
breakdown = network_type_breakdown(start_time)
return nil unless breakdown
# Format results with labels and percentages
results = {}
{
'datacenter' => 'Datacenter',
'vpn' => 'VPN',
'proxy' => 'Proxy',
'standard' => 'Standard'
}.each do |type, label|
stats = breakdown[type]
event_count = stats ? stats["event_count"] : 0
results[type] = {
label: label,
networks: stats ? stats["network_count"] : 0,
events: event_count,
unique_ips: stats ? stats["unique_ips"] : 0,
percentage: total_events > 0 ? ((event_count.to_f / total_events) * 100).round(1) : 0
}
end
results
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in network_type_stats: #{e.message}"
nil
end
# Network range traffic statistics
# Returns comprehensive stats for a given network range ID(s)
def network_traffic_stats(network_range_ids)
network_range_ids = Array(network_range_ids)
return nil if network_range_ids.empty?
service.with_connection do |conn|
# Build IN clause with placeholders
placeholders = network_range_ids.map { "?" }.join(", ")
# Get all stats in a single query
result = conn.query(<<~SQL, *network_range_ids)
SELECT
COUNT(*) as total_requests,
COUNT(DISTINCT ip_address) as unique_ips,
SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) as blocked_requests,
SUM(CASE WHEN waf_action = 1 THEN 1 ELSE 0 END) as allowed_requests
FROM events
WHERE network_range_id IN (#{placeholders})
SQL
stats_row = result.first
return nil unless stats_row
{
total_requests: stats_row[0] || 0,
unique_ips: stats_row[1] || 0,
blocked_requests: stats_row[2] || 0,
allowed_requests: stats_row[3] || 0
}
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in network_traffic_stats: #{e.message}"
nil
end
# Top paths for network range(s)
def network_top_paths(network_range_ids, limit = 10)
network_range_ids = Array(network_range_ids)
return nil if network_range_ids.empty?
service.with_connection do |conn|
# Build IN clause with placeholders
placeholders = network_range_ids.map { "?" }.join(", ")
result = conn.query(<<~SQL, *network_range_ids, limit)
SELECT
request_path,
COUNT(*) as count
FROM events
WHERE network_range_id IN (#{placeholders})
AND request_path IS NOT NULL
GROUP BY request_path
ORDER BY count DESC
LIMIT ?
SQL
result.to_a.map { |row| [row[0], row[1]] }
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in network_top_paths: #{e.message}"
nil
end
# Top user agents for network range(s)
def network_top_user_agents(network_range_ids, limit = 5)
network_range_ids = Array(network_range_ids)
return nil if network_range_ids.empty?
service.with_connection do |conn|
# Build IN clause with placeholders
placeholders = network_range_ids.map { "?" }.join(", ")
result = conn.query(<<~SQL, *network_range_ids, limit)
SELECT
user_agent,
COUNT(*) as count
FROM events
WHERE network_range_id IN (#{placeholders})
AND user_agent IS NOT NULL
GROUP BY user_agent
ORDER BY count DESC
LIMIT ?
SQL
result.to_a.map { |row| [row[0], row[1]] }
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in network_top_user_agents: #{e.message}"
nil
end
# Full user agent tally for network range(s)
# Returns hash of user_agent => count for all agents in the network
def network_agent_tally(network_range_ids)
network_range_ids = Array(network_range_ids)
return nil if network_range_ids.empty?
service.with_connection do |conn|
# Build IN clause with placeholders
placeholders = network_range_ids.map { "?" }.join(", ")
result = conn.query(<<~SQL, *network_range_ids)
SELECT
user_agent,
COUNT(*) as count
FROM events
WHERE network_range_id IN (#{placeholders})
AND user_agent IS NOT NULL
GROUP BY user_agent
SQL
# Convert to hash matching Ruby .tally format
result.to_a.to_h { |row| [row[0], row[1]] }
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in network_agent_tally: #{e.message}"
nil
end
# Suspicious network activity patterns
# Detects high-volume networks, high deny rates, and distributed companies
def suspicious_patterns(start_time)
service.with_connection do |conn|
# High volume networks (5x average)
avg_query = conn.query(<<~SQL, start_time)
SELECT
AVG(event_count) as avg_events
FROM (
SELECT network_range_id, COUNT(*) as event_count
FROM events
WHERE timestamp >= ? AND network_range_id IS NOT NULL
GROUP BY network_range_id
) network_stats
SQL
avg_events = avg_query.first&.first || 0
threshold = avg_events * 5
high_volume = conn.query(<<~SQL, start_time, threshold)
SELECT
network_range_id,
COUNT(*) as event_count
FROM events
WHERE timestamp >= ? AND network_range_id IS NOT NULL
GROUP BY network_range_id
HAVING COUNT(*) > ?
ORDER BY event_count DESC
SQL
# High deny rate networks (>50% blocked, min 10 requests)
high_deny = conn.query(<<~SQL, start_time)
SELECT
network_range_id,
SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) as denied_count,
COUNT(*) as total_count
FROM events
WHERE timestamp >= ? AND network_range_id IS NOT NULL
GROUP BY network_range_id
HAVING CAST(SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) AS FLOAT) / COUNT(*) > 0.5
AND COUNT(*) >= 10
ORDER BY denied_count DESC
SQL
# Distributed companies (appearing with 5+ unique IPs)
distributed_companies = conn.query(<<~SQL, start_time)
SELECT
company,
COUNT(DISTINCT ip_address) as ip_count
FROM events
WHERE timestamp >= ? AND company IS NOT NULL
GROUP BY company
HAVING COUNT(DISTINCT ip_address) > 5
ORDER BY ip_count DESC
LIMIT 10
SQL
{
high_volume: {
count: high_volume.to_a.length,
networks: high_volume.to_a.map { |row| row[0] } # network_range_id
},
high_deny_rate: {
count: high_deny.to_a.length,
network_ids: high_deny.to_a.map { |row| row[0] } # network_range_id
},
distributed_companies: distributed_companies.to_a.map { |row|
{
company: row[0], # company name
subnets: row[1] # ip_count
}
}
}
end
rescue StandardError => e
Rails.logger.error "[EventDdb] Error in suspicious_patterns: #{e.message}"
nil
end
end
end
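Every nil return from these methods means "fall back to PostgreSQL". Illustrative return shapes, assuming a synced DuckDB file (counts are made up):

  EventDdb.count_since(1.hour.ago)          # => 4632
  EventDdb.top_countries(1.hour.ago, 3)     # => [["US", 900], ["DE", 300], ["AU", 120]]
  EventDdb.breakdown_by_action(1.hour.ago)  # => { 0 => 120, 1 => 4512 } (raw waf_action integers)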

View File

@@ -158,13 +158,26 @@ class NetworkRange < ApplicationRecord
end
def mark_as_fetching_api_data!(source)
self.network_data ||= {}
self.network_data['fetching_status'] ||= {}
self.network_data['fetching_status'][source.to_s] = {
'started_at' => Time.current.to_f,
'job_id' => SecureRandom.hex(8)
}
save!
# Use database-level locking to prevent race conditions
transaction do
# Reload with lock to get fresh data
lock!
# Double-check that we're not already fetching
if is_fetching_api_data?(source)
Rails.logger.info "Another job already started fetching #{source} for #{cidr}"
return false
end
self.network_data ||= {}
self.network_data['fetching_status'] ||= {}
self.network_data['fetching_status'][source.to_s] = {
'started_at' => Time.current.to_f,
'job_id' => SecureRandom.hex(8)
}
save!
true
end
end
def clear_fetching_status!(source)
@@ -222,9 +235,29 @@ class NetworkRange < ApplicationRecord
end
def agent_tally
# Rails.cache.fetch("#{to_s}:agent_tally", expires_in: 5.minutes) do
events.map(&:user_agent).tally
# end
Rails.cache.fetch("#{cache_key}:agent_tally", expires_in: 5.minutes) do
# Use DuckDB for fast agent tally instead of loading all events into memory
if persisted? && events_count > 0
# Include child network ranges to capture all traffic within this network block
network_ids = [id] + child_ranges.pluck(:id)
# Try DuckDB for fast aggregation; return an empty tally rather than
# loading every event into memory if DuckDB is unavailable
duckdb_tally = with_duckdb_fallback { EventDdb.network_agent_tally(network_ids) }
duckdb_tally || {}
else
# Virtual network (or no recorded events) - tally user agents directly
events.map(&:user_agent).tally
end
end
end
# Helper method to try DuckDB first, fall back to PostgreSQL
def with_duckdb_fallback
yield # a nil result triggers the PostgreSQL fallback
rescue StandardError => e
Rails.logger.warn "[NetworkRange] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
nil # Return nil to trigger fallback
end
# Geographic lookup
@@ -334,6 +367,9 @@ class NetworkRange < ApplicationRecord
def self.should_fetch_ipapi_for_ip?(ip_address)
tracking_network = find_or_create_tracking_network_for_ip(ip_address)
# Check if currently being fetched (prevents duplicate jobs)
return false if tracking_network.is_fetching_api_data?(:ipapi)
# Check if /24 has been queried recently
queried_at = tracking_network.network_data&.dig('ipapi_queried_at')
return true if queried_at.nil?

View File

@@ -7,7 +7,7 @@
class Rule < ApplicationRecord
# Rule enums (prefix needed to avoid rate_limit collision)
# Canonical WAF action order - aligned with Agent and Event models
enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4 }, prefix: :action
enum :waf_action, { deny: 0, allow: 1, redirect: 2, challenge: 3, log: 4, add_header: 5 }, prefix: :action
enum :waf_rule_type, { network: 0, rate_limit: 1, path_pattern: 2 }, prefix: :type
SOURCES = %w[manual auto:scanner_detected auto:rate_limit_exceeded auto:bot_detected imported default manual:surgical_block manual:surgical_exception policy].freeze
@@ -120,6 +120,10 @@ class Rule < ApplicationRecord
action_challenge?
end
def add_header_action?
action_add_header?
end
# Redirect/challenge convenience methods
def redirect_url
metadata_hash['redirect_url']
@@ -137,6 +141,14 @@ class Rule < ApplicationRecord
metadata&.dig('challenge_message')
end
def header_name
metadata&.dig('header_name')
end
def header_value
metadata&.dig('header_value')
end
def related_surgical_rules
if surgical_block?
# Find the corresponding exception rule
@@ -421,6 +433,12 @@ class Rule < ApplicationRecord
if source&.start_with?('auto:') || source == 'default'
self.user ||= User.find_by(role: 1) # admin role
end
# Set default header values for add_header action
if add_header_action?
self.metadata['header_name'] ||= 'X-Bot-Agent'
self.metadata['header_value'] ||= 'Unknown'
end
end
def calculate_priority_for_network_rules
@@ -504,6 +522,13 @@ class Rule < ApplicationRecord
if challenge_type_value && !%w[captcha javascript proof_of_work].include?(challenge_type_value)
errors.add(:metadata, "challenge_type must be one of: captcha, javascript, proof_of_work")
end
when "add_header"
unless metadata&.dig("header_name").present?
errors.add(:metadata, "must include 'header_name' for add_header action")
end
unless metadata&.dig("header_value").present?
errors.add(:metadata, "must include 'header_value' for add_header action")
end
end
end
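A sketch of the metadata shape the add_header validation expects; the default values are the ones set in the callback above:

  rule = Rule.new(waf_action: "add_header") # other required attributes omitted
  rule.metadata = { "header_name" => "X-Bot-Agent", "header_value" => "Unknown" }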

View File

@@ -9,7 +9,7 @@ class WafPolicy < ApplicationRecord
POLICY_TYPES = %w[country asn company network_type path_pattern].freeze
# Actions - what to do when traffic matches this policy
ACTIONS = %w[allow deny redirect challenge].freeze
ACTIONS = %w[allow deny redirect challenge add_header].freeze
# Associations
belongs_to :user
@@ -25,6 +25,7 @@ validate :targets_must_be_array
validate :validate_targets_by_type
validate :validate_redirect_configuration, if: :redirect_policy_action?
validate :validate_challenge_configuration, if: :challenge_policy_action?
validate :validate_add_header_configuration, if: :add_header_policy_action?
# Scopes
scope :enabled, -> { where(enabled: true) }
@@ -95,6 +96,10 @@ validate :targets_must_be_array
policy_action == 'challenge'
end
def add_header_policy_action?
policy_action == 'add_header'
end
# Lifecycle methods
def active?
enabled? && !expired?
@@ -163,7 +168,7 @@ validate :targets_must_be_array
priority: network_range.prefix_length
)
# Handle redirect/challenge specific data
# Handle redirect/challenge/add_header specific data
if redirect_action? && additional_data['redirect_url']
rule.update!(
metadata: rule.metadata.merge(
@@ -178,6 +183,13 @@ validate :targets_must_be_array
challenge_message: additional_data['challenge_message']
)
)
elsif add_header_action?
rule.update!(
metadata: rule.metadata.merge(
header_name: additional_data['header_name'],
header_value: additional_data['header_value']
)
)
end
rule
@@ -212,7 +224,7 @@ validate :targets_must_be_array
priority: 50 # Default priority for path rules
)
# Handle redirect/challenge specific data
# Handle redirect/challenge/add_header specific data
if redirect_action? && additional_data['redirect_url']
rule.update!(
metadata: rule.metadata.merge(
@@ -227,6 +239,13 @@ validate :targets_must_be_array
challenge_message: additional_data['challenge_message']
)
)
elsif add_header_action?
rule.update!(
metadata: rule.metadata.merge(
header_name: additional_data['header_name'],
header_value: additional_data['header_value']
)
)
end
rule
@@ -346,6 +365,12 @@ validate :targets_must_be_array
self.targets ||= []
self.additional_data ||= {}
self.enabled = true if enabled.nil?
# Set default header values for add_header action
if add_header_policy_action?
self.additional_data['header_name'] ||= 'X-Bot-Agent'
self.additional_data['header_value'] ||= 'Unknown'
end
end
def targets_must_be_array
@@ -430,6 +455,15 @@ validate :targets_must_be_array
end
end
def validate_add_header_configuration
if additional_data['header_name'].blank?
errors.add(:additional_data, "must include 'header_name' for add_header action")
end
if additional_data['header_value'].blank?
errors.add(:additional_data, "must include 'header_value' for add_header action")
end
end
# Matching logic for different policy types
def matches_country?(network_range)
country = network_range.country || network_range.inherited_intelligence[:country]

View File

@@ -0,0 +1,284 @@
# frozen_string_literal: true
# Service for managing DuckDB analytics database
# Provides fast analytical queries on events data using columnar storage
class AnalyticsDuckdbService
include Singleton
DUCKDB_PATH = Rails.root.join("storage", "analytics.duckdb").to_s
BATCH_SIZE = 10_000
# Execute block with connection, ensuring database and connection are closed afterward
def with_connection
db = DuckDB::Database.open(DUCKDB_PATH)
conn = db.connect
yield conn
ensure
conn&.close
db&.close
end
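# Usage sketch (illustrative, not part of the service): each call opens a
# fresh database handle and closes it afterward, so callers never share a
# stale or broken connection:
#
#   AnalyticsDuckdbService.instance.with_connection do |conn|
#     conn.query("SELECT COUNT(*) FROM events").first&.first
#   end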
# Create events table if it doesn't exist (must be called within with_connection block)
def setup_schema(conn)
conn.execute(<<~SQL)
CREATE TABLE IF NOT EXISTS events (
id BIGINT PRIMARY KEY,
timestamp TIMESTAMP NOT NULL,
ip_address VARCHAR,
network_range_id BIGINT,
country VARCHAR,
company VARCHAR,
asn INTEGER,
asn_org VARCHAR,
is_datacenter BOOLEAN,
is_vpn BOOLEAN,
is_proxy BOOLEAN,
waf_action INTEGER,
request_path VARCHAR,
user_agent VARCHAR
)
SQL
Rails.logger.info "[DuckDB] Schema setup complete"
end
# Get timestamp of oldest event in DuckDB
# Returns nil if table is empty
def oldest_event_timestamp
with_connection do |conn|
result = conn.query("SELECT MIN(timestamp) as oldest FROM events")
first_row = result.first
first_row&.first # Returns the value or nil
end
rescue StandardError => e
Rails.logger.error "[DuckDB] Error getting oldest timestamp: #{e.message}"
nil
end
# Get timestamp of newest event in DuckDB
# Returns nil if table is empty
def newest_event_timestamp
with_connection do |conn|
result = conn.query("SELECT MAX(timestamp) as newest FROM events")
first_row = result.first
first_row&.first # Returns the value or nil
end
rescue StandardError => e
Rails.logger.error "[DuckDB] Error getting newest timestamp: #{e.message}"
nil
end
# Get maximum event ID already synced to DuckDB
def max_synced_id
with_connection do |conn|
result = conn.query("SELECT COALESCE(MAX(id), 0) as max_id FROM events")
first_row = result.first
first_row&.first || 0
end
rescue StandardError => e
Rails.logger.error "[DuckDB] Error getting max ID: #{e.message}"
0
end
# Sync new events from PostgreSQL to DuckDB
# Uses PostgreSQL cursor for memory-efficient streaming
# Uses Appender API for fast bulk inserts
# Filters by ID to avoid duplicates
def sync_new_events(from_timestamp)
total_synced = 0
with_connection do |conn|
# Ensure table exists
setup_schema(conn)
# Get max ID already in DuckDB to avoid duplicates
max_id_result = conn.query("SELECT COALESCE(MAX(id), 0) as max_id FROM events")
max_id = max_id_result.first&.first || 0
Rails.logger.info "[DuckDB] Syncing events from #{from_timestamp}, max_id=#{max_id}"
start_time = Time.current
appender = nil
batch_count = 0
begin
# Use PostgreSQL cursor for memory-efficient streaming
Event.where("timestamp >= ? AND id > ?", from_timestamp, max_id)
.select(
:id,
:timestamp,
:ip_address,
:network_range_id,
:country,
:company,
:asn,
:asn_org,
:is_datacenter,
:is_vpn,
:is_proxy,
:waf_action,
:request_path,
:user_agent
)
.order(:id)
.each_row(block_size: BATCH_SIZE) do |event_data|
# Create new appender for each batch
if batch_count % BATCH_SIZE == 0
appender&.close # Close previous appender
appender = conn.appender("events")
end
# Unpack event data from cursor row (Hash from each_row)
begin
appender.append_row(
event_data["id"],
event_data["timestamp"],
event_data["ip_address"]&.to_s,
event_data["network_range_id"],
event_data["country"],
event_data["company"],
event_data["asn"],
event_data["asn_org"],
event_data["is_datacenter"],
event_data["is_vpn"],
event_data["is_proxy"],
event_data["waf_action"],
event_data["request_path"],
event_data["user_agent"]
)
rescue StandardError => e
Rails.logger.error "[DuckDB] Error appending event #{event_data['id']}: #{e.message}"
Rails.logger.error "[DuckDB] event_data = #{event_data.inspect}"
raise
end
batch_count += 1
total_synced += 1
# Log progress every BATCH_SIZE events
if batch_count % BATCH_SIZE == 0
Rails.logger.info "[DuckDB] Synced batch (total: #{total_synced} events)"
end
end
# Close final appender
appender&.close
duration = Time.current - start_time
rate = total_synced / duration if duration > 0
Rails.logger.info "[DuckDB] Sync complete: #{total_synced} events in #{duration.round(2)}s (~#{rate&.round(0)} events/sec)"
rescue StandardError => e
appender&.close rescue nil # Ensure appender is closed on error
Rails.logger.error "[DuckDB] Error syncing events: #{e.message}"
Rails.logger.error e.backtrace.join("\n")
raise # Re-raise to be caught by outer rescue
end
end
total_synced
rescue StandardError => e
Rails.logger.error "[DuckDB] Sync failed: #{e.message}"
0
end
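# Sketch of how a periodic sync could be scheduled (illustrative; the job
# class and queue name are assumptions, not part of this service):
#
#   class SyncAnalyticsDuckdbJob < ApplicationJob
#     queue_as :low
#
#     def perform
#       service = AnalyticsDuckdbService.instance
#       # Resume from the newest event already in DuckDB, or backfill a day
#       from = service.newest_event_timestamp || 1.day.ago
#       synced = service.sync_new_events(from)
#       Rails.logger.info "[DuckDB] Scheduled sync copied #{synced} events"
#     end
#   end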
# Execute analytical query on DuckDB
def query(sql, *params)
with_connection do |conn|
conn.query(sql, *params)
end
rescue StandardError => e
Rails.logger.error "[DuckDB] Query error: #{e.message}"
Rails.logger.error "SQL: #{sql}"
raise
end
# Get event count in DuckDB
def event_count
with_connection do |conn|
result = conn.query("SELECT COUNT(*) as count FROM events")
first_row = result.first
first_row&.first || 0
end
rescue StandardError => e
Rails.logger.error "[DuckDB] Error getting event count: #{e.message}"
0
end
# Analytics query: Total events since timestamp
def total_events_since(start_time)
with_connection do |conn|
result = conn.query("SELECT COUNT(*) as count FROM events WHERE timestamp >= ?", start_time)
result.first&.first || 0
end
end
# Analytics query: Event breakdown by WAF action
def event_breakdown_by_action(start_time)
with_connection do |conn|
result = conn.query(<<~SQL, start_time)
SELECT waf_action, COUNT(*) as count
FROM events
WHERE timestamp >= ?
GROUP BY waf_action
SQL
# Convert to a hash keyed by waf_action (ruby-duckdb yields rows as positional arrays)
result.to_a.to_h { |row| [row[0], row[1]] }
end
end
# Analytics query: Top countries
def top_countries(start_time, limit = 10)
with_connection do |conn|
result = conn.query(<<~SQL, start_time, limit)
SELECT country, COUNT(*) as count
FROM events
WHERE timestamp >= ? AND country IS NOT NULL
GROUP BY country
ORDER BY count DESC
LIMIT ?
SQL
result.to_a.map { |row| [row["country"], row["count"]] }
end
end
# Analytics query: Top blocked IPs
def top_blocked_ips(start_time, limit = 10)
with_connection do |conn|
result = conn.query(<<~SQL, start_time, limit)
SELECT ip_address, COUNT(*) as count
FROM events
WHERE timestamp >= ? AND waf_action = 0
GROUP BY ip_address
ORDER BY count DESC
LIMIT ?
SQL
result.to_a.map { |row| [row["ip_address"], row["count"]] }
end
end
# Analytics query: Hourly timeline (events grouped by hour)
def hourly_timeline(start_time, end_time)
with_connection do |conn|
result = conn.query(<<~SQL, start_time, end_time)
SELECT
DATE_TRUNC('hour', timestamp) as hour,
COUNT(*) as count
FROM events
WHERE timestamp >= ? AND timestamp < ?
GROUP BY hour
ORDER BY hour
SQL
# Convert to a hash keyed by hour (rows are positional arrays)
result.to_a.to_h { |row| [row[0], row[1]] }
end
end
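# The query helpers above compose like so (illustrative values):
#
#   svc = AnalyticsDuckdbService.instance
#   svc.total_events_since(24.hours.ago)          # => 128_431
#   svc.event_breakdown_by_action(24.hours.ago)   # => { 0 => 9_210, 1 => 119_221 }
#   svc.top_countries(24.hours.ago, 5)            # => [["US", 40_102], ["DE", 11_004], ...]
#   svc.hourly_timeline(24.hours.ago, Time.current)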
# No-op retained for cleanup/testing call sites: connections are opened and
# closed per call inside #with_connection, so there is no persistent handle
def close
# nothing to close
end
end

View File

@@ -0,0 +1,573 @@
# frozen_string_literal: true
# BotNetworkRangeImporter - Service for importing official bot network ranges
#
# Imports network ranges from official bot provider sources like:
# - Amazon AWS: https://ip-ranges.amazonaws.com/ip-ranges.json
# - Google: Official crawler IP lists
# - Microsoft/Bing: Bot network ranges
# - Anthropic: Service network ranges
# - OpenAI: Service network ranges
class BotNetworkRangeImporter
class ImportError < StandardError; end
# Official sources for bot network ranges
BOT_SOURCES = {
amazon_aws: {
name: 'Amazon AWS',
url: 'https://ip-ranges.amazonaws.com/ip-ranges.json',
format: :json,
parser: :parse_aws_ranges,
description: 'Official AWS IP ranges including Amazonbot and other services'
},
google: {
name: 'Google',
# Note: These URLs may need to be updated based on current Google documentation
urls: [
'https://developers.google.com/search/docs/files/googlebot.json',
'https://developers.google.com/search/docs/files/special-crawlers.json'
],
format: :json,
parser: :parse_google_ranges,
description: 'Googlebot and other Google crawler IP ranges'
},
microsoft_bing: {
name: 'Microsoft Bing',
# Note: Microsoft may require web scraping or API access
url: 'https://www.bing.com/toolbox/bingbot.json',
format: :json,
parser: :parse_microsoft_ranges,
description: 'Bingbot and other Microsoft crawler IP ranges'
},
anthropic: {
name: 'Anthropic Claude',
# Note: Anthropic ranges may need manual updates or different approach
url: 'https://docs.anthropic.com/claude/reference/ip_ranges',
format: :html,
parser: :parse_anthropic_ranges,
description: 'Anthropic Claude API service IP ranges'
},
openai_searchbot: {
name: 'OpenAI SearchBot',
url: 'https://openai.com/searchbot.json',
format: :json,
parser: :parse_openai_ranges,
description: 'OpenAI SearchBot for ChatGPT search features'
},
openai_chatgpt_user: {
name: 'OpenAI ChatGPT-User',
url: 'https://openai.com/chatgpt-user.json',
format: :json,
parser: :parse_openai_ranges,
description: 'OpenAI ChatGPT-User for user actions in ChatGPT and Custom GPTs'
},
openai_gptbot: {
name: 'OpenAI GPTBot',
url: 'https://openai.com/gptbot.json',
format: :json,
parser: :parse_openai_ranges,
description: 'OpenAI GPTBot for training AI foundation models'
},
cloudflare: {
name: 'Cloudflare',
urls: [
'https://www.cloudflare.com/ips-v4',
'https://www.cloudflare.com/ips-v6'
],
format: :text,
parser: :parse_cloudflare_ranges,
description: 'Cloudflare network ranges including their crawlers and services'
},
facebook: {
name: 'Facebook/Meta',
url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/',
format: :html,
parser: :parse_facebook_ranges,
description: 'Facebook/Meta crawlers and bots'
},
applebot: {
name: 'Applebot',
url: 'https://support.apple.com/en-us/HT204683',
format: :html,
parser: :parse_applebot_ranges,
description: 'Applebot crawler for Apple search and Siri'
},
duckduckgo: {
name: 'DuckDuckBot',
url: 'https://help.duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/',
format: :html,
parser: :parse_duckduckgo_ranges,
description: 'DuckDuckGo search crawler'
}
}.freeze
def self.import_from_source(source_key, options = {})
source = BOT_SOURCES[source_key.to_sym]
raise ImportError, "Unknown source: #{source_key}" unless source
puts "Importing bot network ranges from #{source[:name]}..."
case source[:parser]
when :parse_aws_ranges
parse_aws_ranges(source, options)
when :parse_google_ranges
parse_google_ranges(source, options)
when :parse_microsoft_ranges
parse_microsoft_ranges(source, options)
when :parse_anthropic_ranges
parse_anthropic_ranges(source, options)
when :parse_openai_ranges
parse_openai_ranges(source, options)
when :parse_cloudflare_ranges
parse_cloudflare_ranges(source, options)
when :parse_facebook_ranges
parse_facebook_ranges(source, options)
when :parse_applebot_ranges
parse_applebot_ranges(source, options)
when :parse_duckduckgo_ranges
parse_duckduckgo_ranges(source, options)
else
raise ImportError, "Unknown parser: #{source[:parser]}"
end
end
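# Example usage from a console or rake task (illustrative):
#
#   BotNetworkRangeImporter.import_from_source(:amazon_aws, batch_size: 500)
#   # => { imported: 1234, source: 'Amazon AWS' }
#
#   BotNetworkRangeImporter.import_all_sources
#   # => { amazon_aws: { imported: ... }, google: { ... }, ... }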
def self.import_all_sources(options = {})
results = {}
BOT_SOURCES.each do |source_key, source|
puts "\n" + "="*50
puts "Processing #{source[:name]}..."
puts "="*50
begin
results[source_key] = import_from_source(source_key, options)
rescue => e
Rails.logger.error "Failed to import from #{source[:name]}: #{e.message}"
results[source_key] = { error: e.message, imported: 0 }
end
end
puts "\n" + "="*50
puts "Import Summary"
puts "="*50
results.each do |source, result|
if result[:error]
puts "#{source}: FAILED - #{result[:error]}"
else
puts "#{source}: SUCCESS - #{result[:imported]} ranges imported"
end
end
results
end
private
# NOTE: `private` does not affect `def self.` methods; the parsers below
# remain public class methods and are dispatched from import_from_source above
# Amazon AWS IP ranges parser
def self.parse_aws_ranges(source, options = {})
require 'net/http'
require 'uri'
uri = URI.parse(source[:url])
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.read_timeout = 30
response = http.get(uri.request_uri)
raise ImportError, "Failed to fetch AWS IP ranges: #{response.code}" unless response.code == '200'
data = JSON.parse(response.body)
imported_count = 0
batch_size = options[:batch_size] || 1000
batch = []
# Filter for relevant services (can be customized)
relevant_services = options[:aws_services] || ['AMAZON', 'ROUTE53', 'EC2', 'CLOUDFRONT']
data['prefixes'].each do |prefix|
# Focus on relevant services and regions
next unless relevant_services.include?(prefix['service'])
network_range = {
network: prefix['ip_prefix'],
source: 'bot_import_amazon_aws',
asn: nil, # AWS doesn't provide ASN in this feed
asn_org: 'Amazon Web Services',
company: 'Amazon',
country: nil,
is_datacenter: true,
is_proxy: false,
is_vpn: false,
additional_data: {
aws_service: prefix['service'],
aws_region: prefix['region'],
aws_network_border_group: prefix['network_border_group'],
import_date: Time.current.iso8601
}.to_json
}
batch << network_range
if batch.size >= batch_size
imported_count += import_batch(batch, 'Amazon AWS')
batch = []
puts "Imported #{imported_count} AWS ranges..."
end
end
# Import remaining records
if batch.any?
imported_count += import_batch(batch, 'Amazon AWS')
end
puts "Amazon AWS import completed: #{imported_count} ranges imported"
{ imported: imported_count, source: 'Amazon AWS' }
rescue Net::OpenTimeout, Net::ReadTimeout => e
raise ImportError, "Network timeout while fetching AWS ranges: #{e.message}"
rescue JSON::ParserError => e
raise ImportError, "Failed to parse AWS JSON response: #{e.message}"
end
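# Shape of the AWS feed this parser consumes (abridged from
# https://ip-ranges.amazonaws.com/ip-ranges.json):
#
#   {
#     "syncToken": "1699999999",
#     "createDate": "2025-11-18-00-00-00",
#     "prefixes": [
#       { "ip_prefix": "3.5.140.0/22",
#         "region": "ap-northeast-2",
#         "service": "AMAZON",
#         "network_border_group": "ap-northeast-2" }
#     ]
#   }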
# Google crawler IP ranges parser
def self.parse_google_ranges(source, options = {})
imported_count = 0
# Try each URL until one succeeds; the first successful fetch is imported
urls = Array(source[:urls] || source[:url])
urls.each do |url|
begin
puts "Attempting to fetch Google ranges from: #{url}"
uri = URI.parse(url)
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.read_timeout = 30
response = http.get(uri.request_uri)
next unless response.code == '200'
data = JSON.parse(response.body)
batch_size = options[:batch_size] || 1000
batch = []
# Parse Google crawler format (varies by file type)
if data.is_a?(Array)
data.each do |entry|
next unless entry['cidr'] || entry['prefix']
network_range = {
network: entry['cidr'] || entry['prefix'],
source: 'bot_import_google',
asn: nil,
asn_org: 'Google LLC',
company: 'Google',
country: nil,
is_datacenter: true,
is_proxy: false,
is_vpn: false,
additional_data: {
crawler_type: entry['crawler_type'] || 'unknown',
user_agent: entry['user_agent'],
import_date: Time.current.iso8601
}.to_json
}
batch << network_range
if batch.size >= batch_size
imported_count += import_batch(batch, 'Google')
batch = []
puts "Imported #{imported_count} Google ranges..."
end
end
end
# Import remaining records
if batch.any?
imported_count += import_batch(batch, 'Google')
end
puts "Google import completed: #{imported_count} ranges imported"
return { imported: imported_count, source: 'Google' }
rescue => e
Rails.logger.warn "Failed to fetch from #{url}: #{e.message}"
next
end
end
raise ImportError, "Failed to fetch Google crawler ranges from any URL"
end
# Microsoft Bing crawler IP ranges parser
def self.parse_microsoft_ranges(source, options = {})
# Microsoft requires special handling as they may not provide direct JSON
# This is a placeholder implementation
puts "Microsoft Bing crawler import requires manual configuration or web scraping"
puts "Refer to: https://www.bing.com/webmaster/help/which-crawlers-does-bing-use"
{
imported: 0,
source: 'Microsoft Bing',
note: 'Manual configuration required - Microsoft does not provide direct IP range feeds'
}
end
# Anthropic service IP ranges parser
def self.parse_anthropic_ranges(source, options = {})
# Anthropic ranges may need to be manually configured
# This is a placeholder implementation
puts "Anthropic Claude service ranges require manual configuration"
puts "Refer to: https://docs.anthropic.com/claude/reference/ip_ranges"
{
imported: 0,
source: 'Anthropic',
note: 'Manual configuration required - Anthropic does not provide automated IP range feeds'
}
end
# OpenAI crawler IP ranges parser
def self.parse_openai_ranges(source, options = {})
require 'net/http'
require 'uri'
# Determine crawler type up front so rescue messages can reference it
crawler_type = source[:name].gsub('OpenAI ', '').downcase
uri = URI.parse(source[:url])
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.read_timeout = 30
response = http.get(uri.request_uri)
raise ImportError, "Failed to fetch OpenAI IP ranges: #{response.code}" unless response.code == '200'
data = JSON.parse(response.body)
imported_count = 0
batch_size = options[:batch_size] || 1000
batch = []
data.each do |entry|
# OpenAI provides IP ranges as either CIDR notation or single IPs
ip_range = entry['cidr'] || entry['ip_prefix'] || entry['ip']
next unless ip_range
# Convert single IPs to /32
network = ip_range.include?('/') ? ip_range : "#{ip_range}/32"
network_range = {
network: network,
source: "bot_import_openai_#{crawler_type}",
asn: nil,
asn_org: 'OpenAI',
company: 'OpenAI',
country: nil,
is_datacenter: true,
is_proxy: false,
is_vpn: false,
additional_data: {
crawler_type: crawler_type,
crawler_purpose: crawler_purpose(crawler_type),
user_agent: openai_user_agent(crawler_type),
import_date: Time.current.iso8601,
source_url: source[:url]
}.to_json
}
batch << network_range
if batch.size >= batch_size
imported_count += import_batch(batch, "OpenAI #{crawler_type}")
batch = []
puts "Imported #{imported_count} OpenAI #{crawler_type} ranges..."
end
end
# Import remaining records
if batch.any?
imported_count += import_batch(batch, "OpenAI #{crawler_type}")
end
puts "OpenAI #{crawler_type} import completed: #{imported_count} ranges imported"
{ imported: imported_count, source: "OpenAI #{crawler_type}" }
rescue Net::OpenTimeout, Net::ReadTimeout => e
raise ImportError, "Network timeout while fetching OpenAI #{crawler_type} ranges: #{e.message}"
rescue JSON::ParserError => e
raise ImportError, "Failed to parse OpenAI #{crawler_type} JSON response: #{e.message}"
end
def self.import_batch(batch_data, source_name)
# Check for existing ranges to avoid duplicates
existing_networks = NetworkRange.where(network: batch_data.map { |d| d[:network] }).pluck(:network)
new_ranges = batch_data.reject { |d| existing_networks.include?(d[:network]) }
if new_ranges.any?
NetworkRange.insert_all(new_ranges)
puts "Imported #{new_ranges.size} new #{source_name} ranges (#{batch_data.size - new_ranges.size} duplicates skipped)"
else
puts "No new #{source_name} ranges to import (all duplicates)"
end
new_ranges.size
rescue => e
Rails.logger.error "Failed to import #{source_name} batch: #{e.message}"
# Fall back to individual imports; new_ranges is nil when the duplicate
# check itself failed, so retry the whole batch in that case
imported = 0
(new_ranges || batch_data).each do |data|
begin
NetworkRange.create!(data)
imported += 1
rescue => individual_error
Rails.logger.error "Failed to import individual #{source_name} record: #{individual_error.message}"
end
end
imported
end
# Helper method to determine crawler purpose based on type
def self.crawler_purpose(crawler_type)
case crawler_type
when 'searchbot'
'Used to link to and surface websites in search results in ChatGPT\'s search features'
when 'chatgpt-user'
'User actions in ChatGPT and Custom GPTs, including GPT Actions'
when 'gptbot'
'Used to crawl content for training OpenAI\'s generative AI foundation models'
else
'Unknown purpose'
end
end
# Helper method to get OpenAI user agent strings
def self.openai_user_agent(crawler_type)
case crawler_type
when 'searchbot'
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot'
when 'chatgpt-user'
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot'
when 'gptbot'
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; GPTBot/1.1; +https://openai.com/gptbot'
else
'Unknown user agent'
end
end
# Cloudflare IP ranges parser
def self.parse_cloudflare_ranges(source, options = {})
require 'net/http'
require 'uri'
imported_count = 0
urls = Array(source[:urls])
batch_size = options[:batch_size] || 1000
batch = []
urls.each do |url|
begin
puts "Fetching Cloudflare ranges from: #{url}"
uri = URI.parse(url)
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.read_timeout = 30
response = http.get(uri.request_uri)
raise ImportError, "Failed to fetch Cloudflare ranges: #{response.code}" unless response.code == '200'
# Cloudflare provides plain text CIDR lists
lines = response.body.split("\n")
ip_version = url.include?('ips-v4') ? 4 : 6
lines.each do |line|
line = line.strip
next if line.empty? || line.start_with?('#')
# Validate CIDR format
next unless line.match?(/\A[0-9a-fA-F:.]+\/\d+\z/)
network_range = {
network: line,
source: 'bot_import_cloudflare',
asn: nil,
asn_org: 'Cloudflare',
company: 'Cloudflare',
country: nil,
is_datacenter: true,
is_proxy: false,
is_vpn: false,
additional_data: {
ip_version: ip_version,
import_date: Time.current.iso8601,
source_url: url,
service_type: 'cdn_and_security'
}.to_json
}
batch << network_range
if batch.size >= batch_size
imported_count += import_batch(batch, 'Cloudflare')
batch = []
puts "Imported #{imported_count} Cloudflare ranges..."
end
end
rescue => e
Rails.logger.warn "Failed to fetch Cloudflare ranges from #{url}: #{e.message}"
next
end
end
# Import remaining records
if batch.any?
imported_count += import_batch(batch, 'Cloudflare')
end
puts "Cloudflare import completed: #{imported_count} ranges imported"
{ imported: imported_count, source: 'Cloudflare' }
end
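# The Cloudflare endpoints return one CIDR per line, e.g.:
#
#   173.245.48.0/20
#   103.21.244.0/22
#   2400:cb00::/32
#
# which is why the parser above only needs blank/comment skipping and a
# CIDR-shape check before batching rows for import_batch.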
# Facebook/Meta crawler ranges parser (placeholder)
def self.parse_facebook_ranges(source, options = {})
puts "Facebook/Meta crawler ranges require web scraping or manual configuration"
puts "Refer to: https://developers.facebook.com/docs/sharing/webmasters/crawler/"
{
imported: 0,
source: 'Facebook/Meta',
note: 'Manual configuration required - Facebook does not provide automated IP range feeds'
}
end
# Applebot crawler ranges parser (placeholder)
def self.parse_applebot_ranges(source, options = {})
puts "Applebot ranges require web scraping or manual configuration"
puts "Refer to: https://support.apple.com/en-us/HT204683"
{
imported: 0,
source: 'Applebot',
note: 'Manual configuration required - Apple does not provide automated IP range feeds'
}
end
# DuckDuckBot crawler ranges parser (placeholder)
def self.parse_duckduckgo_ranges(source, options = {})
puts "DuckDuckBot ranges require web scraping or manual configuration"
puts "Refer to: https://help.duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/"
{
imported: 0,
source: 'DuckDuckBot',
note: 'Manual configuration required - DuckDuckGo does not provide automated IP range feeds'
}
end
end

View File

@@ -53,4 +53,107 @@ class Ipapi
next
end
end
# Parse company/datacenter network range from IPAPI data
# Handles "X.X.X.X - Y.Y.Y.Y" format and converts to CIDR
def self.parse_company_network_range(ipapi_data)
# Try company.network first, then datacenter.network
network_range = ipapi_data.dig('company', 'network') || ipapi_data.dig('datacenter', 'network')
return nil if network_range.blank?
# Parse "X.X.X.X - Y.Y.Y.Y" format
if network_range.include?(' - ')
start_ip_str, end_ip_str = network_range.split(' - ').map(&:strip)
begin
start_ip = IPAddr.new(start_ip_str)
end_ip = IPAddr.new(end_ip_str)
# Calculate the number of IPs in the range
num_ips = end_ip.to_i - start_ip.to_i + 1
# Calculate prefix length from number of IPs
# num_ips = 2^(32 - prefix_length) for IPv4
prefix_length = 32 - Math.log2(num_ips).to_i
# Verify it's a valid CIDR block: the IP count must be a power of two
# and the start address must be aligned to that block size
if 2**(32 - prefix_length) == num_ips && (start_ip.to_i % num_ips).zero?
cidr = "#{start_ip_str}/#{prefix_length}"
Rails.logger.debug "Parsed company network range: #{network_range} -> #{cidr}"
return cidr
else
Rails.logger.warn "Network range #{network_range} is not a valid/aligned CIDR block (#{num_ips} IPs)"
return nil
end
rescue IPAddr::InvalidAddressError => e
Rails.logger.error "Invalid IP in company network range: #{network_range} (#{e.message})"
return nil
end
elsif network_range.include?('/')
# Already in CIDR format
return network_range
else
Rails.logger.warn "Unknown network range format: #{network_range}"
return nil
end
end
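# Worked example for the range-to-CIDR conversion above:
#   "13.104.0.0 - 13.107.255.255" spans 262,144 IPs = 2^18, so
#   prefix_length = 32 - 18 = 14 and the method returns "13.104.0.0/14".
#   A span that is not a power of two (or not aligned) returns nil.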
# Populate NetworkRange attributes from IPAPI data
def self.populate_network_attributes(network_range, ipapi_data)
network_range.asn = ipapi_data.dig('asn', 'asn')
network_range.asn_org = ipapi_data.dig('asn', 'org') || ipapi_data.dig('company', 'name')
network_range.company = ipapi_data.dig('company', 'name')
network_range.country = ipapi_data.dig('location', 'country_code')
network_range.is_datacenter = ipapi_data['is_datacenter'] || false
network_range.is_vpn = ipapi_data['is_vpn'] || false
network_range.is_proxy = ipapi_data['is_proxy'] || false
end
# Process IPAPI data and create network ranges
# Returns array of created/updated NetworkRange objects
def self.process_ipapi_data(ipapi_data, tracking_network)
created_networks = []
# Extract and create company/datacenter network range if present
company_network_cidr = parse_company_network_range(ipapi_data)
if company_network_cidr.present?
company_range = NetworkRange.find_or_create_by(network: company_network_cidr) do |nr|
nr.source = 'api_imported'
nr.creation_reason = "Company allocation from IPAPI for #{tracking_network.cidr}"
end
# Always update attributes (whether new or existing)
populate_network_attributes(company_range, ipapi_data)
company_range.set_network_data(:ipapi, ipapi_data)
company_range.last_api_fetch = Time.current
company_range.save!
created_networks << company_range
Rails.logger.info "Created/updated company network: #{company_range.cidr}"
end
# Extract and create ASN route network if present
ipapi_route = ipapi_data.dig('asn', 'route')
if ipapi_route.present? && ipapi_route != tracking_network.cidr
route_network = NetworkRange.find_or_create_by(network: ipapi_route) do |nr|
nr.source = 'api_imported'
nr.creation_reason = "BGP route from IPAPI lookup for #{tracking_network.cidr}"
end
# Always update attributes (whether new or existing)
populate_network_attributes(route_network, ipapi_data)
route_network.set_network_data(:ipapi, ipapi_data)
route_network.last_api_fetch = Time.current
route_network.save!
created_networks << route_network
Rails.logger.info "Created/updated BGP route network: #{route_network.cidr}"
end
# Return both the created networks and the broadest CIDR for deduplication
{
networks: created_networks,
broadest_cidr: company_network_cidr.presence || ipapi_route || tracking_network.cidr
}
end
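# Illustrative call sequence (Ipapi.lookup is an assumed fetch helper; the
# payload keys follow IPAPI's response format used above):
#
#   data = Ipapi.lookup(tracking_network.cidr)
#   result = Ipapi.process_ipapi_data(data, tracking_network)
#   result[:networks]      # NetworkRange records created/updated
#   result[:broadest_cidr] # CIDR string used upstream for deduplication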
end

View File

@@ -141,8 +141,11 @@
class: "text-blue-600 hover:text-blue-800 hover:underline font-mono font-medium" %>
</div>
<div class="text-xs text-gray-500">
<% if network.country.present? %>
🏳️ <%= network.country %>
<% if network.display_country.present? %>
🏳️ <%= network.display_country %>
<% if network.has_inherited_data? && network.display_country != network.country %>
<span class="text-blue-600" title="Inherited from parent network">*</span>
<% end %>
<% end %>
<% if network.asn.present? %>
• ASN <%= network.asn %>
@@ -150,7 +153,15 @@
</div>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
<%= network.company || 'Unknown' %>
<div>
<%= network.display_company || 'Unknown' %>
<% if network.has_inherited_data? %>
<div class="text-xs text-blue-600">
from <%= link_to network.inherited_from, network_range_path(NetworkRange.find_by(network: network.inherited_from)),
class: "text-blue-600 hover:text-blue-800 hover:underline" %>
</div>
<% end %>
</div>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm">
<% if network.is_datacenter? %>

View File

@@ -0,0 +1,171 @@
<% content_for :title, "Bot Network Ranges" %>
<div class="max-w-7xl mx-auto px-4 py-8">
<!-- Header -->
<div class="mb-8">
<h1 class="text-3xl font-bold text-gray-900 mb-2">Bot Network Ranges</h1>
<p class="text-gray-600">Import and manage official network ranges for search crawlers and API bots</p>
</div>
<!-- Available Sources -->
<div class="bg-white shadow rounded-lg mb-8">
<div class="px-6 py-4 border-b border-gray-200">
<h2 class="text-lg font-semibold text-gray-900">Available Sources</h2>
</div>
<div class="p-6">
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
<% @bot_sources.each do |key, source| %>
<div class="border rounded-lg p-4 hover:bg-gray-50 transition-colors">
<div class="flex items-start justify-between mb-2">
<h3 class="font-medium text-gray-900"><%= source[:name] %></h3>
<span class="px-2 py-1 text-xs font-medium rounded-full <%= source[:url] ? 'bg-green-100 text-green-800' : 'bg-yellow-100 text-yellow-800' %>">
<%= source[:url] ? 'Available' : 'Manual' %>
</span>
</div>
<p class="text-sm text-gray-600 mb-4"><%= source[:description] %></p>
<div class="flex flex-wrap gap-2">
<%= form_with url: import_bot_network_ranges_path, method: :post, class: "inline" do |f| %>
<%= hidden_field_tag :source, key %>
<%= f.submit "Import Now",
class: "px-3 py-1 text-xs font-medium text-white bg-blue-600 rounded hover:bg-blue-700 transition-colors disabled:opacity-50",
disabled: !(source[:url] || source[:urls]) %>
<% end %>
<%= form_with url: import_async_bot_network_ranges_path, method: :post, class: "inline" do |f| %>
<%= hidden_field_tag :source, key %>
<%= f.submit "Import Async",
class: "px-3 py-1 text-xs font-medium text-white bg-purple-600 rounded hover:bg-purple-700 transition-colors disabled:opacity-50",
disabled: !(source[:url] || source[:urls]) %>
<% end %>
<%= link_to "View", bot_network_range_path(key),
class: "px-3 py-1 text-xs font-medium text-gray-700 bg-gray-200 rounded hover:bg-gray-300 transition-colors" %>
</div>
</div>
<% end %>
</div>
</div>
</div>
<!-- Batch Import -->
<div class="bg-white shadow rounded-lg mb-8">
<div class="px-6 py-4 border-b border-gray-200">
<h2 class="text-lg font-semibold text-gray-900">Batch Import</h2>
</div>
<div class="p-6">
<p class="text-gray-600 mb-4">Import from all available sources (this may take several minutes).</p>
<%= form_with url: import_all_bot_network_ranges_path, method: :post do |f| %>
<div class="flex items-center gap-4">
<%= f.submit "Import All Sources",
class: "px-6 py-2 font-medium text-white bg-green-600 rounded hover:bg-green-700 transition-colors",
confirm: "This will import from all available sources and may take several minutes. Continue?" %>
</div>
<% end %>
</div>
</div>
<!-- Recent Imports -->
<% if @recent_imports.any? %>
<div class="bg-white shadow rounded-lg mb-8">
<div class="px-6 py-4 border-b border-gray-200">
<h2 class="text-lg font-semibold text-gray-900">Recent Imports</h2>
</div>
<div class="overflow-x-auto">
<table class="min-w-full divide-y divide-gray-200">
<thead class="bg-gray-50">
<tr>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Status</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Records</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Date</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Notes</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200">
<% @recent_imports.each do |import| %>
<tr>
<td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
<%= import.source.titleize %>
</td>
<td class="px-6 py-4 whitespace-nowrap">
<span class="px-2 inline-flex text-xs leading-5 font-semibold rounded-full <%= import.status == 'completed' ? 'bg-green-100 text-green-800' : 'bg-yellow-100 text-yellow-800' %>">
<%= import.status.titleize %>
</span>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
<%= import.records_processed&.to_s || '0' %>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
<%= import.created_at.strftime('%Y-%m-%d %H:%M') %>
</td>
<td class="px-6 py-4 text-sm text-gray-500">
<%= import.notes %>
</td>
</tr>
<% end %>
</tbody>
</table>
</div>
</div>
<% end %>
<!-- Recent Bot Network Ranges -->
<% if @bot_network_ranges.any? %>
<div class="bg-white shadow rounded-lg">
<div class="px-6 py-4 border-b border-gray-200">
<h2 class="text-lg font-semibold text-gray-900">Recently Imported Bot Ranges</h2>
</div>
<div class="overflow-x-auto">
<table class="min-w-full divide-y divide-gray-200">
<thead class="bg-gray-50">
<tr>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Network</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Company</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Created</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Details</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200">
<% @bot_network_ranges.each do |range| %>
<tr>
<td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
<%= range.network %>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
<%= range.source.gsub('bot_import_', '').titleize %>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
<%= range.company || 'Unknown' %>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
<%= range.created_at.strftime('%Y-%m-%d %H:%M') %>
</td>
<td class="px-6 py-4 text-sm text-gray-500">
<% if range.additional_data.present? %>
<% data = JSON.parse(range.additional_data) rescue {} %>
<% if data['crawler_type'] %>
<span class="px-2 py-1 text-xs font-medium rounded bg-blue-100 text-blue-800">
<%= data['crawler_type'].titleize %>
</span>
<% end %>
<% if data['aws_service'] %>
<span class="px-2 py-1 text-xs font-medium rounded bg-orange-100 text-orange-800">
<%= data['aws_service'] %>
</span>
<% end %>
<% end %>
</td>
</tr>
<% end %>
</tbody>
</table>
</div>
</div>
<% end %>
</div>
<!-- Real-time updates via Turbo Streams: turbo-rails' ActionCable-backed element
     subscribes to the custom channel (element attributes are passed as subscription params) -->
<turbo-cable-stream-source channel="BotImportsChannel"></turbo-cable-stream-source>

View File

@@ -0,0 +1,175 @@
<% content_for :title, "#{@source_name} Network Ranges" %>
<div class="max-w-7xl mx-auto px-4 py-8">
<!-- Header -->
<div class="mb-8">
<div class="flex items-center justify-between">
<div>
<h1 class="text-3xl font-bold text-gray-900 mb-2"><%= @source_name %> Network Ranges</h1>
<p class="text-gray-600">Network ranges imported from <%= @source_name %> official sources</p>
</div>
<div class="flex space-x-3">
<%= link_to "Back to Sources", bot_network_ranges_path,
class: "px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 rounded hover:bg-gray-300 transition-colors" %>
<%= form_with url: bot_network_range_path(params[:source]), method: :delete, class: "inline" do |f| %>
<%= f.submit "Delete All Ranges",
class: "px-4 py-2 text-sm font-medium text-white bg-red-600 rounded hover:bg-red-700 transition-colors",
confirm: "Are you sure you want to delete all #{@source_name} network ranges? This action cannot be undone." %>
<% end %>
</div>
</div>
</div>
<!-- Statistics -->
<% if @import_stats.any? %>
<div class="bg-white shadow rounded-lg mb-8">
<div class="px-6 py-4 border-b border-gray-200">
<h2 class="text-lg font-semibold text-gray-900">Import Statistics</h2>
</div>
<div class="p-6">
<div class="grid grid-cols-1 md:grid-cols-3 gap-6">
<% @import_stats.each do |source, count| %>
<div class="text-center">
<div class="text-3xl font-bold text-blue-600"><%= count %></div>
<div class="text-sm text-gray-600 mt-1"><%= source.gsub('bot_import_', '').titleize %></div>
</div>
<% end %>
</div>
</div>
</div>
<% end %>
<!-- Network Ranges Table -->
<div class="bg-white shadow rounded-lg">
<div class="px-6 py-4 border-b border-gray-200">
<div class="flex items-center justify-between">
<h2 class="text-lg font-semibold text-gray-900">Network Ranges</h2>
<div class="text-sm text-gray-500">
Showing <%= @network_ranges.offset_value + 1 %> to <%= [@network_ranges.offset_value + @network_ranges.size, @network_ranges.total_count].min %>
of <%= @network_ranges.total_count %> ranges
</div>
</div>
</div>
<div class="overflow-x-auto">
<table class="min-w-full divide-y divide-gray-200">
<thead class="bg-gray-50">
<tr>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Network</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Company</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Country</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Created</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Details</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200">
<% @network_ranges.each do |range| %>
<tr class="hover:bg-gray-50">
<td class="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
<%= link_to range.network, network_range_path(range), class: "text-blue-600 hover:text-blue-800" %>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
<%= range.source.gsub('bot_import_', '').titleize %>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
<%= range.company || 'Unknown' %>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
<%= range.country || 'Unknown' %>
</td>
<td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
<%= range.created_at.strftime('%Y-%m-%d %H:%M') %>
</td>
<td class="px-6 py-4 text-sm text-gray-500">
<% if range.additional_data.present? %>
<% data = JSON.parse(range.additional_data) rescue {} %>
<div class="flex flex-wrap gap-1">
<% if data['crawler_type'] %>
<span class="px-2 py-1 text-xs font-medium rounded bg-blue-100 text-blue-800">
<%= data['crawler_type'].titleize %>
</span>
<% end %>
<% if data['crawler_purpose'] %>
<span class="px-2 py-1 text-xs font-medium rounded bg-purple-100 text-purple-800" title="<%= data['crawler_purpose'] %>">
Purpose
</span>
<% end %>
<% if data['aws_service'] %>
<span class="px-2 py-1 text-xs font-medium rounded bg-orange-100 text-orange-800">
<%= data['aws_service'] %>
</span>
<% end %>
<% if data['aws_region'] %>
<span class="px-2 py-1 text-xs font-medium rounded bg-green-100 text-green-800">
<%= data['aws_region'] %>
</span>
<% end %>
<% if data['ip_version'] %>
<span class="px-2 py-1 text-xs font-medium rounded bg-gray-100 text-gray-800">
IPv<%= data['ip_version'] %>
</span>
<% end %>
</div>
<% end %>
</td>
</tr>
<% end %>
</tbody>
</table>
</div>
<!-- Pagination -->
<% if @network_ranges.total_pages > 1 %>
<div class="px-6 py-4 border-t border-gray-200">
<div class="flex items-center justify-between">
<div class="text-sm text-gray-700">
Page <%= @network_ranges.current_page %> of <%= @network_ranges.total_pages %>
</div>
<div class="flex space-x-2">
<% if @network_ranges.prev_page %>
<%= link_to "Previous", bot_network_range_path(params[:source], page: @network_ranges.prev_page),
class: "px-3 py-1 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded hover:bg-gray-50" %>
<% end %>
<%# Show page numbers %>
<% (1..@network_ranges.total_pages).select { |p| p == 1 || p == @network_ranges.total_pages || (p - @network_ranges.current_page).abs <= 2 }.each do |page| %>
<% if page == @network_ranges.current_page %>
<span class="px-3 py-1 text-sm font-medium text-white bg-blue-600 rounded">
<%= page %>
</span>
<% else %>
<%= link_to page, bot_network_range_path(params[:source], page: page),
class: "px-3 py-1 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded hover:bg-gray-50" %>
<% end %>
<% end %>
<% if @network_ranges.next_page %>
<%= link_to "Next", bot_network_range_path(params[:source], page: @network_ranges.next_page),
class: "px-3 py-1 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded hover:bg-gray-50" %>
<% end %>
</div>
</div>
</div>
<% end %>
</div>
<% if @network_ranges.empty? %>
<div class="bg-white shadow rounded-lg">
<div class="px-6 py-12 text-center">
<div class="text-gray-400 mb-4">
<svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
</svg>
</div>
<h3 class="text-lg font-medium text-gray-900 mb-2">No network ranges found</h3>
<p class="text-gray-600 mb-6">
No <%= @source_name %> network ranges have been imported yet.
</p>
<%= link_to "Import #{@source_name} Ranges", bot_network_ranges_path,
class: "inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500" %>
</div>
</div>
<% end %>
</div>

View File

@@ -25,7 +25,7 @@
<div>
<%= form.label :waf_action, "Action", class: "block text-sm font-medium text-gray-700" %>
<%= form.select :waf_action,
options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge']], params[:waf_action]),
options_for_select([['All', ''], ['Allow', 'allow'], ['Deny', 'deny'], ['Redirect', 'redirect'], ['Challenge', 'challenge'], ['Add Header', 'add_header']], params[:waf_action]),
{ }, { class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %>
</div>
<div>
@@ -178,9 +178,10 @@
when 'deny' then 'bg-red-100 text-red-800'
when 'redirect' then 'bg-blue-100 text-blue-800'
when 'challenge' then 'bg-yellow-100 text-yellow-800'
when 'add_header' then 'bg-purple-100 text-purple-800'
else 'bg-gray-100 text-gray-800'
end %>">
<%= event.waf_action %>
<%= event.waf_action.humanize %>
</span>
</td>
<td class="px-6 py-4 text-sm font-mono text-gray-900">

View File

@@ -75,6 +75,8 @@
class: nav_link_class(network_ranges_path) %>
<% if user_signed_in? && current_user_admin? %>
<%= link_to "🤖 Bot Ranges", bot_network_ranges_path,
class: nav_link_class(bot_network_ranges_path) %>
<%= link_to "📊 Data Imports", data_imports_path,
class: nav_link_class(data_imports_path) %>
<%= link_to "🔗 DSNs", dsns_path,
@@ -172,6 +174,8 @@
class: mobile_nav_link_class(network_ranges_path) %>
<% if user_signed_in? && current_user_admin? %>
<%= link_to "🤖 Bot Ranges", bot_network_ranges_path,
class: mobile_nav_link_class(bot_network_ranges_path) %>
<%= link_to "📊 Data Imports", data_imports_path,
class: mobile_nav_link_class(data_imports_path) %>
<%= link_to "🔗 DSNs", dsns_path,

View File

@@ -225,14 +225,16 @@
<td class="px-6 py-4 whitespace-nowrap text-right text-sm font-medium">
<%= link_to "View", rule_path(rule), class: "text-blue-600 hover:text-blue-900 mr-3" %>
<% if rule.enabled? %>
<%= link_to "Disable", disable_rule_path(rule),
<%= button_to "Disable", disable_rule_path(rule),
method: :post,
data: { confirm: "Are you sure you want to disable this rule?" },
class: "text-yellow-600 hover:text-yellow-900 mr-3" %>
form: { style: "display: inline;" },
data: { turbo_confirm: "Are you sure you want to disable this rule?" },
class: "text-yellow-600 hover:text-yellow-900 mr-3 bg-transparent border-0 p-0 cursor-pointer" %>
<% else %>
<%= link_to "Enable", enable_rule_path(rule),
<%= button_to "Enable", enable_rule_path(rule),
method: :post,
class: "text-green-600 hover:text-green-900 mr-3" %>
form: { style: "display: inline;" },
class: "text-green-600 hover:text-green-900 mr-3 bg-transparent border-0 p-0 cursor-pointer" %>
<% end %>
<%= link_to "Edit", edit_rule_path(rule), class: "text-indigo-600 hover:text-indigo-900" %>
</td>

View File

@@ -6,7 +6,7 @@
<p class="mt-2 text-gray-600">Create a WAF rule to allow, block, or rate limit traffic</p>
</div>
<div class="bg-white shadow rounded-lg">
<div class="bg-white shadow rounded-lg" data-controller="rule-form">
<%= form_with(model: @rule, local: true, class: "space-y-6") do |form| %>
<% if @rule.errors.any? %>
<div class="rounded-md bg-red-50 p-4">
@@ -54,7 +54,8 @@
<%= form.select :waf_action,
options_for_select(@waf_actions.map { |action, _| [action.humanize, action] }, @rule.waf_action),
{ prompt: "Select action" },
{ class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" } %>
{ class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
data: { rule_form_target: "actionSelect", action: "change->rule-form#updateActionSections" } } %>
<p class="mt-2 text-sm text-gray-500">What action to take when this rule matches</p>
</div>
</div>
@@ -158,6 +159,27 @@
</div>
</div>
<!-- Add Header Fields (shown for add_header action) -->
<div id="add_header_section" class="hidden space-y-4" data-rule-form-target="addHeaderSection">
<div>
<%= label_tag :header_name, "Header Name", class: "block text-sm font-medium text-gray-700" %>
<%= text_field_tag :header_name, "",
class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
placeholder: "X-Bot-Agent",
id: "header_name_input" %>
<p class="mt-2 text-sm text-gray-500">The HTTP header name to add (e.g., X-Bot-Agent, X-Network-Type)</p>
</div>
<div>
<%= label_tag :header_value, "Header Value", class: "block text-sm font-medium text-gray-700" %>
<%= text_field_tag :header_value, "",
class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm",
placeholder: "BingBot",
id: "header_value_input" %>
<p class="mt-2 text-sm text-gray-500">The value for the header (e.g., BingBot, GoogleBot, Unknown)</p>
</div>
</div>
<!-- Metadata -->
<div data-controller="json-validator" data-json-validator-valid-class="json-valid" data-json-validator-invalid-class="json-invalid" data-json-validator-valid-status-class="json-valid-status" data-json-validator-invalid-status-class="json-invalid-status">
<%= form.label :metadata, "Metadata", class: "block text-sm font-medium text-gray-700" %>
@@ -197,10 +219,18 @@
</div>
<div>
<%= form.label :expires_at, "Expires At", class: "block text-sm font-medium text-gray-700" %>
<%= form.datetime_local_field :expires_at,
class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" %>
<p class="mt-2 text-sm text-gray-500">Leave blank for permanent rule</p>
<div class="flex items-center mb-2">
<%= check_box_tag :set_expiration, "1", false,
class: "h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500",
data: { rule_form_target: "expirationCheckbox", action: "change->rule-form#toggleExpiration" } %>
<%= label_tag :set_expiration, "Set expiration", class: "ml-2 block text-sm font-medium text-gray-700" %>
</div>
<div class="hidden" data-rule-form-target="expirationField">
<%= form.label :expires_at, "Expires At", class: "block text-sm font-medium text-gray-700" %>
<%= form.datetime_local_field :expires_at,
class: "mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm" %>
<p class="mt-2 text-sm text-gray-500">When this rule should automatically expire</p>
</div>
</div>
<div class="flex items-center pt-6">

View File

@@ -39,12 +39,12 @@
<div class="flex space-x-3">
<%= link_to "Edit", edit_rule_path(@rule), class: "inline-flex items-center px-4 py-2 border border-gray-300 rounded-md shadow-sm text-sm font-medium text-gray-700 bg-white hover:bg-gray-50" %>
<% if @rule.enabled? %>
<%= link_to "Disable", disable_rule_path(@rule),
<%= button_to "Disable", disable_rule_path(@rule),
method: :post,
data: { confirm: "Are you sure you want to disable this rule?" },
data: { turbo_confirm: "Are you sure you want to disable this rule?" },
class: "inline-flex items-center px-4 py-2 border border-yellow-300 rounded-md shadow-sm text-sm font-medium text-yellow-700 bg-yellow-50 hover:bg-yellow-100" %>
<% else %>
<%= link_to "Enable", enable_rule_path(@rule),
<%= button_to "Enable", enable_rule_path(@rule),
method: :post,
class: "inline-flex items-center px-4 py-2 border border-green-300 rounded-md shadow-sm text-sm font-medium text-green-700 bg-green-50 hover:bg-green-100" %>
<% end %>