Fix some blocked/allow laggards after migrating. Add DuckDB for outstanding analytics performance. Start adding an import for all bot networks.

This commit is contained in:
Dan Milne
2025-11-18 16:40:05 +11:00
parent ef56779584
commit 3f274c842c
37 changed files with 3522 additions and 151 deletions

View File

@@ -46,8 +46,10 @@ class NetworkRangesController < ApplicationController
authorize @network_range
if @network_range.persisted?
# Real network - use direct IP containment for consistency with stats
events_scope = Event.where("ip_address <<= ?", @network_range.cidr).recent
# Real network - use indexed network_range_id for much better performance
# Include child network ranges to capture all traffic within this network block
network_ids = [@network_range.id] + @network_range.child_ranges.pluck(:id)
events_scope = Event.where(network_range_id: network_ids).recent
else
# Virtual network - find events by IP range containment
events_scope = Event.where("ip_address <<= ?::inet", @network_range.to_s).recent
@@ -58,22 +60,24 @@ class NetworkRangesController < ApplicationController
@child_ranges = @network_range.child_ranges.limit(20)
@parent_ranges = @network_range.parent_ranges.limit(10)
@associated_rules = @network_range.persisted? ? @network_range.rules.includes(:user).order(created_at: :desc) : []
@associated_rules = @network_range.persisted? ? @network_range.rules.includes(:user, :network_range, :waf_policy).order(created_at: :desc) : []
# Load rules from supernets and subnets
@supernet_rules = @network_range.persisted? ? @network_range.supernet_rules.includes(:network_range, :user).limit(10) : []
@subnet_rules = @network_range.persisted? ? @network_range.child_rules.includes(:network_range, :user).limit(20) : []
@supernet_rules = @network_range.persisted? ? @network_range.supernet_rules.includes(:network_range, :user, :waf_policy).limit(10) : []
@subnet_rules = @network_range.persisted? ? @network_range.child_rules.includes(:network_range, :user, :waf_policy).limit(20) : []
# Traffic analytics (if we have events)
@traffic_stats = calculate_traffic_stats(@network_range)
# Check if we have IPAPI data (or if parent has it)
# Check if we have IPAPI data (or if parent has it) - cache expensive parent lookup
@has_ipapi_data = @network_range.has_network_data_from?(:ipapi)
@parent_with_ipapi = nil
unless @has_ipapi_data
# Check if parent has IPAPI data
parent = @network_range.parent_with_intelligence
# Cache expensive parent intelligence lookup
parent = Rails.cache.fetch("network_parent_intel:#{@network_range.cache_key}", expires_in: 1.hour) do
@network_range.parent_with_intelligence
end
if parent&.has_network_data_from?(:ipapi)
@parent_with_ipapi = parent
@has_ipapi_data = true
@@ -194,6 +198,15 @@ class NetworkRangesController < ApplicationController
private
# Runs the given DuckDB query block and returns its value unchanged.
#
# This helper does not query PostgreSQL itself: it returns nil when the block
# raises, and callers use a nil result as the signal to run their own
# PostgreSQL fallback. A block that legitimately returns nil therefore looks
# the same to the caller as a failed query.
#
# @yield the DuckDB query to attempt
# @return [Object, nil] the block's value, or nil if it raised a StandardError
def with_duckdb_fallback
  yield
rescue StandardError => e
  # Deliberately best-effort: record the failure and let the caller fall back.
  Rails.logger.warn "[NetworkRanges] DuckDB query failed, falling back to PostgreSQL: #{e.message}"
  nil
end
def set_network_range
# Handle CIDR slugs (e.g., "40.77.167.100_32" -> "40.77.167.100/32")
cidr = params[:id].gsub('_', '/')
@@ -248,27 +261,37 @@ class NetworkRangesController < ApplicationController
# Use indexed network_range_id for much better performance instead of expensive CIDR operator
# Include child network ranges to capture all traffic within this network block
network_ids = [network_range.id] + network_range.child_ranges.pluck(:id)
base_query = Event.where(network_range_id: network_ids)
# Use separate queries: one for grouping (without ordering), one for recent activity (with ordering)
events_for_grouping = base_query.limit(1000)
events_for_activity = base_query.recent.limit(20)
# Try DuckDB first for stats (much faster)
duckdb_stats = with_duckdb_fallback { EventDdb.network_traffic_stats(network_ids) }
duckdb_top_paths = with_duckdb_fallback { EventDdb.network_top_paths(network_ids, 10) }
duckdb_top_agents = with_duckdb_fallback { EventDdb.network_top_user_agents(network_ids, 5) }
# Calculate counts properly - use consistent base_query for all counts
total_requests = base_query.count
unique_ips = base_query.except(:order).distinct.count(:ip_address)
blocked_requests = base_query.blocked.count
allowed_requests = base_query.allowed.count
if duckdb_stats
# DuckDB success - use fast aggregated stats
stats = duckdb_stats.merge(
top_paths: duckdb_top_paths&.to_h || {},
top_user_agents: duckdb_top_agents&.to_h || {},
recent_activity: Event.where(network_range_id: network_ids).recent.limit(20)
)
else
# PostgreSQL fallback
base_query = Event.where(network_range_id: network_ids)
events_for_grouping = base_query.limit(1000)
events_for_activity = base_query.recent.limit(20)
{
total_requests: total_requests,
unique_ips: unique_ips,
blocked_requests: blocked_requests,
allowed_requests: allowed_requests,
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10),
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5),
recent_activity: events_for_activity
}
stats = {
total_requests: base_query.count,
unique_ips: base_query.except(:order).distinct.count(:ip_address),
blocked_requests: base_query.blocked.count,
allowed_requests: base_query.allowed.count,
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10).to_h,
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5).to_h,
recent_activity: events_for_activity
}
end
stats
else
# No events - return empty stats
{
@@ -296,8 +319,8 @@ class NetworkRangesController < ApplicationController
unique_ips: base_query.except(:order).distinct.count(:ip_address),
blocked_requests: base_query.blocked.count,
allowed_requests: base_query.allowed.count,
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10),
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5),
top_paths: events_for_grouping.group(:request_path).count.sort_by { |_, count| -count }.first(10).to_h,
top_user_agents: events_for_grouping.group(:user_agent).count.sort_by { |_, count| -count }.first(5).to_h,
recent_activity: events_for_activity
}
else