Add 'tags' to event model. Add a data import system - currently for MaxMind zip files

This commit is contained in:
Dan Milne
2025-11-11 10:31:36 +11:00
parent 772fae7e8b
commit 26216da9ca
34 changed files with 3580 additions and 14 deletions

View File

@@ -0,0 +1,151 @@
# frozen_string_literal: true

# CountryHelper - Service for country display utilities
#
# Provides methods to convert ISO 3166-1 alpha-2 country codes to display
# names, generate country flag emoji, and format country data for UI
# components. All lookups degrade gracefully: bad input yields the raw code,
# an empty string, or an empty collection rather than raising.
class CountryHelper
  # Offset from an ASCII capital letter to its Unicode regional indicator
  # symbol: 'A' (0x41) + 0x1F1A5 == U+1F1E6 (REGIONAL INDICATOR SYMBOL LETTER A).
  REGIONAL_INDICATOR_OFFSET = 0x1F1A5

  # Convert an ISO code to a human-readable name.
  # Falls back to the raw code when the code is blank, unknown, or lookup fails.
  def self.display_name(iso_code)
    return iso_code if iso_code.blank?

    country = ISO3166::Country[iso_code]
    country.local_name
  rescue StandardError
    iso_code
  end

  # Convert a two-letter ISO code to its flag emoji ("US" => "🇺🇸").
  # Returns "" for blank, wrong-length, or non-alphabetic input.
  #
  # BUG FIX: the previous implementation added 0x1F1E5 (off by one — the
  # regional indicator offset is 0x1F1A5) and used Integer#chr, which raises
  # RangeError for codepoints above 255, so the method always returned "".
  def self.flag_emoji(iso_code)
    return "" if iso_code.blank? || iso_code.length != 2

    code = iso_code.upcase
    return "" unless code.match?(/\A[A-Z]{2}\z/)

    code.codepoints.map { |cp| (cp + REGIONAL_INDICATOR_OFFSET).chr(Encoding::UTF_8) }.join
  rescue StandardError
    ""
  end

  # Display name followed by the ISO code, e.g. "Australia (AU)".
  # NOTE(review): despite the name, this does not include the flag emoji;
  # callers appear to rely on the "Name (CODE)" format — confirm before
  # changing the output to include flag_emoji.
  def self.display_with_flag(iso_code)
    return iso_code if iso_code.blank?

    "#{display_name(iso_code)} (#{iso_code})"
  end

  # True when the ISO code resolves to a known country.
  def self.valid_iso_code?(iso_code)
    return false if iso_code.blank?

    ISO3166::Country[iso_code].present?
  rescue StandardError
    false
  end

  # All countries for select dropdowns, sorted by display name.
  # Returns an array of [display_name, iso_code] pairs; [] on failure.
  def self.all_for_select
    ISO3166::Country.all.map do |country|
      # Try different name sources in order of preference.
      name = country.local_name.presence ||
             country.iso_short_name.presence ||
             country.common_name.presence ||
             country.alpha2
      ["#{name} (#{country.alpha2})", country.alpha2]
    end.sort_by { |display, _| display }
  rescue StandardError => e
    Rails.logger.error "Error in CountryHelper.all_for_select: #{e.message}"
    Rails.logger.error e.backtrace.join("\n")
    []
  end

  # Hard-coded ISO code groupings for quick region-based selection.
  # (Pure literal — the former rescue around it could never fire.)
  def self.by_region
    {
      'Americas' => [
        'US', 'CA', 'MX', 'BR', 'AR', 'CL', 'CO', 'PE', 'VE'
      ],
      'Europe' => [
        'GB', 'DE', 'FR', 'IT', 'ES', 'NL', 'BE', 'CH', 'AT', 'SE',
        'NO', 'DK', 'FI', 'PL', 'CZ', 'HU', 'RO', 'GR', 'PT'
      ],
      'Asia Pacific' => [
        'CN', 'JP', 'KR', 'IN', 'SG', 'AU', 'NZ', 'TH', 'MY', 'ID',
        'PH', 'VN', 'HK', 'TW'
      ],
      'Middle East & Africa' => [
        'ZA', 'EG', 'NG', 'KE', 'SA', 'AE', 'IL', 'TR', 'IR'
      ]
    }
  end

  # Countries for one region as hashes with :code, :name, :display keys.
  def self.countries_for_region(region_name)
    (by_region[region_name] || []).map do |code|
      {
        code: code,
        name: display_name(code),
        display: display_with_flag(code)
      }
    end
  end

  # Format multiple country targets for display; [] for blank input.
  def self.format_targets(targets)
    return [] if targets.blank?

    targets.map do |target|
      {
        code: target,
        name: display_name(target),
        display: display_with_flag(target)
      }
    end
  end

  # Curated list of common threat-source countries for quick blocking.
  def self.popular_for_blocking
    [
      { code: 'CN', name: 'China', display: '🇨🇳 China', reason: 'High bot/scanner activity' },
      { code: 'RU', name: 'Russia', display: '🇷🇺 Russia', reason: 'State-sponsored attacks' },
      { code: 'IN', name: 'India', display: '🇮🇳 India', reason: 'High spam volume' },
      { code: 'BR', name: 'Brazil', display: '🇧🇷 Brazil', reason: 'Scanner activity' },
      { code: 'IR', name: 'Iran', display: '🇮🇷 Iran', reason: 'Attacks on critical infrastructure' },
      { code: 'KP', name: 'North Korea', display: '🇰🇵 North Korea', reason: 'State-sponsored hacking' }
    ]
  end

  # Case-insensitive search by ISO code or local name; at most 20 results.
  # Returns [[display_with_flag, alpha2], ...]; [] on blank query or error.
  def self.search(query)
    return [] if query.blank?

    term = query.downcase
    ISO3166::Country.all.select do |country|
      country.alpha2.downcase.include?(term) ||
        country.local_name.downcase.include?(term)
    end.first(20).map { |c| [display_with_flag(c.alpha2), c.alpha2] }
  rescue StandardError
    []
  end

  # Per-code display metadata for analytics; {} for blank input.
  def self.usage_statistics(country_codes)
    return {} if country_codes.blank?

    country_codes.each_with_object({}) do |code, stats|
      stats[code] = {
        name: display_name(code),
        flag: flag_emoji(code),
        display: display_with_flag(code)
      }
    end
  end
end

View File

@@ -0,0 +1,182 @@
# frozen_string_literal: true

require 'csv'

# GeoliteAsnImporter - imports MaxMind GeoLite2 ASN CSV data into
# NetworkRange records.
#
# Accepts either a raw CSV file or a GeoLite2 zip archive (detected by magic
# bytes rather than extension, since uploads may be misnamed). Progress is
# reported back to the owning DataImport record every 100 rows.
class GeoliteAsnImporter
  BATCH_SIZE = 1000

  # @param file_path [String] path to a GeoLite2 ASN CSV or zip file
  # @param data_import [DataImport] record receiving progress updates
  def initialize(file_path, data_import:)
    @file_path = file_path
    @data_import = data_import
    @total_records = 0
    @processed_records = 0
    @failed_records = 0
    @errors = []
  end

  # Run the import and return a summary hash:
  #   { total_records:, processed_records:, failed_records:, errors: }
  def import
    Rails.logger.info "Starting import for file: #{@file_path}"
    Rails.logger.info "File exists: #{File.exist?(@file_path)}"
    Rails.logger.info "File size: #{File.size(@file_path)} bytes" if File.exist?(@file_path)

    # Detect zip by content, not extension.
    is_zip_file = check_if_zip_file
    Rails.logger.info "File is zip: #{is_zip_file}"

    if is_zip_file
      import_from_zip
    else
      import_csv_file(@file_path)
    end

    {
      total_records: @total_records,
      processed_records: @processed_records,
      failed_records: @failed_records,
      errors: @errors
    }
  end

  private

  # True when the file starts with the ZIP local-file-header magic bytes
  # ("PK\x03\x04"); false for short or unreadable files.
  def check_if_zip_file
    File.open(@file_path, 'rb') do |file|
      header = file.read(4)
      return header == "PK\x03\x04"
    end
  rescue StandardError => e
    Rails.logger.error "Error checking if file is zip: #{e.message}"
    false
  end

  # Process every "*Blocks*.csv" entry straight out of the zip archive,
  # without extracting to disk.
  def import_from_zip
    require 'zip'
    require 'stringio'

    Rails.logger.info "Processing zip file directly: #{@file_path}"

    zip_content = File.binread(@file_path)
    Zip::File.open_buffer(StringIO.new(zip_content)) do |zip_file|
      zip_file.each do |entry|
        next unless entry.name.include?('Blocks') && entry.name.end_with?('.csv')

        Rails.logger.info "Processing ASN block file from zip: #{entry.name}"
        process_csv_from_zip(zip_file, entry)
      end
    end
  rescue StandardError => e
    Rails.logger.error "Error processing ZIP file: #{e.message}"
    Rails.logger.error e.backtrace.join("\n")
    raise
  end

  # Parse one CSV entry from the archive, importing row by row.
  def process_csv_from_zip(zip_file, entry)
    zip_file.get_input_stream(entry) do |io|
      content = io.read
      CSV.parse(content, headers: true, header_converters: :symbol, encoding: 'UTF-8') do |row|
        import_row(row)
      end
    end
  end

  # Import a plain (non-zip) CSV file.
  def import_csv_file(csv_file)
    CSV.foreach(csv_file, headers: true, header_converters: :symbol, encoding: 'UTF-8') do |row|
      import_row(row)
    end
  end

  # Import a single row, tracking success/failure counters and progress.
  # BUG FIX: the previous import_csv_file called update_progress_if_needed
  # twice per failed row (inside the rescue and again after it); progress is
  # now reported exactly once per row, via this shared helper.
  def import_row(row)
    @total_records += 1
    begin
      import_record(row)
      @processed_records += 1
    rescue StandardError => e
      @failed_records += 1
      @errors << "Row #{@total_records}: #{e.message} - Data: #{row.to_h}"
    end
    update_progress_if_needed
  end

  # Upsert one ASN block row into NetworkRange.
  # Raises on missing fields or an invalid network CIDR.
  def import_record(row)
    network = row[:network]
    asn = row[:autonomous_system_number]&.to_i
    asn_org = row[:autonomous_system_organization]&.strip

    unless network && asn && asn_org
      raise "Missing required fields: network=#{network}, asn=#{asn}, asn_org=#{asn_org}"
    end

    IPAddr.new(network) # Validates the CIDR; raises if invalid.

    NetworkRange.upsert(
      {
        network: network,
        asn: asn,
        asn_org: asn_org,
        source: 'geolite_asn',
        updated_at: Time.current
      },
      unique_by: :index_network_ranges_on_network_unique
    )
  end

  # Push progress to the DataImport every 100 handled rows.
  def update_progress_if_needed
    return unless (@processed_records + @failed_records) % 100 == 0

    @data_import.update_progress(
      processed: @processed_records,
      failed: @failed_records,
      total_records: @total_records,
      stats: {
        total_records: @total_records,
        current_file: File.basename(@file_path),
        recent_errors: @errors.last(5)
      }
    )
  end

  # NOTE(review): unused within this class — presumably predates in-memory
  # zip handling. Kept for compatibility; candidate for removal.
  def csv_files
    if @file_path.end_with?('.zip')
      base_dir = File.dirname(@file_path)
      base_name = File.basename(@file_path, '.zip')
      [
        File.join(base_dir, "#{base_name}-Blocks-IPv4.csv"),
        File.join(base_dir, "#{base_name}-Blocks-IPv6.csv")
      ].select { |file| File.exist?(file) }
    else
      [@file_path]
    end
  end

  # NOTE(review): unused within this class — superseded by the streaming
  # Zip::File.open_buffer path. Kept for compatibility; candidate for removal.
  def extract_if_zipfile
    return unless @file_path.end_with?('.zip')

    require 'zip'
    Zip::File.open(@file_path) do |zip_file|
      zip_file.each do |entry|
        next unless entry.name.end_with?('.csv')

        entry.extract(File.join(File.dirname(@file_path), entry.name))
      end
    end
  end
end

View File

@@ -0,0 +1,288 @@
# frozen_string_literal: true

require 'csv'

# GeoliteCountryImporter - imports MaxMind GeoLite2 Country CSV data into
# NetworkRange records.
#
# Accepts either a raw CSV file or a GeoLite2 zip archive (detected by magic
# bytes rather than extension). Location metadata (geoname_id -> country and
# continent info) is cached in memory and joined to each network block row.
# Progress is reported back to the owning DataImport record every 100 rows.
class GeoliteCountryImporter
  BATCH_SIZE = 1000

  # Encodings tried, in order, when interpreting CSV payloads from the zip.
  CANDIDATE_ENCODINGS = ['UTF-8', 'ISO-8859-1', 'Windows-1252'].freeze

  # @param file_path [String] path to a GeoLite2 Country CSV or zip file
  # @param data_import [DataImport] record receiving progress updates
  def initialize(file_path, data_import:)
    @file_path = file_path
    @data_import = data_import
    @total_records = 0
    @processed_records = 0
    @failed_records = 0
    @errors = []
    @locations_cache = {}
  end

  # Run the import and return a summary hash:
  #   { total_records:, processed_records:, failed_records:, errors: }
  def import
    Rails.logger.info "Starting import for file: #{@file_path}"
    Rails.logger.info "File exists: #{File.exist?(@file_path)}"
    Rails.logger.info "File size: #{File.size(@file_path)} bytes" if File.exist?(@file_path)

    # Detect zip by content, not extension.
    is_zip_file = check_if_zip_file
    Rails.logger.info "File is zip: #{is_zip_file}"

    if is_zip_file
      Rails.logger.info "Calling import_from_zip"
      import_from_zip
    else
      Rails.logger.info "Calling regular import (not zip)"
      load_locations_data
      import_csv_file(@file_path)
    end

    {
      total_records: @total_records,
      processed_records: @processed_records,
      failed_records: @failed_records,
      errors: @errors
    }
  end

  private

  # True when the file starts with the ZIP local-file-header magic bytes
  # ("PK\x03\x04"); false for short or unreadable files.
  def check_if_zip_file
    File.open(@file_path, 'rb') do |file|
      header = file.read(4)
      return header == "PK\x03\x04"
    end
  rescue StandardError => e
    Rails.logger.error "Error checking if file is zip: #{e.message}"
    false
  end

  # Load location data, then process every "*Blocks*.csv" entry from the zip.
  def import_from_zip
    require 'zip'
    require 'stringio'

    Rails.logger.info "Processing zip file directly: #{@file_path}"

    zip_content = File.binread(@file_path)
    Zip::File.open_buffer(StringIO.new(zip_content)) do |zip_file|
      Rails.logger.info "Files in zip:"
      zip_file.each do |entry|
        Rails.logger.info " - #{entry.name} (#{entry.size} bytes)"
      end

      # Locations must be cached before block rows reference them.
      load_locations_data_from_zip(zip_file)

      zip_file.each do |entry|
        next unless entry.name.include?('Blocks') && entry.name.end_with?('.csv')

        Rails.logger.info "Processing block file from zip: #{entry.name}"
        process_csv_from_zip(zip_file, entry)
      end
    end
  rescue StandardError => e
    Rails.logger.error "Error processing ZIP file: #{e.message}"
    Rails.logger.error e.backtrace.join("\n")
    raise
  end

  # Parse one block-file entry from the zip archive.
  #
  # BUG FIX: CSV.parse's :encoding option only applies to IO sources, so the
  # previous per-encoding retry never re-interpreted the string — and rows
  # imported before a mid-parse failure were double-counted on retry. The
  # bytes are now transcoded to UTF-8 up front and parsed exactly once.
  def process_csv_from_zip(zip_file, entry)
    zip_file.get_input_stream(entry) do |io|
      content = decode_to_utf8(io.read)
      unless content
        @errors << "Failed to process #{entry.name} with any supported encoding"
        next
      end

      CSV.parse(content, headers: true, header_converters: :symbol) do |row|
        import_row(row)
      end
      Rails.logger.info "Successfully processed #{entry.name}"
    end
  end

  # Load geoname_id -> country metadata from all Locations CSVs in the zip.
  #
  # BUG FIX: entries are sorted English-first, but the cache previously used
  # plain assignment, so later non-English files overwrote the English rows.
  # First write now wins (see cache_location_row), honoring the priority.
  def load_locations_data_from_zip(zip_file)
    location_entries = zip_file.select { |entry| entry.name.include?('Locations') && entry.name.end_with?('.csv') }
    location_entries.sort_by! { |entry| entry.name.include?('Locations-en') ? 0 : 1 }

    location_entries.each do |entry|
      Rails.logger.info "Loading locations from: #{entry.name}"
      zip_file.get_input_stream(entry) do |io|
        content = decode_to_utf8(io.read)
        unless content
          @errors << "Failed to load locations from #{entry.name} with any supported encoding"
          next
        end

        CSV.parse(content, headers: true, header_converters: :symbol) do |row|
          cache_location_row(row)
        end
        Rails.logger.info "Loaded locations from #{entry.name}"
      end
    end
    Rails.logger.info "Loaded #{@locations_cache.size} location records"
  end

  # Re-tag raw bytes with each candidate encoding and transcode to UTF-8.
  # Returns nil when no candidate yields valid text.
  def decode_to_utf8(raw)
    CANDIDATE_ENCODINGS.each do |name|
      candidate = raw.dup.force_encoding(name)
      next unless candidate.valid_encoding?

      begin
        return candidate.encode('UTF-8')
      rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
        next
      end
    end
    nil
  end

  # Import a plain (non-zip) CSV block file.
  def import_csv_file(csv_file)
    CSV.foreach(csv_file, headers: true, header_converters: :symbol, encoding: 'UTF-8') do |row|
      import_row(row)
    end
  end

  # Import a single row, tracking success/failure counters and progress.
  # BUG FIX: the previous import_csv_file called update_progress_if_needed
  # twice per failed row; progress is now reported exactly once per row.
  def import_row(row)
    @total_records += 1
    begin
      import_record(row)
      @processed_records += 1
    rescue StandardError => e
      @failed_records += 1
      @errors << "Row #{@total_records}: #{e.message} - Data: #{row.to_h}"
    end
    update_progress_if_needed
  end

  # Upsert one block row into NetworkRange, joined against the locations
  # cache (geoname_id preferred, then registered_country_geoname_id).
  # Raises on a missing or invalid network CIDR.
  def import_record(row)
    network = row[:network]
    geoname_id = row[:geoname_id]
    registered_country_geoname_id = row[:registered_country_geoname_id]
    is_anonymous_proxy = row[:is_anonymous_proxy] == '1'
    is_satellite_provider = row[:is_satellite_provider] == '1'
    is_anycast = row[:is_anycast] == '1'

    raise "Missing required field: network" unless network

    IPAddr.new(network) # Validates the CIDR; raises if invalid.

    location_data = @locations_cache[geoname_id] || @locations_cache[registered_country_geoname_id] || {}

    additional_data = {
      geoname_id: geoname_id,
      registered_country_geoname_id: registered_country_geoname_id,
      represented_country_geoname_id: row[:represented_country_geoname_id],
      continent_code: location_data[:continent_code],
      continent_name: location_data[:continent_name],
      country_name: location_data[:country_name],
      is_in_european_union: location_data[:is_in_european_union],
      is_satellite_provider: is_satellite_provider,
      is_anycast: is_anycast
    }.compact

    NetworkRange.upsert(
      {
        network: network,
        country: location_data[:country_iso_code],
        is_proxy: is_anonymous_proxy,
        source: 'geolite_country',
        additional_data: additional_data,
        updated_at: Time.current
      },
      unique_by: :index_network_ranges_on_network_unique
    )
  end

  # Push progress to the DataImport every 100 handled rows.
  def update_progress_if_needed
    return unless (@processed_records + @failed_records) % 100 == 0

    @data_import.update_progress(
      processed: @processed_records,
      failed: @failed_records,
      total_records: @total_records,
      stats: {
        total_records: @total_records,
        current_file: File.basename(@file_path),
        locations_loaded: @locations_cache.size,
        recent_errors: @errors.last(5)
      }
    )
  end

  # Store one locations row; the first (highest-priority) file to supply a
  # geoname_id wins, so English data is not overwritten by translations.
  def cache_location_row(row)
    geoname_id = row[:geoname_id]
    return unless geoname_id

    @locations_cache[geoname_id] ||= {
      country_iso_code: row[:country_iso_code],
      country_name: row[:country_name],
      continent_code: row[:continent_code],
      continent_name: row[:continent_name],
      is_in_european_union: row[:is_in_european_union]
    }
  end

  # Load locations from already-extracted CSV files on disk.
  def load_locations_data
    find_locations_files.each do |locations_file|
      CSV.foreach(locations_file, headers: true, header_converters: :symbol, encoding: 'UTF-8') do |row|
        cache_location_row(row)
      end
    end
  end

  # Candidate locations CSVs on disk, English file first when present.
  def find_locations_files
    base_dir = File.dirname(@file_path)
    if @file_path.end_with?('.zip')
      base_name = File.basename(@file_path, '.zip')
      [
        File.join(base_dir, "#{base_name}-Locations-en.csv"),
        Dir[File.join(base_dir, "#{base_name}-Locations-*.csv")].first
      ].compact.select { |file| File.exist?(file) }
    else
      Dir[File.join(base_dir, "*Locations*.csv")].select { |file| File.exist?(file) }
    end
  end
end

View File

@@ -0,0 +1,159 @@
# frozen_string_literal: true

# Service for automatically creating network ranges for unmatched IPs.
#
# Given an IP with no covering NetworkRange, creates a network of an
# appropriate minimum size (/24 for general IPv4, /20 for known cloud
# providers, /64 for IPv6) so analytics aggregate over networks rather
# than individual addresses.
class NetworkRangeGenerator
  include ActiveModel::Model
  include ActiveModel::Attributes

  # Minimum network sizes (prefix lengths) for different IP types.
  IPV4_MIN_SIZE = 24 # /24 = 256 IPs
  IPV6_MIN_SIZE = 64 # /64 = 2^64 IPs (standard IPv6 allocation)

  # Special network ranges to avoid.
  # BUG FIX: this constant was previously defined twice back-to-back, which
  # triggered an "already initialized constant" warning; the duplicate
  # definition has been removed.
  RESERVED_RANGES = [
    IPAddr.new('10.0.0.0/8'),     # Private
    IPAddr.new('172.16.0.0/12'),  # Private
    IPAddr.new('192.168.0.0/16'), # Private
    IPAddr.new('127.0.0.0/8'),    # Loopback
    IPAddr.new('169.254.0.0/16'), # Link-local
    IPAddr.new('224.0.0.0/4'),    # Multicast
    IPAddr.new('240.0.0.0/4'),    # Reserved
    IPAddr.new('::1/128'),        # IPv6 loopback
    IPAddr.new('fc00::/7'),       # IPv6 private
    IPAddr.new('fe80::/10'),      # IPv6 link-local
    IPAddr.new('ff00::/8')        # IPv6 multicast
  ].freeze

  # Known major cloud provider ranges. Hoisted to a frozen constant so the
  # list is built once instead of on every datacenter_ip? call.
  # NOTE(review): hard-coded snapshot — provider ranges drift over time.
  CLOUD_RANGES = [
    IPAddr.new('3.0.0.0/8'),      # AWS
    IPAddr.new('52.0.0.0/8'),     # AWS
    IPAddr.new('54.0.0.0/8'),     # AWS
    IPAddr.new('13.0.0.0/8'),     # AWS
    IPAddr.new('104.16.0.0/12'),  # Cloudflare
    IPAddr.new('172.64.0.0/13'),  # Cloudflare
    IPAddr.new('104.24.0.0/14'),  # Cloudflare
    IPAddr.new('172.68.0.0/14'),  # Cloudflare
    IPAddr.new('108.170.0.0/16'), # Google
    IPAddr.new('173.194.0.0/16'), # Google
    IPAddr.new('209.85.0.0/16'),  # Google
    IPAddr.new('157.240.0.0/16'), # Facebook/Meta
    IPAddr.new('31.13.0.0/16'),   # Facebook/Meta
    IPAddr.new('69.63.0.0/16'),   # Facebook/Meta
    IPAddr.new('173.252.0.0/16'), # Facebook/Meta
    IPAddr.new('20.0.0.0/8'),     # Microsoft Azure
    IPAddr.new('40.64.0.0/10'),   # Microsoft Azure
    IPAddr.new('40.96.0.0/11'),   # Microsoft Azure
    IPAddr.new('40.112.0.0/12'),  # Microsoft Azure
    IPAddr.new('40.123.0.0/16'),  # Microsoft Azure
    IPAddr.new('40.124.0.0/14'),  # Microsoft Azure
    IPAddr.new('40.126.0.0/15')   # Microsoft Azure
  ].freeze

  class << self
    # Find an existing NetworkRange covering ip_address, or create an
    # appropriately-sized one.
    #
    # @param ip_address [IPAddr, String] the IP to cover
    # @param user [User, nil] attributed creator for new ranges
    # @return [NetworkRange]
    def find_or_create_for_ip(ip_address, user: nil)
      ip_str = ip_address.to_s
      ip_obj = ip_address.is_a?(IPAddr) ? ip_address : IPAddr.new(ip_str)

      existing_range = NetworkRange.contains_ip(ip_str).first
      if existing_range
        # Replace auto-created /32 singletons with a broader network for
        # better analytics — but never clobber human-curated or imported
        # ranges.
        if existing_range.masklen == 32 &&
           !%w[manual user_created api_imported].include?(existing_range.source)
          return create_appropriate_network(ip_obj, user: user)
        end
        return existing_range
      end

      create_appropriate_network(ip_obj, user: user)
    end

    # Minimum prefix length appropriate for the given IP.
    def minimum_network_size(ip_address)
      return IPV6_MIN_SIZE if ip_address.ipv6?

      # Larger networks (/20 = 4096 IPs) for known datacenter providers.
      datacenter_ip?(ip_address) ? 20 : IPV4_MIN_SIZE
    end

    # True when the IP falls inside a known cloud-provider range.
    def datacenter_ip?(ip_address)
      CLOUD_RANGES.any? { |range| range.include?(ip_address) }
    end

    private

    # Create (or fetch, if it already exists) the network range that
    # contains the IP at the computed minimum size.
    def create_appropriate_network(ip_address, user: nil)
      prefix_length = minimum_network_size(ip_address)
      network_cidr = create_network_with_ip(ip_address, prefix_length)

      existing = NetworkRange.find_by(network: network_cidr)
      return existing if existing

      NetworkRange.create!(
        network: network_cidr,
        source: 'auto_generated',
        creation_reason: "auto-generated for unmatched IP traffic",
        user: user,
        company: nil, # Will be filled by enrichment job
        asn: nil,
        country: nil,
        is_datacenter: datacenter_ip?(ip_address),
        is_vpn: false,
        is_proxy: false
      )
    end

    # Base address (as a string) of the network containing the IP at the
    # given prefix length. IPAddr#mask zeroes the host bits for both IPv4
    # and IPv6, which makes the previous manual bit arithmetic unnecessary
    # while producing the identical string.
    def create_network_with_ip(ip_address, prefix_length)
      ip_address.mask(prefix_length).to_s
    end
  end
end

View File

@@ -0,0 +1,177 @@
# frozen_string_literal: true

# WafPolicyMatcher - Service to match NetworkRanges against active WafPolicies
#
# This service provides efficient matching of network ranges against firewall
# policies and can generate rules when matches are found.
class WafPolicyMatcher
  include ActiveModel::Model
  include ActiveModel::Attributes

  # Lower score = higher priority when ordering matched policies.
  POLICY_TYPE_PRIORITY = {
    'country' => 1,
    'asn' => 2,
    'company' => 3,
    'network_type' => 4
  }.freeze

  attr_accessor :network_range
  attr_reader :matching_policies, :generated_rules

  def initialize(network_range:)
    @network_range = network_range
    @matching_policies = []
    @generated_rules = []
  end

  # Find all active policies matching the network range, ordered by
  # policy-type priority (country > asn > company > network_type) and then
  # by creation date.
  #
  # BUG FIX: the sorted array was previously discarded — sort_by's result
  # was returned but never assigned back to @matching_policies, so
  # generate_rules operated on the unsorted list. The sorted list is now
  # stored before being returned.
  def find_matching_policies
    return [] unless network_range.present?

    matched = active_policies.select do |policy|
      policy.matches_network_range?(network_range)
    end

    @matching_policies = matched.sort_by do |policy|
      [POLICY_TYPE_PRIORITY.fetch(policy.policy_type, 99), policy.created_at]
    end
  end

  # Generate (or reuse) one rule per matching policy; returns the rules.
  def generate_rules
    return [] if matching_policies.empty?

    @generated_rules = matching_policies.filter_map do |policy|
      existing_rule = Rule.find_by(
        network_range: network_range,
        waf_policy: policy,
        enabled: true
      )

      if existing_rule
        Rails.logger.debug "Rule already exists for network_range #{network_range.cidr} and policy #{policy.name}"
        existing_rule
      else
        rule = policy.create_rule_for_network_range(network_range)
        if rule
          Rails.logger.info "Generated rule for network_range #{network_range.cidr} from policy #{policy.name}"
        end
        rule
      end
    end
  end

  # Find matching policies and generate rules in one step.
  def match_and_generate_rules
    find_matching_policies
    generate_rules
  end

  # --- Class methods for batch processing ---

  def self.process_network_range(network_range)
    new(network_range: network_range).match_and_generate_rules
  end

  def self.batch_process_network_ranges(network_ranges)
    network_ranges.map do |network_range|
      matcher = new(network_range: network_range)
      matcher.match_and_generate_rules
      {
        network_range: network_range,
        matching_policies: matcher.matching_policies,
        generated_rules: matcher.generated_rules
      }
    end
  end

  # Evaluate ranges that carry intelligence data (country/asn/company or
  # datacenter/proxy/vpn flags) but have no policy-generated rules yet.
  def self.process_ranges_without_policy_rules(limit: 100)
    ranges_needing_evaluation = NetworkRange
      .left_joins(:rules)
      .where("rules.id IS NULL OR rules.waf_policy_id IS NULL")
      .where("(country IS NOT NULL OR asn IS NOT NULL OR company IS NOT NULL OR is_datacenter = true OR is_proxy = true OR is_vpn = true)")
      .limit(limit)
      .includes(:rules)

    batch_process_network_ranges(ranges_needing_evaluation)
  end

  # Re-evaluate all potentially-affected network ranges after a policy
  # changes, creating rules for any that match.
  #
  # BUG FIX: removed a dead `matcher = new(...)` local that was constructed
  # for every range and never used.
  def self.reprocess_all_for_policy(waf_policy)
    potential_ranges = case waf_policy.policy_type
                       when 'country'
                         NetworkRange.where(country: waf_policy.targets)
                       when 'asn'
                         NetworkRange.where(asn: waf_policy.targets)
                       when 'network_type'
                         NetworkRange.where(
                           "is_datacenter = ? OR is_proxy = ? OR is_vpn = ?",
                           waf_policy.targets.include?('datacenter'),
                           waf_policy.targets.include?('proxy'),
                           waf_policy.targets.include?('vpn')
                         )
                       when 'company'
                         # Company matching requires fuzzy text matching.
                         NetworkRange.where("company ILIKE ANY (array[?])",
                                            waf_policy.targets.map { |c| "%#{c}%" })
                       else
                         NetworkRange.none
                       end

    results = []
    potential_ranges.find_each do |network_range|
      next unless waf_policy.matches_network_range?(network_range)

      rule = waf_policy.create_rule_for_network_range(network_range)
      results << { network_range: network_range, generated_rule: rule } if rule
    end
    results
  end

  # --- Statistics and reporting ---

  def self.matching_policies_for_network_range(network_range)
    new(network_range: network_range).find_matching_policies
  end

  def self.policy_effectiveness_stats(waf_policy, days: 30)
    cutoff_date = days.days.ago
    rules = waf_policy.generated_rules.where('created_at > ?', cutoff_date)

    {
      policy_name: waf_policy.name,
      policy_type: waf_policy.policy_type,
      action: waf_policy.action,
      rules_generated: rules.count,
      active_rules: rules.active.count,
      networks_protected: rules.joins(:network_range).count('distinct network_ranges.id'),
      period_days: days,
      generation_rate: rules.count.to_f / days
    }
  end

  private

  def active_policies
    @active_policies ||= WafPolicy.active
  end
end