Many updates

This commit is contained in:
Dan Milne
2025-11-13 14:42:43 +11:00
parent 5e5198f113
commit df94ac9720
41 changed files with 4760 additions and 516 deletions

View File

@@ -4,6 +4,10 @@ class Event < ApplicationRecord
# Normalized association for hosts (most valuable compression)
belongs_to :request_host, optional: true
# WAF rule associations
belongs_to :rule, optional: true
has_one :waf_policy, through: :rule
# Enums for fixed value sets
enum :waf_action, {
allow: 0, # allow/pass
@@ -29,7 +33,7 @@ class Event < ApplicationRecord
# This provides direct array access and efficient indexing
attribute :tags, :json, default: -> { [] }
validates :event_id, presence: true, uniqueness: true
validates :request_id, presence: true, uniqueness: true
validates :timestamp, presence: true
scope :recent, -> { order(timestamp: :desc) }
@@ -55,32 +59,42 @@ class Event < ApplicationRecord
where("tags @> ARRAY[?]", tag_array)
}
# Network-based filtering scopes
# Network-based filtering scopes - now using denormalized columns
# Events whose company column contains the given text, matched
# case-insensitively with ILIKE.
# NOTE(review): `company` is interpolated into the pattern unescaped, so any
# `%` or `_` in the input acts as a LIKE wildcard; consider wrapping it with
# ActiveRecord's sanitize_sql_like.
# NOTE(review): the joins(...)/.where("network_ranges.company ...") pair below
# looks like the pre-change implementation shown by this diff view - confirm
# which lines are current.
scope :by_company, ->(company) {
joins("JOIN network_ranges ON events.ip_address <<= network_ranges.network")
.where("network_ranges.company ILIKE ?", "%#{company}%")
where("company ILIKE ?", "%#{company}%")
}
# Events whose country column equals the given value.
scope :by_country, ->(country) { where(country: country) }
# Filter events by network classification using the is_datacenter / is_vpn /
# is_proxy columns. `type` is stringified and downcased before matching:
#   "datacenter" / "vpn" / "proxy" -> the corresponding flag is true
#   "standard"                     -> all three flags are false
#   anything else                  -> `none`
# NOTE(review): the joins(...).case/.when/.else chain directly below looks like
# the pre-change implementation shown by this diff view - confirm which lines
# are current.
scope :by_network_type, ->(type) {
joins("JOIN network_ranges ON events.ip_address <<= network_ranges.network")
.case(type)
.when("datacenter") { where("network_ranges.is_datacenter = ?", true) }
.when("vpn") { where("network_ranges.is_vpn = ?", true) }
.when("proxy") { where("network_ranges.is_proxy = ?", true) }
.when("standard") { where("network_ranges.is_datacenter = ? AND network_ranges.is_vpn = ? AND network_ranges.is_proxy = ?", false, false, false) }
.else { none }
case type.to_s.downcase
when "datacenter"
where(is_datacenter: true)
when "vpn"
where(is_vpn: true)
when "proxy"
where(is_proxy: true)
when "standard"
where(is_datacenter: false, is_vpn: false, is_proxy: false)
else
none
end
}
# Events whose asn column equals the given ASN. The argument is coerced with
# to_i first (so, for example, a non-numeric string is coerced to 0).
# NOTE(review): the joins(...)/.where("network_ranges.asn ...") pair below looks
# like the pre-change implementation shown by this diff view - confirm which
# lines are current.
scope :by_asn, ->(asn) {
joins("JOIN network_ranges ON events.ip_address <<= network_ranges.network")
.where("network_ranges.asn = ?", asn.to_i)
where(asn: asn.to_i)
}
# Events joined to a network_ranges row whose network equals the given CIDR.
# NOTE(review): the raw joins("JOIN network_ranges ...")/.where pair below looks
# like the pre-change implementation shown by this diff view; the
# joins(:network_range) form goes through the network_range association -
# confirm which lines are current.
scope :by_network_cidr, ->(cidr) {
joins("JOIN network_ranges ON events.ip_address <<= network_ranges.network")
.where("network_ranges.network = ?", cidr)
# This still requires a join since we need to match CIDR
joins(:network_range).where("network_ranges.network = ?", cidr)
}
# Add association for the optional network_range_id
belongs_to :network_range, optional: true
# Path prefix matching using range queries (uses B-tree index efficiently)
scope :with_path_prefix, ->(prefix_segment_ids) {
return none if prefix_segment_ids.blank?
@@ -130,13 +144,39 @@ class Event < ApplicationRecord
# Normalize event fields after extraction
after_validation :normalize_event_fields, if: :should_normalize?
def self.create_from_waf_payload!(event_id, payload)
# Populate network intelligence from IP address
before_save :populate_network_intelligence, if: :should_populate_network_intelligence?
# Backfill network intelligence for every event whose country is still nil.
#
# Each matching event is re-saved so the before_save callback can populate the
# network columns. Progress is printed after every batch, and events whose
# save returned a falsy value are tallied and reported instead of being
# silently ignored.
#
# @param batch_size [Integer] number of events loaded per batch
# @return [Integer] number of events processed (0 when nothing matched)
def self.backfill_network_intelligence!(batch_size: 10_000)
  pending = where(country: nil)
  total = pending.count
  return 0 if total.zero?

  puts "Backfilling network intelligence for #{total} events..."

  processed = 0
  failed = 0
  pending.find_in_batches(batch_size: batch_size) do |batch|
    # save triggers the before_save callback; a falsy result means the event
    # was not persisted, so keep track of it rather than discarding the result.
    failed += batch.count { |event| !event.save }
    processed += batch.size
    puts "  Processed #{processed}/#{total} (#{(processed.to_f / total * 100).round(1)}%)"
  end

  puts "  #{failed} event(s) failed to save" if failed.positive?
  processed
end
# Backfill network intelligence for this single event.
# Calls populate_network_intelligence directly, then persists with save!.
# NOTE(review): the model also registers populate_network_intelligence as a
# before_save callback (guarded by should_populate_network_intelligence?), so
# the population may run a second time during save! - confirm that is harmless.
def backfill_network_intelligence!
populate_network_intelligence
save!
end
def self.create_from_waf_payload!(request_id, payload)
# Normalize headers in payload during import phase
normalized_payload = normalize_payload_headers(payload)
# Create the WAF request event
create!(
event_id: event_id,
request_id: request_id,
timestamp: parse_timestamp(normalized_payload["timestamp"]),
payload: normalized_payload,
@@ -150,7 +190,8 @@ class Event < ApplicationRecord
response_status: normalized_payload.dig("response", "status_code"),
response_time_ms: normalized_payload.dig("response", "duration_ms"),
waf_action: normalize_action(normalized_payload["waf_action"]), # Normalize incoming action values
rule_matched: normalized_payload["rule_matched"],
# Support both new (rule_id) and old (rule_matched) field names during cutover
rule_id: normalized_payload["rule_id"] || normalized_payload["rule_matched"],
blocked_reason: normalized_payload["blocked_reason"],
# Server/Environment info
@@ -283,7 +324,7 @@ class Event < ApplicationRecord
end
# Whether a rule is recorded for this event; the value of the last expression,
# rule_id.present?, is what gets returned.
# NOTE(review): the rule_matched.present? line looks like the pre-change
# implementation shown by this diff view - confirm which line is current.
def rule_matched?
rule_matched.present?
rule_id.present?
end
# New path methods for normalization
@@ -343,40 +384,39 @@ class Event < ApplicationRecord
end
# Most specific network range for this event.
# When network_range_id is set, returns NetworkRange.find_by(id: ...) directly;
# otherwise takes the first entry of matching_network_ranges and digs :range.
# NOTE(review): find_by returns nil when no row has that id, and in that case
# there is no fallback to matching_network_ranges - confirm that is intended.
# NOTE(review): the bare matching_network_ranges.first line looks like the
# pre-change implementation shown by this diff view - confirm.
def most_specific_range
matching_network_ranges.first
# Use the cached network_range_id if available (much faster)
return NetworkRange.find_by(id: network_range_id) if network_range_id.present?
# Fallback to expensive lookup
matching_network_ranges.first&.dig(:range)
end
# Broadest network range for this event: the :range value of the last entry of
# matching_network_ranges, or nil when there is no last entry.
# NOTE(review): the bare matching_network_ranges.last line looks like the
# pre-change implementation shown by this diff view - confirm.
def broadest_range
matching_network_ranges.last
matching_network_ranges.last&.dig(:range)
end
# Network intelligence for this event as a symbol-keyed hash with the keys
# :country, :company, :asn, :asn_org, :is_datacenter, :is_vpn and :is_proxy,
# each read from the same-named attribute on the event.
# NOTE(review): the most_specific_range&.dig(:intelligence) line looks like the
# pre-change implementation shown by this diff view - confirm.
def network_intelligence
most_specific_range&.dig(:intelligence) || {}
# Use denormalized fields instead of expensive lookup
{
country: country,
company: company,
asn: asn,
asn_org: asn_org,
is_datacenter: is_datacenter,
is_vpn: is_vpn,
is_proxy: is_proxy
}
end
# Readers that look their value up in the network_intelligence hash; the
# predicate variants fall back to false when the hash value is nil or false.
# NOTE(review): the hash-building version of network_intelligence itself calls
# company, asn and asn_org, so these readers would recurse endlessly if they
# coexisted with it. Presumably these are the lines removed by this commit in
# favour of the column-backed accessors - confirm.
def company
network_intelligence[:company]
end
def asn
network_intelligence[:asn]
end
def asn_org
network_intelligence[:asn_org]
end
def is_datacenter?
network_intelligence[:is_datacenter] || false
end
def is_proxy?
network_intelligence[:is_proxy] || false
end
def is_vpn?
network_intelligence[:is_vpn] || false
end
# Denormalized attribute accessors - these now use the columns directly
# No need to override - Rails provides these automatically:
# - country (column)
# - company (column)
# - asn (column)
# - asn_org (column)
# - is_datacenter (column)
# - is_vpn (column)
# - is_proxy (column)
# IP validation
def valid_ipv4?
@@ -480,7 +520,8 @@ class Event < ApplicationRecord
self.request_url = request_data["url"]
self.response_status = response_data["status_code"]
self.response_time_ms = response_data["duration_ms"]
self.rule_matched = payload["rule_matched"]
# Support both new (rule_id) and old (rule_matched) field names during cutover
self.rule_id = payload["rule_id"] || payload["rule_matched"]
self.blocked_reason = payload["blocked_reason"]
# Store original values for normalization only if they don't exist yet