Files
baffle-hub/app/models/network_range.rb
2025-12-27 11:56:19 +11:00

572 lines
18 KiB
Ruby

# frozen_string_literal: true
# NetworkRange - Unified IPv4/IPv6 network range management
#
# Uses PostgreSQL's inet type to handle both IPv4 and IPv6 networks seamlessly.
# Provides network intelligence data including ASN, company, geographic info,
# and classification flags (datacenter, proxy, VPN).
class NetworkRange < ApplicationRecord
# Allowed values for the +source+ column (enforced by the inclusion validation below).
SOURCES = %w[api_imported user_created manual auto_generated inherited geolite_asn geolite_country
bot_import_amazon_aws bot_import_google bot_import_microsoft_bing bot_import_anthropic
bot_import_openai_searchbot bot_import_openai_chatgpt_user bot_import_openai_gptbot
bot_import_cloudflare bot_import_facebook bot_import_applebot bot_import_duckduckgo
production_import].freeze
# Associations
# Rules are destroyed together with their range.
has_many :rules, dependent: :destroy
# Events survive the range; only their network_range_id is nulled on destroy.
# NOTE: this class also defines an instance method named +events+ further down,
# which takes precedence over the association reader generated here. The
# association itself is still what joins(:events) in the scopes below uses.
has_many :events, foreign_key: :network_range_id, dependent: :nullify
belongs_to :user, optional: true
# Validations
validates :network, presence: true, uniqueness: true
validates :source, inclusion: { in: SOURCES }
# asn may be blank; when given it must be a number greater than zero.
validates :asn, numericality: { greater_than: 0 }, allow_blank: true
# Scopes
# family() and masklen() in the SQL fragments are database-side network functions.
scope :ipv4, -> { where("family(network) = 4") }
scope :ipv6, -> { where("family(network) = 6") }
scope :by_country, ->(country) { where(country: country) }
scope :by_company, ->(company) { where(company: company) }
scope :by_asn, ->(asn) { where(asn: asn) }
scope :datacenter, -> { where(is_datacenter: true) }
scope :proxy, -> { where(is_proxy: true) }
scope :vpn, -> { where(is_vpn: true) }
scope :user_created, -> { where(source: 'user_created') }
scope :api_imported, -> { where(source: 'api_imported') }
scope :geolite_imported, -> { where(source: ['geolite_asn', 'geolite_country']) }
scope :geolite_asn, -> { where(source: 'geolite_asn') }
scope :geolite_country, -> { where(source: 'geolite_country') }
# Ranges that have at least one associated event (via the events association).
scope :with_events, -> { joins(:events).distinct }
# Ranges ordered by number of associated events, busiest first.
scope :most_active, -> { joins(:events).group('network_ranges.id').order('COUNT(events.id) DESC') }
# Callbacks
# Fills in a default +source+ before validation so the inclusion check can pass.
before_validation :set_default_source
# after_save :update_children_inheritance!, if: :should_update_children_inheritance? # Disabled for now
# CIDR accessors and basic network properties.
#
# +cidr+ used to be defined twice in this section; the later definition
# silently replaced the earlier one (which returned the address without its
# prefix). Only the effective definition is kept.

# Full CIDR notation ("address/prefix") for this range.
#
# @return [String]
def cidr
  "#{network_address}/#{prefix_length}"
end

# Virtual-attribute writer: assigns the given CIDR to +network+.
#
# @param new_cidr [String] value handed straight to the +network+ attribute
def cidr=(new_cidr)
  self.network = new_cidr
end

# Prefix length (number of mask bits) of +network+.
# Relies on +network+ responding to #prefix, as IPAddr does.
def prefix_length
  network.prefix
end

# Textual form of +network+. For an IPAddr this is the base address only,
# which is why +cidr+ appends the prefix explicitly.
def network_address
  network.to_s
end
# Broadcast address of this range, computed by PostgreSQL's broadcast().
#
# The full CIDR (not just the bare address) is sent so the database sees the
# real mask; a bare address would be treated as a single host. The value is
# bound through sanitize_sql_array instead of being interpolated into the SQL.
#
# @return [String, nil] first column of the first result row
def broadcast_address
  model = self.class
  sql = model.sanitize_sql_array(["SELECT broadcast(?::inet)", cidr])
  model.connection.select_value(sql)
end
# IP family of this range, inferred from the textual address:
# a colon means IPv6, anything else is treated as IPv4.
#
# @return [Integer] 6 or 4
def family
  host_part, = network.to_s.split('/')
  return 6 if host_part.include?(':')

  4
end
# True when this instance has not been persisted (simply the negation of
# persisted?). Such in-memory ranges are referred to as "virtual" in this class.
#
# @return [Boolean]
def virtual?
# Virtual networks are unsaved instances (not persisted to database)
!persisted?
end
# True when #family reports 4.
#
# @return [Boolean]
def ipv4?
family == 4
end
# True when #family reports 6.
#
# @return [Boolean]
def ipv6?
family == 6
end
# Network containment and overlap operations
# Whether the given IP address lies inside THIS range.
#
# The check is done in Ruby against this instance's own network. The previous
# implementation queried the whole table, so it answered "does any stored
# range contain this IP" and ignored the receiver entirely.
#
# @param ip_string [String, #to_s] address to test
# @return [Boolean] false when the address is outside the range or unparsable
def contains_ip?(ip_string)
  own_range = network.is_a?(IPAddr) ? network : IPAddr.new(network.to_s)
  own_range.include?(IPAddr.new(ip_string.to_s))
rescue => e
  Rails.logger.error "Error checking IP containment: #{e.message}"
  false
end
# Whether +other_cidr+ is fully contained in (or equal to) this range.
#
# +network+ is used directly when it is already an IPAddr: IPAddr.new rejects
# non-String arguments unless a family is given, and that error was not
# covered by the old rescue clause. The explicit family/prefix comparison
# makes the result independent of how IPAddr#include? treats network
# arguments, which differs between ipaddr versions.
#
# @param other_cidr [String] network in CIDR notation
# @return [Boolean] false for non-contained or unparsable input
def contains_network?(other_cidr)
  outer = network.is_a?(IPAddr) ? network : IPAddr.new(network.to_s)
  inner = IPAddr.new(other_cidr.to_s)
  outer.family == inner.family && outer.prefix <= inner.prefix && outer.include?(inner)
rescue IPAddr::Error
  false
end
# Whether this range and +other_cidr+ share any addresses, i.e. one of the
# two contains the other.
#
# +network+ is used directly when it is already an IPAddr, because
# IPAddr.new raises for non-String arguments without an explicit family.
# Any IPAddr parsing error yields false.
#
# @param other_cidr [String] network in CIDR notation
# @return [Boolean]
def overlaps?(other_cidr)
  own = network.is_a?(IPAddr) ? network : IPAddr.new(network.to_s)
  other = IPAddr.new(other_cidr.to_s)
  own.include?(other) || other.include?(own)
rescue IPAddr::Error
  false
end
# Parent/child relationships
# Ranges that strictly contain this one (shorter prefix), most specific first.
# The full CIDR is bound so the mask takes part in the `<<` comparison.
#
# @return [ActiveRecord::Relation]
def parent_ranges
  containing = NetworkRange.where("?::inet << network", cidr)
  containing.order("masklen(network) DESC")
end
# Ranges strictly contained by this one (longer prefix), least specific first.
# The full CIDR is bound so the mask takes part in the `>>` comparison.
#
# @return [ActiveRecord::Relation]
def child_ranges
  contained = NetworkRange.where("?::inet >> network", cidr)
  contained.order("masklen(network) ASC")
end
# Nearest (longest-prefix) range that contains this one and carries at least
# one piece of intelligence data (asn, company, country or a true flag).
#
# The lookup binds the bare address and then restricts to shorter prefixes
# than this range's own.
#
# @return [NetworkRange, nil]
def parent_with_intelligence
  intelligence_present = "(asn IS NOT NULL OR company IS NOT NULL OR country IS NOT NULL OR is_datacenter = true OR is_vpn = true OR is_proxy = true)"
  candidates = NetworkRange.where("?::inet <<= network", network.to_s)
                           .where("masklen(network) < ?", prefix_length)
                           .where(intelligence_present)
  candidates.order("masklen(network) DESC").first
end
# True when this range, or any range containing it, already holds IPAPI data.
#
# @return [Boolean]
def has_ipapi_data_available?
  unless has_network_data_from?(:ipapi)
    return parent_ranges.any? { |ancestor| ancestor.has_network_data_from?(:ipapi) }
  end

  true
end
# Generic API fetching status management
# Whether a fetch for +source+ was marked as started less than five minutes
# ago. Older markers are ignored, so a stale marker does not block new fetches.
#
# @param source [Symbol, String] data source name (e.g. :ipapi)
# @return [Boolean, nil] nil when no marker exists for the source
def is_fetching_api_data?(source)
  statuses = network_data&.dig('fetching_status') || {}
  entry = statuses[source.to_s]
  entry && entry['started_at'] && entry['started_at'] > 5.minutes.ago.to_f
end
# Records that a fetch for +source+ has started, unless one is already
# in progress.
#
# Runs inside a transaction and takes a row lock (lock!) before re-checking
# the marker, so the check and the write happen under the same lock.
#
# The early exit uses `next` rather than `return`: leaving a transaction
# block with `return` is deprecated in Rails 6.1 and rolls back in 7.0.
# `next` ends the block normally and makes its value the method's result.
#
# @param source [Symbol, String] data source name
# @return [Boolean] true when the marker was written, false when another
#   fetch was already marked as running
def mark_as_fetching_api_data!(source)
  transaction do
    # Reload with lock to get fresh data
    lock!

    if is_fetching_api_data?(source)
      Rails.logger.info "Another job already started fetching #{source} for #{cidr}"
      next false
    end

    self.network_data ||= {}
    self.network_data['fetching_status'] ||= {}
    self.network_data['fetching_status'][source.to_s] = {
      'started_at' => Time.current.to_f,
      'job_id' => SecureRandom.hex(8)
    }
    save!
    true
  end
end
# Removes the in-progress marker for +source+ and saves the record.
# Does nothing (and does not save) when no marker exists for that source.
# An emptied 'fetching_status' hash is removed as well.
#
# @param source [Symbol, String] data source name
def clear_fetching_status!(source)
  key = source.to_s
  return unless network_data&.dig('fetching_status')&.dig(key)

  statuses = self.network_data['fetching_status']
  statuses.delete(key)
  self.network_data.delete('fetching_status') if statuses.empty?
  save!
end
# Whether data from +source+ should be fetched: true only when this range has
# no data from that source yet and no fetch is currently marked as running.
#
# The source is passed as an argument to has_network_data_from?. The earlier
# version built the string "has_network_data_from?(source)" and asked
# respond_to? about it, which is never true, so existing data was never
# taken into account.
#
# @param source [Symbol, String] data source name
# @return [Boolean]
def should_fetch_api_data?(source)
  return false if has_network_data_from?(source)
  return false if is_fetching_api_data?(source)

  true
end
# Whether an IPAPI fetch should be started for this range. False when IPAPI
# data is already available here or on a containing range, or when this range
# or any containing range has a fetch marked as in progress.
#
# @return [Boolean]
def should_fetch_ipapi_data?
  if has_ipapi_data_available? || is_fetching_api_data?(:ipapi)
    false
  else
    parent_ranges.none? { |ancestor| ancestor.is_fetching_api_data?(:ipapi) }
  end
end
# Intelligence for this range: its own data when present, otherwise the data
# of the nearest containing range that has some (flagged inherited: true and
# tagged with that range's CIDR), otherwise an empty hash.
#
# @return [Hash]
def inherited_intelligence
  return own_intelligence if has_intelligence?

  ancestor = parent_with_intelligence
  return {} unless ancestor

  ancestor.own_intelligence.merge(inherited: true, parent_cidr: ancestor.cidr)
end
# True when this range itself carries any intelligence: a present asn,
# company or country, or any of the datacenter/proxy/vpn flags.
def has_intelligence?
  return true if [asn, company, country].any?(&:present?)

  is_datacenter? || is_proxy? || is_vpn?
end
# Snapshot of the intelligence attributes stored on this record, always
# marked inherited: false and tagged with the record's +source+.
#
# @return [Hash{Symbol => Object}]
def own_intelligence
  identity = { asn: asn, asn_org: asn_org, company: company, country: country }
  flags = { is_datacenter: is_datacenter, is_proxy: is_proxy, is_vpn: is_vpn }
  identity.merge(flags).merge(inherited: false, source: source)
end
# User-agent tally for this range, cached for five minutes.
#
# Persisted ranges with events are aggregated through EventDdb over this
# range's id plus the ids of all contained ranges; if that query fails or
# returns nil the result is an empty hash. Everything else (unsaved ranges,
# or persisted ranges without events) is tallied in Ruby from #events.
#
# Unsaved records all report the same cache_key ("<model>/new"), so for them
# the CIDR is added to the key; otherwise different unsaved ranges would
# read each other's cached tallies. Keys for persisted records are unchanged.
#
# @return [Hash] user agent => count
def agent_tally
  key_base = persisted? ? cache_key : "#{cache_key}/#{cidr.to_s.tr('/', '_')}"

  Rails.cache.fetch("#{key_base}:agent_tally", expires_in: 5.minutes) do
    if persisted? && has_events?
      # Include child network ranges to capture all traffic within this block
      network_ids = [id] + child_ranges.pluck(:id)
      with_duckdb_fallback { EventDdb.network_agent_tally(network_ids) } || {}
    else
      # Unsaved range, or no events reported: tally from the CIDR-based events query
      events.map(&:user_agent).tally
    end
  end
end
# Runs the given block and returns its value; any StandardError is logged as
# a warning and turned into nil, which callers treat as "use the fallback".
#
# @yieldreturn [Object] result of the DuckDB-backed query
# @return [Object, nil]
def with_duckdb_fallback(&block)
  yield
rescue StandardError => error
  Rails.logger.warn "[NetworkRange] DuckDB query failed, falling back to PostgreSQL: #{error.message}"
  nil
end
# Geographic lookup
# Fills in +country+ from GeoIpService when it is not set yet, using the
# range's network address as the sample IP. Errors are logged, not raised.
def geo_lookup_country!
  return if country.present?

  looked_up = GeoIpService.lookup_country(network_address)
  return unless looked_up.present?

  update!(country: looked_up)
rescue => error
  Rails.logger.error "Failed to lookup geo location for network range #{cidr}: #{error.message}"
end
# Class methods for network operations
# Ranges that contain (or equal) the given address, most specific first.
#
# @param ip_string [String]
# @return [ActiveRecord::Relation]
def self.contains_ip(ip_string)
  matching = where("network >>= ?", ip_string)
  matching.order("masklen(network) DESC")
end
# Ranges that overlap the given CIDR, using the database `&&` operator.
#
# @param range_cidr [String]
# @return [ActiveRecord::Relation]
def self.overlapping(range_cidr)
  overlap_condition = "network && ?"
  where(overlap_condition, range_cidr)
end
# Looks a range up by CIDR, accepting the URL-safe form produced by
# #to_param (underscore instead of slash).
#
# When no prefix is given, one is appended: /24 for IPv4 and /64 for IPv6,
# matching the tracking granularity used by
# find_or_create_tracking_network_for_ip. (Previously /24 was appended to
# IPv6 addresses too.) Blank input returns nil instead of raising.
#
# @param cidr [String, nil] e.g. "10.0.0.0_24", "10.0.0.0/24" or "10.0.0.5"
# @return [NetworkRange, nil]
def self.findd(cidr)
  normalized = cidr.to_s.tr('_', '/')
  return nil if normalized.empty?

  unless normalized.include?('/')
    default_prefix = normalized.include?(':') ? 64 : 24
    normalized = "#{normalized}/#{default_prefix}"
  end

  find_by(network: normalized)
end
# Finds the range with the given CIDR or creates it. The user, source
# (default 'user_created') and creation reason are applied only on creation.
#
# @param cidr [String]
# @param user [User, nil]
# @param source [String, nil]
# @param reason [String, nil]
# @return [NetworkRange]
def self.find_or_create_by_cidr(cidr, user: nil, source: nil, reason: nil)
  effective_source = source || 'user_created'

  find_or_create_by(network: cidr) do |record|
    record.user = user
    record.source = effective_source
    record.creation_reason = reason
  end
end
# Finds the range with the given CIDR or creates it, assigning the extra
# attributes only when a new record is built.
#
# @param cidr [String]
# @param attributes [Hash] attributes for a newly created record
# @return [NetworkRange]
def self.import_from_cidr(cidr, **attributes)
  find_or_create_by(network: cidr) { |record| record.assign_attributes(attributes) }
end
# Convenience methods for JSON fields
# +abuser_scores+ parsed from JSON. Returns an empty hash when the attribute
# is unset or does not parse.
def abuser_scores_hash
  raw = abuser_scores
  return {} unless raw

  JSON.parse(raw)
rescue JSON::ParserError
  {}
end
# Stores the given object in +abuser_scores+ as its JSON representation.
def abuser_scores_hash=(hash)
  encoded = hash.to_json
  self.abuser_scores = encoded
end
# +additional_data+ parsed from JSON. Returns an empty hash when the
# attribute is unset or does not parse.
def additional_data_hash
  raw = additional_data
  return {} unless raw

  JSON.parse(raw)
rescue JSON::ParserError
  {}
end
# Stores the given object in +additional_data+ as its JSON representation.
def additional_data_hash=(hash)
  encoded = hash.to_json
  self.additional_data = encoded
end
# Network data accessors for different data sources
# network_data is a JSONB column with namespaced data:
# {
# geolite: {...}, # MaxMind GeoLite2 data
# ipapi: {...}, # IPAPI.is enrichment data
# abuseipdb: {...}, # Future: AbuseIPDB data
# shodan: {...} # Future: Shodan data
# }
# Data stored under the given source's key in +network_data+, or an empty
# hash when nothing is stored.
#
# @param source [Symbol, String]
def network_data_for(source)
  stored = network_data&.dig(source.to_s)
  stored || {}
end
# Stores +data+ under the given source's key in +network_data+, initialising
# the container first if needed. Does not save the record.
#
# @param source [Symbol, String]
# @param data [Object]
def set_network_data(source, data)
  self.network_data = {} unless network_data
  network_data[source.to_s] = data
end
# Whether +network_data+ holds a non-blank entry for the given source.
#
# @param source [Symbol, String]
# @return [Boolean, nil] nil when +network_data+ itself is nil
def has_network_data_from?(source)
  key = source.to_s
  network_data&.key?(key) && network_data[key].present?
end
# IPAPI tracking at /24 granularity
# Finds or creates the tracking network that covers the given address:
# the enclosing /24 for IPv4, the enclosing /64 for IPv6. Newly created
# records are marked as auto-generated IPAPI tracking networks.
#
# @param ip_address [String, #to_s]
# @return [NetworkRange]
def self.find_or_create_tracking_network_for_ip(ip_address)
  address = IPAddr.new(ip_address.to_s)
  prefix = address.ipv4? ? 24 : 64
  tracking_cidr = "#{address.mask(prefix)}/#{prefix}"

  find_or_create_by(network: tracking_cidr) do |record|
    record.source = 'auto_generated'
    record.creation_reason = 'IPAPI tracking network'
  end
end
# Decides whether IPAPI should be queried for the given address, using its
# tracking network (/24 or /64) as the bookkeeping unit.
#
# Note that this looks the tracking network up via
# find_or_create_tracking_network_for_ip, so calling it may create a record.
#
# Order of checks:
# 1. a fetch already in progress              -> false
# 2. the tracking network was never queried   -> true
# 3. the CIDR IPAPI returned last time does not cover this address -> true
#    (an unparsable stored CIDR is logged and ignored)
# 4. otherwise, true only if the last query is more than a year old
# Any unexpected error is logged and answered with true.
#
# @param ip_address [String, #to_s]
# @return [Boolean]
def self.should_fetch_ipapi_for_ip?(ip_address)
  tracker = find_or_create_tracking_network_for_ip(ip_address)
  return false if tracker.is_fetching_api_data?(:ipapi)

  last_queried = tracker.network_data&.dig('ipapi_queried_at')
  return true if last_queried.nil?

  covering_cidr = tracker.network_data&.dig('ipapi_returned_cidr')
  if covering_cidr.present?
    begin
      return true unless IPAddr.new(covering_cidr).include?(IPAddr.new(ip_address.to_s))
    rescue IPAddr::InvalidAddressError
      Rails.logger.warn "Invalid CIDR stored in ipapi_returned_cidr: #{covering_cidr}"
    end
  end

  Time.at(last_queried) < 1.year.ago
rescue => error
  Rails.logger.error "Error checking IPAPI fetch status for #{ip_address}: #{error.message}"
  true # Default to fetching on error
end
# Records on this tracking network that IPAPI has been queried: stores the
# current time (integer seconds) and the CIDR IPAPI answered with, then saves.
#
# @param returned_cidr [String] the CIDR IPAPI actually returned (may be more
#   specific than the tracking network)
def mark_ipapi_queried!(returned_cidr)
  self.network_data ||= {}
  network_data.merge!(
    'ipapi_queried_at' => Time.current.to_i,
    'ipapi_returned_cidr' => returned_cidr
  )
  save!
end
# String representations
# String form of a range is its CIDR notation (delegates to #cidr).
def to_s
cidr
end
# URL parameter form of the range: the CIDR with "/" replaced by "_".
# self.findd reverses this substitution.
#
# @return [String]
def to_param
  cidr.to_s.tr('/', '_')
end
# Whether any events exist for this range. Unsaved ranges always answer false.
#
# EventDdb is asked first for a count over this range's id plus the ids of
# all contained ranges. If that count is not positive (or the query failed),
# the CIDR-based #events query decides.
#
# @return [Boolean]
def has_events?
  return false unless persisted?

  network_ids = child_ranges.pluck(:id).unshift(id)
  duckdb_count = with_duckdb_fallback { EventDdb.network_event_count(network_ids) }
  return true if duckdb_count&.positive?

  events.exists?
end
# Events whose ip_address is contained in (or equal to) this range's CIDR.
# The match is by address containment on Event, not by network_range_id, so
# it also works for ranges that are not saved.
#
# @return [ActiveRecord::Relation]
def events
Event.where("ip_address <<= ?", cidr)
end
# Up to +limit+ events for this range, using Event's +recent+ scope
# (defined outside this file).
#
# @param limit [Integer] maximum number of events (default 100)
def recent_events(limit: 100)
events.recent.limit(limit)
end
# Enabled rules on this range whose WAF action is deny.
#
# @return [ActiveRecord::Relation]
def blocking_rules
  criteria = { waf_action: :deny, enabled: true }
  rules.where(criteria)
end
# Enabled rules on this range that have no expiry or expire in the future.
#
# @return [ActiveRecord::Relation]
def active_rules
  enabled_rules = rules.enabled
  enabled_rules.where("expires_at IS NULL OR expires_at > ?", Time.current)
end
# Ranges strictly contained by this one that have at least one enabled rule.
# Used when creating a supernet rule to identify redundant child rules.
#
# The full CIDR is bound: with only the bare address the database compares
# against a single host, and no range can be strictly contained in a host,
# so the query could never match.
#
# @return [ActiveRecord::Relation]
def child_network_ranges_with_rules
  NetworkRange
    .where("network << ?::inet", cidr) # network is strictly contained by this range
    .joins(:rules)
    .where(rules: { enabled: true })
    .distinct
end
# Enabled rules attached to ranges strictly contained by this one.
# Used after creating a rule to expire redundant child rules.
#
# @return [ActiveRecord::Relation]
def child_rules
  on_subnets = Rule.joins(:network_range).where("network_ranges.network << ?::inet", cidr)
  on_subnets.where(enabled: true)
end
# Ranges that strictly contain this one and have at least one enabled rule.
# Used to check whether creating a rule here would be redundant.
#
# @return [ActiveRecord::Relation]
def parent_network_ranges_with_rules
  containing = NetworkRange.where("?::inet << network", cidr)
  with_enabled_rules = containing.joins(:rules).where(rules: { enabled: true })
  with_enabled_rules.distinct
end
# Enabled rules attached to ranges that strictly contain this one, with the
# most specific containing range first. Used before creating a rule to check
# whether it would be redundant.
#
# @return [ActiveRecord::Relation]
def supernet_rules
  on_supernets = Rule.joins(:network_range).where("?::inet << network_ranges.network", cidr)
  enabled_only = on_supernets.where(enabled: true)
  enabled_only.order("masklen(network_ranges.network) DESC")
end
# Whether WAF policies need to be (re-)evaluated for this range.
# True when the range was never evaluated, or when the most recently updated
# WafPolicy is newer than the last evaluation. False when no policies exist.
#
# @return [Boolean]
def needs_policy_evaluation?
  last_run = policies_evaluated_at
  return true if last_run.nil?

  newest_change = WafPolicy.maximum(:updated_at)
  newest_change.nil? ? false : last_run < newest_change
end
private
# before_validation callback: defaults +source+ to 'api_imported' when it is
# not set, so the inclusion validation on +source+ can pass.
def set_default_source
self.source ||= 'api_imported'
end
# True when the last save changed any attribute that contained ranges may
# inherit. Guard for the update_children_inheritance! callback, which is
# currently commented out in the class body.
def should_update_children_inheritance?
  inheritable = %i[asn company country is_datacenter is_proxy is_vpn]
  inheritable.any? { |attribute| saved_change_to_attribute?(attribute) }
end
# Logs every contained range that has no intelligence of its own (and so
# could inherit from this range). It only logs; nothing is written, because
# inherited_intelligence resolves the parent data at read time.
def update_children_inheritance!
  without_intelligence = {
    asn: nil,
    company: nil,
    country: nil,
    is_datacenter: false,
    is_proxy: false,
    is_vpn: false
  }

  child_ranges.where(without_intelligence).find_each do |descendant|
    Rails.logger.info "Child range #{descendant.cidr} can now inherit from parent #{cidr}"
  end
end
# Import-related class methods
# Per-source import statistics: row count, earliest created_at and latest
# updated_at, ordered by source.
#
# Defined with `def self.`, so the `private` keyword earlier in the class
# does not apply to it.
#
# @return [ActiveRecord::Relation]
def self.import_stats_by_source
  columns = [
    :source,
    'COUNT(*) as count',
    'MIN(created_at) as first_import',
    'MAX(updated_at) as last_update'
  ]
  group(:source).select(*columns).order(:source)
end
# Coverage counters for GeoLite-imported ranges (one COUNT query per entry).
#
# Defined with `def self.`, so the `private` keyword earlier in the class
# does not apply to it.
#
# @return [Hash{Symbol => Integer}]
def self.geolite_coverage_stats
  imported = geolite_imported

  {
    total_networks: imported.count,
    asn_networks: geolite_asn.count,
    country_networks: geolite_country.count,
    with_asn_data: imported.where.not(asn: nil).count,
    with_country_data: imported.where.not(country: nil).count,
    with_proxy_data: imported.where(is_proxy: true).count,
    unique_countries: imported.distinct.count(:country),
    unique_asns: imported.distinct.count(:asn),
    ipv4_networks: imported.ipv4.count,
    ipv6_networks: imported.ipv6.count
  }
end
# Search helper accepting either a single address or a CIDR network.
#
# * A bare address returns every range that contains (or equals) it.
# * A CIDR ("address/prefix") returns the range stored with exactly that network.
# * Blank or unparsable input returns an empty relation.
#
# The input is parsed once. The earlier nested rescue re-parsed the same
# string with the same call, so its network branch could never run, and
# because IPAddr#to_s omits the prefix a CIDR query was silently treated as a
# lookup of its base address.
#
# Defined with `def self.`, so the `private` keyword earlier in the class
# does not apply to it.
#
# @param query [String, nil]
# @return [ActiveRecord::Relation]
def self.find_by_ip_or_network(query)
  return none if query.blank?

  text = query.to_s.strip
  parsed = IPAddr.new(text)

  if text.include?('/')
    where(network: "#{parsed}/#{parsed.prefix}")
  else
    where("network >>= ?", parsed.to_s)
  end
rescue IPAddr::Error
  none
end
end