Use only parquet files for events

This commit is contained in:
Dan Milne
2025-12-03 17:16:38 +11:00
parent 032243ba6a
commit 693851f664
12 changed files with 673 additions and 165 deletions

View File

@@ -15,6 +15,7 @@ class NetworkRange < ApplicationRecord
# Associations
# Rules are destroyed together with the network range.
has_many :rules, dependent: :destroy
# Events are kept when the range is destroyed; their network_range_id is set to NULL.
has_many :events, foreign_key: :network_range_id, dependent: :nullify
# The owning user is optional, so a range may exist without one.
belongs_to :user, optional: true
# Validations
@@ -36,8 +37,8 @@ class NetworkRange < ApplicationRecord
# Ranges whose source is either 'geolite_asn' or 'geolite_country'.
scope :geolite_imported, -> { where(source: ['geolite_asn', 'geolite_country']) }
# Ranges whose source is 'geolite_asn'.
scope :geolite_asn, -> { where(source: 'geolite_asn') }
# Ranges whose source is 'geolite_country'.
scope :geolite_country, -> { where(source: 'geolite_country') }
# NOTE(review): :with_events and :most_active are each declared twice below.
# This looks like both sides of a diff (column-based versions replaced by
# join-based ones) - confirm which pair is live in the actual file.
# Column-based form: filters on the events_count column being greater than zero.
scope :with_events, -> { where("events_count > 0") }
# Column-based form: orders by the events_count column, highest first.
scope :most_active, -> { order(events_count: :desc) }
# Join-based form: ranges with at least one associated event row; DISTINCT
# collapses the duplicate range rows the join produces.
scope :with_events, -> { joins(:events).distinct }
# Join-based form: groups joined rows per range and orders by the number of
# events, highest first. The join means ranges without events are not returned.
scope :most_active, -> { joins(:events).group('network_ranges.id').order('COUNT(events.id) DESC') }
# Callbacks
before_validation :set_default_source
@@ -241,7 +242,7 @@ class NetworkRange < ApplicationRecord
def agent_tally
Rails.cache.fetch("#{cache_key}:agent_tally", expires_in: 5.minutes) do
# Use DuckDB for fast agent tally instead of loading all events into memory
if persisted? && events_count > 0
if persisted? && has_events?
# Include child network ranges to capture all traffic within this network block
network_ids = [id] + child_ranges.pluck(:id)
@@ -417,10 +418,16 @@ class NetworkRange < ApplicationRecord
cidr.to_s.gsub('/', '_')
end
# Analytics methods - events_count is now a counter cache column maintained by database triggers
# This is much more performant than the previous implementation that did complex network queries
def events_count
self[:events_count] || 0
# Check if network range has any events using DuckDB for performance
# Child ranges are included in the lookup, so traffic anywhere inside this
# network block counts. Unsaved records always report false.
#
# @return [Boolean] true when DuckDB reports a positive event count for this
#   range or its children; otherwise the result of `events.exists?`
def has_events?
  return false unless persisted?

  range_ids = [id, *child_ranges.pluck(:id)]

  # A nil or zero DuckDB answer is not treated as final; fall through to
  # `events.exists?` in that case.
  duckdb_count = with_duckdb_fallback { EventDdb.network_event_count(range_ids) }
  return true if duckdb_count&.positive?

  events.exists?
end
def events