Fix some blocked/allow laggards after migrating. Add DuckDB for outstanding analytics performance. Start adding an import for all bot networks.

This commit is contained in:
Dan Milne
2025-11-18 16:40:05 +11:00
parent ef56779584
commit 3f274c842c
37 changed files with 3522 additions and 151 deletions

View File

@@ -158,13 +158,26 @@ class NetworkRange < ApplicationRecord
end
# Claims the "fetching" slot for +source+ on this network range.
#
# The check and the write both happen inside one transaction, after the row
# has been locked, so the status is examined before it is written. Writing
# the marker before taking the lock would make the double-check below always
# see a fetch in progress.
#
# @param source [Symbol, String] API source identifier; stored under its
#   +to_s+ form in network_data['fetching_status']
# @return [Boolean] true when this call recorded a new fetching marker,
#   false when +is_fetching_api_data?+ reports one already exists
# @raise propagates whatever +lock!+ or +save!+ raise
def mark_as_fetching_api_data!(source)
  # Use database-level locking to prevent race conditions
  transaction do
    # Reload with lock to get fresh data
    lock!

    # Double-check that we're not already fetching. The outcome is produced
    # as the block's value rather than via `return`, because leaving a
    # transaction block with a non-local exit has not behaved the same way
    # (commit vs. rollback) across Rails versions.
    if is_fetching_api_data?(source)
      Rails.logger.info "Another job already started fetching #{source} for #{cidr}"
      false
    else
      self.network_data ||= {}
      self.network_data['fetching_status'] ||= {}
      self.network_data['fetching_status'][source.to_s] = {
        'started_at' => Time.current.to_f,
        'job_id' => SecureRandom.hex(8)
      }
      save!
      true
    end
  end
end
def clear_fetching_status!(source)
@@ -222,9 +235,29 @@ class NetworkRange < ApplicationRecord
end
# Counts events per user agent for this network range, cached for 5 minutes.
#
# Persisted ranges with events are aggregated through DuckDB across this
# range and its child ranges. When that yields nil (the helper returns nil
# on failure), the tally is computed from +events+ instead of caching an
# empty hash. Every other range is tallied from +events+ directly.
#
# @return [Hash{String => Integer}] user agent => number of events
def agent_tally
  # ActiveRecord's cache_key is "<model>/new" for every unsaved record, so
  # unsaved (virtual) ranges are keyed by CIDR to keep them from sharing one
  # cache entry.
  tally_key = persisted? ? cache_key : "network_ranges/virtual/#{cidr}"

  Rails.cache.fetch("#{tally_key}:agent_tally", expires_in: 5.minutes) do
    if persisted? && events_count.positive?
      # Include child network ranges to capture all traffic within this network block
      network_ids = [id] + child_ranges.pluck(:id)

      # Try DuckDB first for much faster aggregation; nil means it was
      # unavailable or failed, so fall back to the in-memory tally.
      # NOTE(review): the fallback covers only this range's own +events+;
      # confirm whether child ranges need to be included there as well.
      with_duckdb_fallback { EventDdb.network_agent_tally(network_ids) } ||
        events.map(&:user_agent).tally
    else
      # Virtual network - fallback to PostgreSQL CIDR query
      events.map(&:user_agent).tally
    end
  end
end
# Runs the given block (intended for a DuckDB query) and hands back its value.
#
# Any StandardError raised by the block is logged as a warning and turned
# into nil. A nil return, whether from the block itself or from a rescued
# error, is the signal callers use to decide on a fallback.
#
# @yieldreturn [Object, nil] the query result
# @return [Object, nil] the block's value, or nil if the block raised
def with_duckdb_fallback(&block)
  yield
rescue StandardError => error
  Rails.logger.warn "[NetworkRange] DuckDB query failed, falling back to PostgreSQL: #{error.message}"
  nil
end
# Geographic lookup
@@ -334,6 +367,9 @@ class NetworkRange < ApplicationRecord
def self.should_fetch_ipapi_for_ip?(ip_address)
tracking_network = find_or_create_tracking_network_for_ip(ip_address)
# Check if currently being fetched (prevents duplicate jobs)
return false if tracking_network.is_fetching_api_data?(:ipapi)
# Check if /24 has been queried recently
queried_at = tracking_network.network_data&.dig('ipapi_queried_at')
return true if queried_at.nil?