Add 'tags' to event model. Add a data import system - currently for MaxMind zip files

This commit is contained in:
Dan Milne
2025-11-11 10:31:36 +11:00
parent 772fae7e8b
commit 26216da9ca
34 changed files with 3580 additions and 14 deletions

View File

@@ -0,0 +1,101 @@
class GeoliteAsnImportJob < ApplicationJob
  queue_as :default

  # No retry needed for CSV processing - either works or fails immediately.
  #
  # Downloads the attached GeoLite ASN file from storage to a local temp
  # file, runs GeoliteAsnImporter against it, and records progress and the
  # final outcome on the DataImport record. On importer failure the
  # DataImport is marked failed and the error is re-raised so the job
  # backend also sees it.
  #
  # @param data_import [DataImport] record with an attached source file
  def perform(data_import)
    Rails.logger.info "Starting GeoLite ASN import job for DataImport #{data_import.id}"

    # Check if file is attached
    unless data_import.file.attached?
      Rails.logger.error "No file attached to DataImport #{data_import.id}"
      data_import.fail!("No file attached")
      return
    end

    # Download the file to a temporary location
    temp_file = download_to_temp_file(data_import.file.blob)
    if temp_file.nil?
      Rails.logger.error "Failed to download file from storage"
      data_import.fail!("Failed to download file from storage")
      return
    end

    Rails.logger.info "File downloaded to: #{temp_file}"
    Rails.logger.info "File exists: #{File.exist?(temp_file)}"
    Rails.logger.info "File size: #{File.size(temp_file)} bytes" if File.exist?(temp_file)

    # Mark as processing
    data_import.start_processing!

    importer = nil
    begin
      Rails.logger.info "Creating GeoliteAsnImporter"
      importer = GeoliteAsnImporter.new(temp_file, data_import: data_import)
      Rails.logger.info "Calling importer.import"
      result = importer.import

      # Update final stats
      data_import.update_progress(
        processed: result[:processed_records],
        failed: result[:failed_records],
        stats: {
          total_records: result[:total_records],
          errors: result[:errors].last(10), # Keep last 10 errors
          completed_at: Time.current
        }
      )
      data_import.complete!

      # Log completion
      Rails.logger.info "GeoLite ASN import completed: #{result[:processed_records]} processed, #{result[:failed_records]} failed"
    rescue => e
      Rails.logger.error "GeoLite ASN import failed: #{e.message}"
      Rails.logger.error e.backtrace.join("\n")

      # Preserve whatever progress the importer made before the failure.
      # NOTE(review): reaching into the importer's ivars couples this job to
      # GeoliteAsnImporter internals - consider exposing a public progress API.
      if importer
        data_import.update_progress(
          processed: importer.instance_variable_get(:@processed_records),
          failed: importer.instance_variable_get(:@failed_records),
          stats: {
            total_records: importer.instance_variable_get(:@total_records),
            current_file: File.basename(temp_file),
            errors: importer.instance_variable_get(:@errors).last(10),
            failed_at: Time.current
          }
        )
      end
      data_import.fail!(e.message)
      raise
    ensure
      # Cleanup temporary files
      File.delete(temp_file) if temp_file && File.exist?(temp_file)
    end
  end

  private

  # Copies the ActiveStorage blob to a local temp file and returns its path,
  # or nil when the download fails. The temp file keeps the original file
  # extension (e.g. ".zip") so downstream format detection still works.
  #
  # @param blob [ActiveStorage::Blob]
  # @return [String, nil] path to the downloaded temp file
  def download_to_temp_file(blob)
    filename = blob.filename.to_s
    # [basename, extname] form preserves the extension in the temp path;
    # passing the whole filename as a single prefix would drop it.
    temp_file = Tempfile.new([File.basename(filename, ".*"), File.extname(filename)])
    temp_file.binmode

    # Stream the blob to disk instead of loading it fully into memory
    # (MaxMind archives can be large).
    blob.open do |file|
      IO.copy_stream(file, temp_file)
    end

    temp_file.close
    temp_file.path
  rescue => e
    Rails.logger.error "Error downloading file: #{e.message}"
    Rails.logger.error e.backtrace.join("\n")
    temp_file&.close
    temp_file&.unlink
    nil
  end
end

View File

@@ -0,0 +1,101 @@
class GeoliteCountryImportJob < ApplicationJob
  queue_as :default

  # No retry needed for CSV processing - either works or fails immediately.
  #
  # Downloads the attached GeoLite Country file from storage to a local
  # temp file, runs GeoliteCountryImporter against it, and records progress
  # and the final outcome on the DataImport record. On importer failure the
  # DataImport is marked failed and the error is re-raised so the job
  # backend also sees it.
  #
  # @param data_import [DataImport] record with an attached source file
  def perform(data_import)
    Rails.logger.info "Starting GeoLite Country import job for DataImport #{data_import.id}"

    # Check if file is attached
    unless data_import.file.attached?
      Rails.logger.error "No file attached to DataImport #{data_import.id}"
      data_import.fail!("No file attached")
      return
    end

    # Download the file to a temporary location
    temp_file = download_to_temp_file(data_import.file.blob)
    if temp_file.nil?
      Rails.logger.error "Failed to download file from storage"
      data_import.fail!("Failed to download file from storage")
      return
    end

    Rails.logger.info "File downloaded to: #{temp_file}"
    Rails.logger.info "File exists: #{File.exist?(temp_file)}"
    Rails.logger.info "File size: #{File.size(temp_file)} bytes" if File.exist?(temp_file)

    # Mark as processing
    data_import.start_processing!

    importer = nil
    begin
      Rails.logger.info "Creating GeoliteCountryImporter"
      importer = GeoliteCountryImporter.new(temp_file, data_import: data_import)
      Rails.logger.info "Calling importer.import"
      result = importer.import

      # Update final stats
      data_import.update_progress(
        processed: result[:processed_records],
        failed: result[:failed_records],
        stats: {
          total_records: result[:total_records],
          errors: result[:errors].last(10), # Keep last 10 errors
          completed_at: Time.current
        }
      )
      data_import.complete!

      # Log completion
      Rails.logger.info "GeoLite Country import completed: #{result[:processed_records]} processed, #{result[:failed_records]} failed"
    rescue => e
      Rails.logger.error "GeoLite Country import failed: #{e.message}"
      Rails.logger.error e.backtrace.join("\n")

      # Preserve whatever progress the importer made before the failure.
      # NOTE(review): reaching into the importer's ivars couples this job to
      # GeoliteCountryImporter internals - consider exposing a public progress API.
      if importer
        data_import.update_progress(
          processed: importer.instance_variable_get(:@processed_records),
          failed: importer.instance_variable_get(:@failed_records),
          stats: {
            total_records: importer.instance_variable_get(:@total_records),
            current_file: File.basename(temp_file),
            errors: importer.instance_variable_get(:@errors).last(10),
            failed_at: Time.current
          }
        )
      end
      data_import.fail!(e.message)
      raise
    ensure
      # Cleanup temporary files
      File.delete(temp_file) if temp_file && File.exist?(temp_file)
    end
  end

  private

  # Copies the ActiveStorage blob to a local temp file and returns its path,
  # or nil when the download fails. The temp file keeps the original file
  # extension (e.g. ".zip") so downstream format detection still works.
  #
  # @param blob [ActiveStorage::Blob]
  # @return [String, nil] path to the downloaded temp file
  def download_to_temp_file(blob)
    filename = blob.filename.to_s
    # [basename, extname] form preserves the extension in the temp path;
    # passing the whole filename as a single prefix would drop it.
    temp_file = Tempfile.new([File.basename(filename, ".*"), File.extname(filename)])
    temp_file.binmode

    # Stream the blob to disk instead of loading it fully into memory
    # (MaxMind archives can be large).
    blob.open do |file|
      IO.copy_stream(file, temp_file)
    end

    temp_file.close
    temp_file.path
  rescue => e
    Rails.logger.error "Error downloading file: #{e.message}"
    Rails.logger.error e.backtrace.join("\n")
    temp_file&.close
    temp_file&.unlink
    nil
  end
end

View File

@@ -44,16 +44,20 @@ class ProcessWafAnalyticsJob < ApplicationJob
end
def analyze_geographic_distribution(event)
return unless event.country_code.present?
return unless event.has_geo_data?
# Check if this country is unusual globally
country_code = event.lookup_country
return unless country_code.present?
# Check if this country is unusual globally by joining through network ranges
country_events = Event
.where(country_code: event.country_code)
.joins("JOIN network_ranges ON events.ip_address <<= network_ranges.network")
.where("network_ranges.country = ?", country_code)
.where(timestamp: 1.hour.ago..Time.current)
# If this is the first event from this country or unusual spike
if country_events.count == 1 || country_events.count > 100
Rails.logger.info "Unusual geographic activity from #{event.country_code}"
Rails.logger.info "Unusual geographic activity from #{country_code}"
end
end

View File

@@ -26,12 +26,14 @@ class ProcessWafEventJob < ApplicationJob
# Create the WAF event record
event = Event.create_from_waf_payload!(event_id, single_event_data)
# Enrich with geo-location data if missing
if event.ip_address.present? && event.country_code.blank?
# Log geo-location data status (uses NetworkRange delegation)
if event.ip_address.present?
begin
event.enrich_geo_location!
unless event.has_geo_data?
Rails.logger.debug "No geo data available for event #{event.id} with IP #{event.ip_address}"
end
rescue => e
Rails.logger.warn "Failed to enrich geo location for event #{event.id}: #{e.message}"
Rails.logger.warn "Failed to check geo data for event #{event.id}: #{e.message}"
end
end

View File

@@ -7,7 +7,7 @@
class ProcessWafPoliciesJob < ApplicationJob
queue_as :waf_policies
retry_on StandardError, wait: :exponentially_longer, attempts: 3
retry_on StandardError, wait: 5.seconds, attempts: 3
def perform(network_range_id:, event_id: nil)
# Find the network range