Files
baffle-hub/app/services/network_data_importer.rb

201 lines
6.3 KiB
Ruby

# frozen_string_literal: true
# NetworkDataImporter - Service for importing production network data
#
# Imports network ranges from JSONL format with rich metadata.
# Optimized for bulk importing large datasets.
class NetworkDataImporter
  # Metadata keys that are mapped onto dedicated attributes. Every other
  # metadata key is kept, serialized as JSON, in :additional_data.
  PROMOTED_METADATA_KEYS = %w[
    asn org company_name country_code is_datacenter is_proxy is_vpn abuser_score
  ].freeze
  private_constant :PROMOTED_METADATA_KEYS

  # Imports network ranges from a JSONL file (one JSON object per line).
  #
  # Lines that cannot be parsed or converted are logged through Rails.logger
  # and skipped; blank lines are skipped silently.
  #
  # @param file_path [String] path of the JSONL file to read
  # @param limit [Integer, nil] maximum number of records to import; nil reads
  #   the whole file
  # @param batch_size [Integer] number of records handed to one bulk insert
  # @return [Integer] number of records imported (see .import_batch for how
  #   a batch is counted)
  def self.import_from_jsonl(file_path, limit: nil, batch_size: 1000)
    puts "Starting import from #{file_path}"

    imported_count = import_lines(file_path, limit: limit, batch_size: batch_size) do |running_total|
      puts "Imported #{running_total} records..."
    end

    puts "Import completed. Total records: #{imported_count}"
    imported_count
  end

  # Imports at most +sample_size+ records from a JSONL file, in batches of 100.
  #
  # @param file_path [String] path of the JSONL file to read
  # @param sample_size [Integer] maximum number of records to import
  # @return [Integer] number of records imported
  def self.import_sample(file_path, sample_size: 1000)
    puts "Importing sample of #{sample_size} records from #{file_path}"

    imported_count = import_lines(file_path, limit: sample_size, batch_size: 100)

    puts "Sample import completed. Total records: #{imported_count}"
    imported_count
  end

  # Manual smoke test: imports a 10,000 record sample, prints what
  # NetworkRange and IpRangeResolver return for each test IP, then creates
  # rules for the first range that contains the first test IP.
  #
  # This writes to the database (ranges, rules and possibly a user).
  #
  # @param file_path [String] path of the JSONL file to sample from
  # @param test_ips [Array<String>] IP addresses to look up
  # @return [nil]
  def self.test_import_with_lookup(file_path, test_ips: ['8.8.8.8', '1.1.1.1', '192.168.1.100'])
    puts "Importing sample data and testing IP lookups..."
    import_sample(file_path, sample_size: 10000)

    puts "\n=== Testing IP Resolution ==="
    test_ips.each { |ip| report_ip_resolution(ip) }

    puts "\n=== Testing Rule Creation ==="
    exercise_rule_creation(test_ips.first)
  end

  # Prints the ranges matching +ip+ according to NetworkRange.contains_ip and
  # the first three results of IpRangeResolver.resolve.
  def self.report_ip_resolution(ip)
    puts "\nTesting IP: #{ip}"

    ranges = NetworkRange.contains_ip(ip)
    puts "Found #{ranges.count} matching ranges"
    ranges.each_with_index do |range, index|
      puts " #{index + 1}. #{range.cidr} (#{range.prefix_length})"
      puts " Company: #{range.company || 'Unknown'}"
      puts " ASN: #{range.asn || 'Unknown'}"
      puts " Country: #{range.country || 'Unknown'}"
      puts " Datacenter: #{range.is_datacenter? ? 'Yes' : 'No'}"
      puts " VPN: #{range.is_vpn? ? 'Yes' : 'No'}"
      puts " Proxy: #{range.is_proxy? ? 'Yes' : 'No'}"
    end

    puts "\nUsing IpRangeResolver:"
    resolved = IpRangeResolver.resolve(ip)
    puts "Resolved #{resolved.count} ranges"
    resolved.first(3).each_with_index do |range_data, index|
      intel = range_data[:intelligence]
      puts " #{index + 1}. #{range_data[:cidr]} (specificity: #{range_data[:specificity]})"
      puts " Company: #{intel[:company] || 'Unknown'}"
      puts " Inherited: #{intel[:inherited] ? 'Yes' : 'No'}"
    end
  end
  private_class_method :report_ip_resolution

  # Creates a deny rule and a surgical block for the first range containing
  # +test_ip+, printing the resulting rule ids. Does nothing when no range
  # matches.
  def self.exercise_rule_creation(test_ip)
    matching_range = NetworkRange.contains_ip(test_ip).first
    return unless matching_range

    puts "Creating rule for #{matching_range.cidr}"
    # NOTE(review): when no user exists this creates one with a hard-coded,
    # weak password. Confirm this helper is never run against production.
    user = User.first || User.create!(email_address: 'test@example.com', password: 'password123')
    rule = Rule.create_network_rule(matching_range.cidr, action: 'deny', user: user)
    puts "Rule created: #{rule.id} - #{rule.cidr}"
    puts "Rule network intelligence: #{rule.network_intelligence[:company]}"

    puts "\nTesting surgical blocking for IP #{test_ip}"
    parent_cidr = matching_range.cidr
    block_rule, exception_rule = Rule.create_surgical_block(
      test_ip, parent_cidr, user: user, reason: 'Test surgical block'
    )
    puts "Block rule: #{block_rule.id} - #{block_rule.cidr}"
    puts "Exception rule: #{exception_rule.id} - #{exception_rule.cidr}"
  end
  private_class_method :exercise_rule_creation

  # Shared read/convert/flush loop behind .import_from_jsonl and
  # .import_sample. Yields the running total after every full batch is
  # flushed, so callers can report progress.
  #
  # @param file_path [String] path of the JSONL file to read
  # @param limit [Integer, nil] maximum number of records, nil for no limit
  # @param batch_size [Integer] records per bulk insert
  # @return [Integer] number of records imported
  def self.import_lines(file_path, limit:, batch_size:)
    imported_count = 0
    pending = []

    File.foreach(file_path) do |line|
      # Records queued but not yet flushed count towards the limit too.
      # Checking only the flushed total notices the limit at batch boundaries
      # and can overshoot it by up to a whole batch.
      break if limit && imported_count + pending.size >= limit
      next if line.strip.empty?

      begin
        pending << convert_to_network_range(JSON.parse(line))
      rescue JSON::ParserError => e
        Rails.logger.error "Failed to parse line: #{e.message}"
        next
      rescue => e
        Rails.logger.error "Error processing record: #{e.message}"
        next
      end

      next if pending.size < batch_size

      imported_count += import_batch(pending)
      yield imported_count if block_given?
      pending = []
    end

    # Flush whatever is left over after the last full batch.
    imported_count += import_batch(pending) if pending.any?
    imported_count
  end
  private_class_method :import_lines

  # Internal helpers. The original file placed a bare `private` here, which
  # has no effect on `def self.` methods, so the three methods below have
  # always been publicly callable. They are left public so existing callers
  # keep working.

  # Builds the attribute hash for one NetworkRange from a parsed JSONL record.
  #
  # @param data [Hash] parsed record; reads 'network_start', 'ip_version',
  #   'network_prefix' and the optional 'metadata' hash
  # @return [Hash] attributes keyed by symbol, with :network in CIDR notation
  # @raise [ArgumentError] when 'network_start' or 'network_prefix' is missing
  #   or 'network_start' cannot be converted to an address; the importers
  #   rescue this, log it and skip the record instead of storing it under
  #   a bogus 0.0.0.0 network
  def self.convert_to_network_range(data)
    start_value = data['network_start']
    prefix = data['network_prefix']
    if start_value.nil? || prefix.nil?
      raise ArgumentError, 'record is missing network_start or network_prefix'
    end

    start_ip = integer_to_ip!(start_value, data['ip_version'])
    metadata = data['metadata'] || {}
    abuser_score = metadata['abuser_score']

    {
      network: "#{start_ip}/#{prefix}",
      source: 'production_import',
      asn: metadata['asn'],
      asn_org: metadata['org'],
      company: metadata['company_name'],
      country: metadata['country_code'],
      is_datacenter: metadata['is_datacenter'] || false,
      is_proxy: metadata['is_proxy'] || false,
      is_vpn: metadata['is_vpn'] || false,
      abuser_scores: abuser_score ? { score: abuser_score } : nil,
      additional_data: metadata.except(*PROMOTED_METADATA_KEYS).to_json
    }
  end

  # Converts an integer address to its string form, logging and falling back
  # to "0.0.0.0" when the conversion fails.
  #
  # @param integer [Integer] numeric address
  # @param version [Integer] 4 for IPv4; any other value is treated as IPv6
  # @return [String] the address, or "0.0.0.0" on failure
  def self.integer_to_ip(integer, version)
    integer_to_ip!(integer, version)
  rescue => e
    Rails.logger.error "Failed to convert integer #{integer} to IP: #{e.message}"
    "0.0.0.0"
  end

  # Strict variant of .integer_to_ip: lets the IPAddr error propagate instead
  # of substituting a placeholder address.
  def self.integer_to_ip!(integer, version)
    family = version == 4 ? Socket::AF_INET : Socket::AF_INET6
    IPAddr.new(integer, family).to_s
  end
  private_class_method :integer_to_ip!

  # Inserts one batch with NetworkRange.insert_all. If the bulk insert raises,
  # the error is logged and each record is retried on its own with create!.
  #
  # @param batch_data [Array<Hash>] attribute hashes to insert
  # @return [Integer] the batch size when the bulk insert succeeds, otherwise
  #   the number of individual create! calls that succeeded
  def self.import_batch(batch_data)
    NetworkRange.insert_all(batch_data)
    batch_data.size
  rescue => e
    Rails.logger.error "Failed to import batch: #{e.message}"
    # Fall back to one-by-one inserts so a single bad row does not cost the
    # whole batch, and count only the rows that actually went through.
    batch_data.count do |attributes|
      begin
        NetworkRange.create!(attributes)
        true
      rescue => individual_error
        Rails.logger.error "Failed to import individual record: #{individual_error.message}"
        false
      end
    end
  end
end