# frozen_string_literal: true require 'ostruct' # EventDdb - DuckDB-backed analytics queries for events # Provides an ActiveRecord-like interface for querying DuckDB events table # Falls back to PostgreSQL Event model if DuckDB is unavailable class EventDdb class << self # Get DuckDB service def service AnalyticsDuckdbService.instance end # Total events since timestamp def count_since(start_time) service.with_connection do |conn| result = conn.query("SELECT COUNT(*) as count FROM events WHERE timestamp >= ?", start_time) result.first&.first || 0 end rescue StandardError => e Rails.logger.error "[EventDdb] Error in count_since: #{e.message}" nil # Fallback to PostgreSQL end # Event breakdown by WAF action def breakdown_by_action(start_time) service.with_connection do |conn| result = conn.query(<<~SQL, start_time) SELECT waf_action, COUNT(*) as count FROM events WHERE timestamp >= ? GROUP BY waf_action SQL # Convert to hash like ActiveRecord .group.count returns # DuckDB returns integer enum values, map to string names # 0=deny, 1=allow, 2=redirect, 3=challenge, 4=log action_map = { 0 => "deny", 1 => "allow", 2 => "redirect", 3 => "challenge", 4 => "log" } result.to_a.to_h { |row| [action_map[row[0]] || "unknown", row[1]] } end rescue StandardError => e Rails.logger.error "[EventDdb] Error in breakdown_by_action: #{e.message}" nil end # Top countries with event counts def top_countries(start_time, limit = 10) service.with_connection do |conn| result = conn.query(<<~SQL, start_time, limit) SELECT country, COUNT(*) as count FROM events WHERE timestamp >= ? AND country IS NOT NULL GROUP BY country ORDER BY count DESC LIMIT ? SQL # Return array of [country, count] tuples like ActiveRecord # DuckDB returns arrays: [country, count] result.to_a.map { |row| [row[0], row[1]] } end rescue StandardError => e Rails.logger.error "[EventDdb] Error in top_countries: #{e.message}" nil end # Top blocked IPs def top_blocked_ips(start_time, limit = 10) service.with_connection do |conn| result = conn.query(<<~SQL, start_time, limit) SELECT ip_address, COUNT(*) as count FROM events WHERE timestamp >= ? AND waf_action = 0 GROUP BY ip_address ORDER BY count DESC LIMIT ? SQL # DuckDB returns arrays: [ip_address, count] result.to_a.map { |row| [row[0], row[1]] } end rescue StandardError => e Rails.logger.error "[EventDdb] Error in top_blocked_ips: #{e.message}" nil end # Hourly timeline aggregation def hourly_timeline(start_time, end_time) service.with_connection do |conn| result = conn.query(<<~SQL, start_time, end_time) SELECT DATE_TRUNC('hour', timestamp) as hour, COUNT(*) as count FROM events WHERE timestamp >= ? AND timestamp < ? GROUP BY hour ORDER BY hour SQL # Convert to hash with Time keys like ActiveRecord # DuckDB returns arrays: [hour, count] result.to_a.to_h { |row| [row[0], row[1]] } end rescue StandardError => e Rails.logger.error "[EventDdb] Error in hourly_timeline: #{e.message}" nil end # Top networks by traffic volume # Returns array of arrays: [network_range_id, event_count, unique_ips] def top_networks(start_time, limit = 50) service.with_connection do |conn| result = conn.query(<<~SQL, start_time, limit) SELECT network_range_id, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips FROM events WHERE timestamp >= ? AND network_range_id IS NOT NULL GROUP BY network_range_id ORDER BY event_count DESC LIMIT ? SQL result.to_a end rescue StandardError => e Rails.logger.error "[EventDdb] Error in top_networks: #{e.message}" nil end # Top companies # Returns array of OpenStruct objects with: company, event_count, unique_ips, network_count def top_companies(start_time, limit = 20) service.with_connection do |conn| result = conn.query(<<~SQL, start_time, limit) SELECT company, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count FROM events WHERE timestamp >= ? AND company IS NOT NULL GROUP BY company ORDER BY event_count DESC LIMIT ? SQL # Convert arrays to OpenStruct for attribute access result.to_a.map do |row| OpenStruct.new( company: row[0], event_count: row[1], unique_ips: row[2], network_count: row[3] ) end end rescue StandardError => e Rails.logger.error "[EventDdb] Error in top_companies: #{e.message}" nil end # Top ASNs # Returns array of OpenStruct objects with: asn, asn_org, event_count, unique_ips, network_count def top_asns(start_time, limit = 15) service.with_connection do |conn| result = conn.query(<<~SQL, start_time, limit) SELECT asn, asn_org, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count FROM events WHERE timestamp >= ? AND asn IS NOT NULL GROUP BY asn, asn_org ORDER BY event_count DESC LIMIT ? SQL # Convert arrays to OpenStruct for attribute access result.to_a.map do |row| OpenStruct.new( asn: row[0], asn_org: row[1], event_count: row[2], unique_ips: row[3], network_count: row[4] ) end end rescue StandardError => e Rails.logger.error "[EventDdb] Error in top_asns: #{e.message}" nil end # Network type breakdown (datacenter, VPN, proxy, standard) # Returns hash with network_type as key and hash of stats as value def network_type_breakdown(start_time) service.with_connection do |conn| result = conn.query(<<~SQL, start_time) SELECT CASE WHEN is_datacenter THEN 'datacenter' WHEN is_vpn THEN 'vpn' WHEN is_proxy THEN 'proxy' ELSE 'standard' END as network_type, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips, COUNT(DISTINCT network_range_id) as network_count FROM events WHERE timestamp >= ? GROUP BY network_type SQL # Convert arrays to hash: network_type => { event_count, unique_ips, network_count } result.to_a.to_h do |row| [ row[0], # network_type { "event_count" => row[1], "unique_ips" => row[2], "network_count" => row[3] } ] end end rescue StandardError => e Rails.logger.error "[EventDdb] Error in network_type_breakdown: #{e.message}" nil end # Top countries with detailed stats (event count and unique IPs) # Returns array of OpenStruct objects with: country, event_count, unique_ips def top_countries_with_stats(start_time, limit = 15) service.with_connection do |conn| result = conn.query(<<~SQL, start_time, limit) SELECT country, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips FROM events WHERE timestamp >= ? AND country IS NOT NULL GROUP BY country ORDER BY event_count DESC LIMIT ? SQL # Convert arrays to OpenStruct for attribute access result.to_a.map do |row| OpenStruct.new( country: row[0], event_count: row[1], unique_ips: row[2] ) end end rescue StandardError => e Rails.logger.error "[EventDdb] Error in top_countries_with_stats: #{e.message}" nil end # Network type stats with formatted output matching controller expectations # Returns hash with type keys containing label, networks, events, unique_ips, percentage def network_type_stats(start_time) service.with_connection do |conn| # Get total events for percentage calculation total_result = conn.query("SELECT COUNT(*) as total FROM events WHERE timestamp >= ?", start_time) total_events = total_result.first&.first || 0 # Get breakdown by network type breakdown = network_type_breakdown(start_time) return nil unless breakdown # Format results with labels and percentages results = {} { 'datacenter' => 'Datacenter', 'vpn' => 'VPN', 'proxy' => 'Proxy', 'standard' => 'Standard' }.each do |type, label| stats = breakdown[type] event_count = stats ? stats["event_count"] : 0 results[type] = { label: label, networks: stats ? stats["network_count"] : 0, events: event_count, unique_ips: stats ? stats["unique_ips"] : 0, percentage: total_events > 0 ? ((event_count.to_f / total_events) * 100).round(1) : 0 } end results end rescue StandardError => e Rails.logger.error "[EventDdb] Error in network_type_stats: #{e.message}" nil end # Network range traffic statistics # Returns comprehensive stats for a given network range ID(s) def network_traffic_stats(network_range_ids) network_range_ids = Array(network_range_ids) return nil if network_range_ids.empty? service.with_connection do |conn| # Build IN clause with placeholders placeholders = network_range_ids.map { "?" }.join(", ") # Get all stats in a single query result = conn.query(<<~SQL, *network_range_ids) SELECT COUNT(*) as total_requests, COUNT(DISTINCT ip_address) as unique_ips, SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) as blocked_requests, SUM(CASE WHEN waf_action = 1 THEN 1 ELSE 0 END) as allowed_requests FROM events WHERE network_range_id IN (#{placeholders}) SQL stats_row = result.first return nil unless stats_row { total_requests: stats_row[0] || 0, unique_ips: stats_row[1] || 0, blocked_requests: stats_row[2] || 0, allowed_requests: stats_row[3] || 0 } end rescue StandardError => e Rails.logger.error "[EventDdb] Error in network_traffic_stats: #{e.message}" nil end # Top paths for network range(s) def network_top_paths(network_range_ids, limit = 10) network_range_ids = Array(network_range_ids) return nil if network_range_ids.empty? service.with_connection do |conn| # Build IN clause with placeholders placeholders = network_range_ids.map { "?" }.join(", ") result = conn.query(<<~SQL, *network_range_ids, limit) SELECT request_path, COUNT(*) as count FROM events WHERE network_range_id IN (#{placeholders}) AND request_path IS NOT NULL GROUP BY request_path ORDER BY count DESC LIMIT ? SQL result.to_a.map { |row| [row[0], row[1]] } end rescue StandardError => e Rails.logger.error "[EventDdb] Error in network_top_paths: #{e.message}" nil end # Top user agents for network range(s) def network_top_user_agents(network_range_ids, limit = 5) network_range_ids = Array(network_range_ids) return nil if network_range_ids.empty? service.with_connection do |conn| # Build IN clause with placeholders placeholders = network_range_ids.map { "?" }.join(", ") result = conn.query(<<~SQL, *network_range_ids, limit) SELECT user_agent, COUNT(*) as count FROM events WHERE network_range_id IN (#{placeholders}) AND user_agent IS NOT NULL GROUP BY user_agent ORDER BY count DESC LIMIT ? SQL result.to_a.map { |row| [row[0], row[1]] } end rescue StandardError => e Rails.logger.error "[EventDdb] Error in network_top_user_agents: #{e.message}" nil end # Full user agent tally for network range(s) # Returns hash of user_agent => count for all agents in the network def network_agent_tally(network_range_ids) network_range_ids = Array(network_range_ids) return nil if network_range_ids.empty? service.with_connection do |conn| # Build IN clause with placeholders placeholders = network_range_ids.map { "?" }.join(", ") result = conn.query(<<~SQL, *network_range_ids) SELECT user_agent, COUNT(*) as count FROM events WHERE network_range_id IN (#{placeholders}) AND user_agent IS NOT NULL GROUP BY user_agent SQL # Convert to hash matching Ruby .tally format result.to_a.to_h { |row| [row[0], row[1]] } end rescue StandardError => e Rails.logger.error "[EventDdb] Error in network_agent_tally: #{e.message}" nil end # Suspicious network activity patterns # Detects high-volume networks, high deny rates, and distributed companies def suspicious_patterns(start_time) service.with_connection do |conn| # High volume networks (5x average) avg_query = conn.query(<<~SQL, start_time) SELECT AVG(event_count) as avg_events FROM ( SELECT network_range_id, COUNT(*) as event_count FROM events WHERE timestamp >= ? AND network_range_id IS NOT NULL GROUP BY network_range_id ) network_stats SQL avg_events = avg_query.first&.first || 0 threshold = avg_events * 5 high_volume = conn.query(<<~SQL, start_time, threshold) SELECT network_range_id, COUNT(*) as event_count FROM events WHERE timestamp >= ? AND network_range_id IS NOT NULL GROUP BY network_range_id HAVING COUNT(*) > ? ORDER BY event_count DESC SQL # High deny rate networks (>50% blocked, min 10 requests) high_deny = conn.query(<<~SQL, start_time) SELECT network_range_id, SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) as denied_count, COUNT(*) as total_count FROM events WHERE timestamp >= ? AND network_range_id IS NOT NULL GROUP BY network_range_id HAVING CAST(SUM(CASE WHEN waf_action = 0 THEN 1 ELSE 0 END) AS FLOAT) / COUNT(*) > 0.5 AND COUNT(*) >= 10 ORDER BY denied_count DESC SQL # Distributed companies (appearing with 5+ unique IPs) distributed_companies = conn.query(<<~SQL, start_time) SELECT company, COUNT(DISTINCT ip_address) as ip_count FROM events WHERE timestamp >= ? AND company IS NOT NULL GROUP BY company HAVING COUNT(DISTINCT ip_address) > 5 ORDER BY ip_count DESC LIMIT 10 SQL { high_volume: { count: high_volume.to_a.length, networks: high_volume.to_a.map { |row| row[0] } # network_range_id }, high_deny_rate: { count: high_deny.to_a.length, network_ids: high_deny.to_a.map { |row| row[0] } # network_range_id }, distributed_companies: distributed_companies.to_a.map { |row| { company: row[0], # company name subnets: row[1] # ip_count } } } end rescue StandardError => e Rails.logger.error "[EventDdb] Error in suspicious_patterns: #{e.message}" nil end # Bot traffic analysis - breakdown of bot vs human traffic def bot_traffic_breakdown(start_time) service.with_connection do |conn| result = conn.query(<<~SQL, start_time) SELECT is_bot, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips FROM events WHERE timestamp >= ? GROUP BY is_bot SQL # Convert to hash: is_bot => { event_count, unique_ips } # DuckDB returns arrays: [is_bot, event_count, unique_ips] result.to_a.to_h do |row| [ row[0] ? "bot" : "human", # row[0] = is_bot { "event_count" => row[1], # row[1] = event_count "unique_ips" => row[2] # row[2] = unique_ips } ] end end rescue StandardError => e Rails.logger.error "[EventDdb] Error in bot_traffic_breakdown: #{e.message}" nil end # Count human traffic (non-bot) since timestamp def human_traffic_count(start_time) service.with_connection do |conn| result = conn.query(<<~SQL, start_time) SELECT COUNT(*) as count FROM events WHERE timestamp >= ? AND is_bot = false SQL result.first&.first || 0 end rescue StandardError => e Rails.logger.error "[EventDdb] Error in human_traffic_count: #{e.message}" nil end # Count bot traffic since timestamp def bot_traffic_count(start_time) service.with_connection do |conn| result = conn.query(<<~SQL, start_time) SELECT COUNT(*) as count FROM events WHERE timestamp >= ? AND is_bot = true SQL result.first&.first || 0 end rescue StandardError => e Rails.logger.error "[EventDdb] Error in bot_traffic_count: #{e.message}" nil end # Top bot user agents def top_bot_user_agents(start_time, limit = 20) service.with_connection do |conn| result = conn.query(<<~SQL, start_time, limit) SELECT user_agent, COUNT(*) as event_count, COUNT(DISTINCT ip_address) as unique_ips FROM events WHERE timestamp >= ? AND is_bot = true AND user_agent IS NOT NULL GROUP BY user_agent ORDER BY event_count DESC LIMIT ? SQL # DuckDB returns arrays: [user_agent, event_count, unique_ips] result.to_a.map do |row| { user_agent: row[0], # row[0] = user_agent event_count: row[1], # row[1] = event_count unique_ips: row[2] # row[2] = unique_ips } end end rescue StandardError => e Rails.logger.error "[EventDdb] Error in top_bot_user_agents: #{e.message}" nil end # Bot traffic timeline (hourly breakdown) def bot_traffic_timeline(start_time, end_time) service.with_connection do |conn| result = conn.query(<<~SQL, start_time, end_time) SELECT DATE_TRUNC('hour', timestamp) as hour, SUM(CASE WHEN is_bot = true THEN 1 ELSE 0 END) as bot_count, SUM(CASE WHEN is_bot = false THEN 1 ELSE 0 END) as human_count FROM events WHERE timestamp >= ? AND timestamp < ? GROUP BY hour ORDER BY hour SQL # Convert to hash with Time keys # DuckDB returns arrays: [hour, bot_count, human_count] result.to_a.to_h do |row| [ row[0], # row[0] = hour { "bot_count" => row[1], # row[1] = bot_count "human_count" => row[2], # row[2] = human_count "total" => row[1] + row[2] } ] end end rescue StandardError => e Rails.logger.error "[EventDdb] Error in bot_traffic_timeline: #{e.message}" nil end end end