Update DuckDB. Use more DuckDB. Fix the display of stats.
This commit is contained in:
@@ -241,43 +241,83 @@ class AnalyticsController < ApplicationController
|
||||
end
|
||||
|
||||
def prepare_chart_data_with_split_cache(cache_key_base, cache_ttl)
|
||||
# Split timeline into historical (completed hours) and current (incomplete hour)
|
||||
# Historical hours are cached for full TTL, current hour cached briefly for freshness
|
||||
# Generate timeline based on selected time period
|
||||
case @time_period
|
||||
when :hour
|
||||
# Show last 60 minutes for hour view
|
||||
timeline_data = Rails.cache.fetch("#{cache_key_base}/chart_hourly", expires_in: 1.minute) do
|
||||
# For hour view, show minute-by-minute data for the last hour
|
||||
(0..59).map do |minutes_ago|
|
||||
time_point = minutes_ago.minutes.ago
|
||||
count = Event.where("timestamp >= ? AND timestamp < ?", time_point, time_point + 1.minute).count
|
||||
{
|
||||
time_iso: time_point.iso8601,
|
||||
total: count
|
||||
}
|
||||
end.reverse
|
||||
end
|
||||
|
||||
# Cache historical hours (1-23 hours ago) - these are complete and won't change
|
||||
# No expiration - will stick around until evicted by cache store (uses DuckDB if available)
|
||||
historical_timeline = Rails.cache.fetch("#{cache_key_base}/chart_historical") do
|
||||
historical_start = 23.hours.ago.beginning_of_hour
|
||||
current_hour_start = Time.current.beginning_of_hour
|
||||
when :day
|
||||
# Show last 24 hours (existing logic)
|
||||
# Split timeline into historical (completed hours) and current (incomplete hour)
|
||||
# Historical hours are cached for full TTL, current hour cached briefly for freshness
|
||||
|
||||
events_by_hour = with_duckdb_fallback { EventDdb.hourly_timeline(historical_start, current_hour_start) } ||
|
||||
Event.where("timestamp >= ? AND timestamp < ?", historical_start, current_hour_start)
|
||||
.group("DATE_TRUNC('hour', timestamp)")
|
||||
.count
|
||||
# Cache historical hours (1-23 hours ago) - these are complete and won't change
|
||||
# Use DuckDB directly for performance, no PostgreSQL fallback
|
||||
historical_timeline = Rails.cache.fetch("#{cache_key_base}/chart_historical", expires_in: 1.hour) do
|
||||
historical_start = 23.hours.ago.beginning_of_hour
|
||||
current_hour_start = Time.current.beginning_of_hour
|
||||
|
||||
(1..23).map do |hour_ago|
|
||||
hour_time = hour_ago.hours.ago.beginning_of_hour
|
||||
hour_key = hour_time.utc
|
||||
# Use DuckDB directly - if it fails, we'll show empty data rather than slow PostgreSQL
|
||||
events_by_hour = BaffleDl.hourly_timeline(historical_start, current_hour_start) || {}
|
||||
|
||||
(1..23).map do |hour_ago|
|
||||
hour_time = hour_ago.hours.ago.beginning_of_hour
|
||||
hour_key = hour_time.utc
|
||||
{
|
||||
time_iso: hour_time.iso8601,
|
||||
total: events_by_hour[hour_key] || 0
|
||||
}
|
||||
end.reverse
|
||||
end
|
||||
|
||||
# Current hour (0 hours ago) - cache very briefly since it's actively accumulating
|
||||
# ALWAYS use PostgreSQL for current hour to get real-time data (DuckDB syncs every minute)
|
||||
current_hour_data = Rails.cache.fetch("#{cache_key_base}/chart_current_hour", expires_in: 1.minute) do
|
||||
hour_time = Time.current.beginning_of_hour
|
||||
count = Event.where("timestamp >= ?", hour_time).count
|
||||
{
|
||||
time_iso: hour_time.iso8601,
|
||||
total: events_by_hour[hour_key] || 0
|
||||
total: count
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
# Current hour (0 hours ago) - cache very briefly since it's actively accumulating
|
||||
# ALWAYS use PostgreSQL for current hour to get real-time data (DuckDB syncs every minute)
|
||||
current_hour_data = Rails.cache.fetch("#{cache_key_base}/chart_current_hour", expires_in: 1.minute) do
|
||||
hour_time = Time.current.beginning_of_hour
|
||||
count = Event.where("timestamp >= ?", hour_time).count
|
||||
{
|
||||
time_iso: hour_time.iso8601,
|
||||
total: count
|
||||
}
|
||||
end
|
||||
# Combine current + historical for full 24-hour timeline
|
||||
timeline_data = [current_hour_data] + historical_timeline
|
||||
|
||||
# Combine current + historical for full 24-hour timeline
|
||||
timeline_data = [current_hour_data] + historical_timeline
|
||||
when :week, :month
|
||||
# Show daily data for week/month views
|
||||
days_to_show = @time_period == :week ? 7 : 30
|
||||
timeline_data = Rails.cache.fetch("#{cache_key_base}/chart_daily_#{days_to_show}", expires_in: cache_ttl) do
|
||||
historical_start = days_to_show.days.ago.beginning_of_day
|
||||
current_day_end = Time.current.end_of_day
|
||||
|
||||
# Use DuckDB for all data including current day (max 1 minute delay)
|
||||
daily_events = BaffleDl.daily_timeline(historical_start, current_day_end) || {}
|
||||
|
||||
(0..days_to_show-1).map do |days_ago|
|
||||
day_time = days_ago.days.ago.beginning_of_day
|
||||
{
|
||||
time_iso: day_time.iso8601,
|
||||
total: daily_events[day_time] || 0
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
else
|
||||
# Unrecognized time period: fall back to an empty timeline
|
||||
timeline_data = []
|
||||
end
|
||||
|
||||
# Action distribution and other chart data (cached with main cache)
|
||||
other_chart_data = Rails.cache.fetch("#{cache_key_base}/chart_metadata", expires_in: cache_ttl) do
|
||||
@@ -323,7 +363,7 @@ class AnalyticsController < ApplicationController
|
||||
time_iso: hour_time.iso8601,
|
||||
total: events_by_hour[hour_key] || 0
|
||||
}
|
||||
end
|
||||
end.reverse
|
||||
|
||||
# Action distribution for pie chart
|
||||
action_distribution = @event_breakdown.map do |action, count|
|
||||
@@ -348,8 +388,8 @@ class AnalyticsController < ApplicationController
|
||||
end
|
||||
|
||||
def calculate_network_type_stats(start_time)
|
||||
# Try DuckDB first, fallback to PostgreSQL
|
||||
duckdb_stats = with_duckdb_fallback { EventDdb.network_type_stats(start_time) }
|
||||
# Try DuckLake first, fallback to PostgreSQL
|
||||
duckdb_stats = with_duckdb_fallback { BaffleDl.network_type_stats(start_time) }
|
||||
|
||||
return duckdb_stats if duckdb_stats
|
||||
|
||||
@@ -397,8 +437,8 @@ class AnalyticsController < ApplicationController
|
||||
end
|
||||
|
||||
def calculate_suspicious_patterns(start_time)
|
||||
# Try DuckDB first, fallback to PostgreSQL
|
||||
duckdb_patterns = with_duckdb_fallback { EventDdb.suspicious_patterns(start_time) }
|
||||
# Try DuckLake first, fallback to PostgreSQL
|
||||
duckdb_patterns = with_duckdb_fallback { BaffleDl.suspicious_patterns(start_time) }
|
||||
|
||||
return duckdb_patterns if duckdb_patterns
|
||||
|
||||
|
||||
@@ -37,8 +37,8 @@ class EventsController < ApplicationController
|
||||
filters[:network_range_id] = range.id if range
|
||||
end
|
||||
|
||||
# Try DuckDB first, fallback to PostgreSQL if unavailable
|
||||
result = EventDdb.search(filters, page: params[:page]&.to_i || 1, per_page: 50)
|
||||
# Try DuckLake first, fallback to PostgreSQL if unavailable
|
||||
result = BaffleDl.search(filters, page: params[:page]&.to_i || 1, per_page: 50)
|
||||
|
||||
if result
|
||||
# DuckDB query succeeded
|
||||
|
||||
@@ -262,10 +262,10 @@ class NetworkRangesController < ApplicationController
|
||||
# Include child network ranges to capture all traffic within this network block
|
||||
network_ids = [network_range.id] + network_range.child_ranges.pluck(:id)
|
||||
|
||||
# Try DuckDB first for stats (much faster)
|
||||
duckdb_stats = with_duckdb_fallback { EventDdb.network_traffic_stats(network_ids) }
|
||||
duckdb_top_paths = with_duckdb_fallback { EventDdb.network_top_paths(network_ids, 10) }
|
||||
duckdb_top_agents = with_duckdb_fallback { EventDdb.network_top_user_agents(network_ids, 5) }
|
||||
# Try DuckLake first for stats (much faster)
|
||||
duckdb_stats = with_duckdb_fallback { BaffleDl.network_traffic_stats(network_ids) }
|
||||
duckdb_top_paths = with_duckdb_fallback { BaffleDl.network_top_paths(network_ids, 10) }
|
||||
duckdb_top_agents = with_duckdb_fallback { BaffleDl.network_top_user_agents(network_ids, 5) }
|
||||
|
||||
if duckdb_stats
|
||||
# DuckDB success - use fast aggregated stats
|
||||
|
||||
@@ -37,20 +37,49 @@ export default class extends Controller {
|
||||
|
||||
// Convert ISO time to local time
|
||||
const date = new Date(timeIso)
|
||||
const localTime = date.toLocaleTimeString(undefined, {
|
||||
hour: '2-digit',
|
||||
minute: '2-digit',
|
||||
hour12: false
|
||||
})
|
||||
|
||||
timeElement.textContent = localTime
|
||||
// Determine if we should show date based on time range
|
||||
const now = new Date()
|
||||
const timeDiff = now - date
|
||||
const hoursDiff = timeDiff / (1000 * 60 * 60)
|
||||
|
||||
let displayTime
|
||||
if (hoursDiff > 25) {
|
||||
// For periods longer than 25 hours, show date only (no time)
|
||||
displayTime = date.toLocaleDateString(undefined, {
|
||||
month: 'short',
|
||||
day: 'numeric'
|
||||
})
|
||||
} else {
|
||||
// Check if this is midnight UTC data (daily timeline) vs actual time data (hourly timeline)
|
||||
// Daily timeline: time is at UTC midnight (hours/minutes/seconds = 0)
|
||||
// Hourly timeline: time has actual hours/minutes
|
||||
const utcHours = date.getUTCHours()
|
||||
const utcMinutes = date.getUTCMinutes()
|
||||
const utcSeconds = date.getUTCSeconds()
|
||||
|
||||
if (utcHours === 0 && utcMinutes === 0 && utcSeconds === 0) {
|
||||
// This is midnight UTC - treat as daily data, show date only
|
||||
displayTime = date.toLocaleDateString(undefined, {
|
||||
month: 'short',
|
||||
day: 'numeric'
|
||||
})
|
||||
} else {
|
||||
// This is actual time data - show time only
|
||||
displayTime = date.toLocaleTimeString(undefined, {
|
||||
hour: '2-digit',
|
||||
minute: '2-digit',
|
||||
hour12: false
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
timeElement.textContent = displayTime
|
||||
timeElement.title = date.toLocaleString(undefined, {
|
||||
weekday: 'short',
|
||||
year: 'numeric',
|
||||
month: 'short',
|
||||
day: 'numeric',
|
||||
hour: '2-digit',
|
||||
minute: '2-digit',
|
||||
timeZoneName: 'short'
|
||||
})
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
|
||||
require 'ostruct'
|
||||
|
||||
# EventDdb - DuckDB-backed analytics queries for events
|
||||
# Provides an ActiveRecord-like interface for querying DuckDB events table
|
||||
# Falls back to PostgreSQL Event model if DuckDB is unavailable
|
||||
# EventDdb - DuckLake-backed analytics queries for events
|
||||
# Provides an ActiveRecord-like interface for querying DuckLake events table
|
||||
# Falls back to PostgreSQL Event model if DuckLake is unavailable
|
||||
class EventDdb
|
||||
# Enum mappings from integer to string (matching Event model)
|
||||
ACTION_MAP = {
|
||||
@@ -26,30 +26,24 @@ class EventDdb
|
||||
}.freeze
|
||||
|
||||
class << self
|
||||
# Get DuckDB service
|
||||
# Get DuckLake service
|
||||
def service
|
||||
AnalyticsDuckdbService.instance
|
||||
AnalyticsDucklakeService.new
|
||||
end
|
||||
|
||||
# Helper to load parquet files into in-memory events view
|
||||
# Helper to work with DuckLake events table
|
||||
# This allows all existing queries to work without modification
|
||||
# Uses glob pattern to read all parquet files (excluding .temp files)
|
||||
def with_events_from_parquet(&block)
|
||||
service.with_connection do |conn|
|
||||
# Create events view from all parquet files using glob pattern
|
||||
# Pattern matches: minute/*.parquet, hours/*.parquet, days/*.parquet, weeks/*.parquet
|
||||
# Excludes .temp files automatically (they don't match *.parquet)
|
||||
parquet_pattern = "#{AnalyticsDuckdbService::PARQUET_BASE_PATH}/**/*.parquet"
|
||||
|
||||
conn.execute(<<~SQL)
|
||||
CREATE VIEW events AS
|
||||
SELECT * FROM read_parquet('#{parquet_pattern}')
|
||||
SQL
|
||||
# Ensure schema exists
|
||||
service.setup_schema(conn)
|
||||
|
||||
# Use the DuckLake events table directly
|
||||
# DuckLake automatically manages the Parquet files underneath
|
||||
yield conn
|
||||
end
|
||||
rescue StandardError => e
|
||||
Rails.logger.error "[EventDdb] Error loading parquet files: #{e.message}"
|
||||
Rails.logger.error "[EventDdb] Error accessing DuckLake events: #{e.message}"
|
||||
nil
|
||||
end
|
||||
|
||||
@@ -691,70 +685,14 @@ class EventDdb
|
||||
|
||||
# Search events with filters and pagination
|
||||
# Returns { total_count:, events:[], page:, per_page: }
|
||||
# Supports filters: ip, waf_action, country, rule_id, company, asn, network_type, network_range_id, exclude_bots
|
||||
# Supports filters: ip, waf_action, country, rule_id, company, asn, network_type, network_range_id, exclude_bots, request_path
|
||||
def search(filters = {}, page: 1, per_page: 50)
|
||||
# Get list of Parquet files to query
|
||||
parquet_files = service.parquet_files_for_range(1.year.ago, Time.current)
|
||||
|
||||
if parquet_files.empty?
|
||||
Rails.logger.warn "[EventDdb] No Parquet files found, falling back to DuckDB"
|
||||
return search_duckdb(filters, page, per_page)
|
||||
end
|
||||
|
||||
# Query Parquet files using in-memory DuckDB (no file locks!)
|
||||
service.with_parquet_connection do |conn|
|
||||
# Build WHERE clause
|
||||
where_clause, params = build_where_clause(filters)
|
||||
|
||||
# Build file list for read_parquet
|
||||
file_list = parquet_files.map { |f| "'#{f}'" }.join(", ")
|
||||
|
||||
# Get total count
|
||||
count_sql = "SELECT COUNT(*) FROM read_parquet([#{file_list}])#{where_clause}"
|
||||
count_result = conn.query(count_sql, *params)
|
||||
total_count = count_result.first&.first || 0
|
||||
|
||||
# Get paginated results
|
||||
offset = (page - 1) * per_page
|
||||
|
||||
data_sql = <<~SQL
|
||||
SELECT
|
||||
id, timestamp, ip_address, network_range_id, country, company,
|
||||
asn, asn_org, is_datacenter, is_vpn, is_proxy, is_bot,
|
||||
waf_action, request_method, response_status, rule_id,
|
||||
request_path, user_agent, tags
|
||||
FROM read_parquet([#{file_list}])
|
||||
#{where_clause}
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT ? OFFSET ?
|
||||
SQL
|
||||
|
||||
result = conn.query(data_sql, *params, per_page, offset)
|
||||
|
||||
# Convert rows to event-like objects
|
||||
events = result.to_a.map { |row| row_to_event(row) }
|
||||
|
||||
{
|
||||
total_count: total_count,
|
||||
events: events,
|
||||
page: page,
|
||||
per_page: per_page
|
||||
}
|
||||
end
|
||||
rescue StandardError => e
|
||||
Rails.logger.error "[EventDdb] Error in Parquet search: #{e.message}"
|
||||
Rails.logger.error e.backtrace.join("\n")
|
||||
nil
|
||||
end
|
||||
|
||||
# Fallback to querying DuckDB directly (for backward compatibility)
|
||||
def search_duckdb(filters = {}, page: 1, per_page: 50)
|
||||
with_events_from_parquet do |conn|
|
||||
# Build WHERE clause
|
||||
where_clause, params = build_where_clause(filters)
|
||||
|
||||
# Get total count
|
||||
count_sql = "SELECT COUNT(*) FROM events#{where_clause}"
|
||||
count_sql = "SELECT COUNT(*) FROM baffle.events#{where_clause}"
|
||||
count_result = conn.query(count_sql, *params)
|
||||
total_count = count_result.first&.first || 0
|
||||
|
||||
@@ -767,7 +705,7 @@ class EventDdb
|
||||
asn, asn_org, is_datacenter, is_vpn, is_proxy, is_bot,
|
||||
waf_action, request_method, response_status, rule_id,
|
||||
request_path, user_agent, tags
|
||||
FROM events
|
||||
FROM baffle.events
|
||||
#{where_clause}
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT ? OFFSET ?
|
||||
@@ -786,7 +724,7 @@ class EventDdb
|
||||
}
|
||||
end
|
||||
rescue StandardError => e
|
||||
Rails.logger.error "[EventDdb] Error in DuckDB search: #{e.message}"
|
||||
Rails.logger.error "[EventDdb] Error in DuckLake search: #{e.message}"
|
||||
Rails.logger.error e.backtrace.join("\n")
|
||||
nil
|
||||
end
|
||||
@@ -852,6 +790,12 @@ class EventDdb
|
||||
end
|
||||
end
|
||||
|
||||
# Path filtering
|
||||
if filters[:request_path].present?
|
||||
conditions << "request_path = ?"
|
||||
params << filters[:request_path]
|
||||
end
|
||||
|
||||
# Bot filtering
|
||||
if filters[:exclude_bots] == true || filters[:exclude_bots] == "true"
|
||||
conditions << "is_bot = false"
|
||||
|
||||
@@ -2,6 +2,11 @@
|
||||
|
||||
# Service for managing DuckDB analytics database
|
||||
# Provides fast analytical queries on events data using columnar storage
|
||||
|
||||
# INSTALL ducklake;
|
||||
# INSTALL sqlite;
|
||||
# ATTACH 'ducklake:sqlite3:storage/ducklake.sqlite3' AS events (DATA_PATH 'storage/ducklake/events.ducklake');
|
||||
|
||||
class AnalyticsDuckdbService
|
||||
include Singleton
|
||||
|
||||
|
||||
@@ -185,7 +185,13 @@
|
||||
<div class="bg-white shadow rounded-lg">
|
||||
<div class="px-6 py-4 border-b border-gray-200">
|
||||
<div class="flex items-center justify-between">
|
||||
<h3 class="text-lg font-medium text-gray-900">Events Timeline (Last 24 Hours)</h3>
|
||||
<h3 class="text-lg font-medium text-gray-900">Events Timeline (<%= case @time_period
|
||||
when :hour then "Last Hour"
|
||||
when :day then "Last 24 Hours"
|
||||
when :week then "Last 7 Days"
|
||||
when :month then "Last 30 Days"
|
||||
else "Last 24 Hours"
|
||||
end %>)</h3>
|
||||
<span class="text-sm text-gray-500">Times shown in your local timezone</span>
|
||||
</div>
|
||||
</div>
|
||||
@@ -381,7 +387,7 @@
|
||||
<h3 class="text-lg font-medium text-gray-900">Quick Actions</h3>
|
||||
</div>
|
||||
<div class="p-6">
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-5 gap-4">
|
||||
<%= link_to new_rule_path, class: "flex items-center justify-center px-4 py-3 bg-blue-600 text-white rounded-md hover:bg-blue-700 transition-colors" do %>
|
||||
<svg class="w-5 h-5 mr-2" fill="currentColor" viewBox="0 0 24 24">
|
||||
<path d="M19 13h-6v6h-2v-6H5v-2h6V5h2v6h6v2z"/>
|
||||
@@ -393,17 +399,24 @@
|
||||
<svg class="w-5 h-5 mr-2" fill="currentColor" viewBox="0 0 24 24">
|
||||
<path d="M19 13h-6v6h-2v-6H5v-2h6V5h2v6h6v2z"/>
|
||||
</svg>
|
||||
Add Network Range
|
||||
Add Network
|
||||
<% end %>
|
||||
|
||||
<%= link_to events_path, class: "flex items-center justify-center px-4 py-3 bg-purple-600 text-white rounded-md hover:bg-purple-700 transition-colors" do %>
|
||||
<%= link_to analytics_networks_path, class: "flex items-center justify-center px-4 py-3 bg-purple-600 text-white rounded-md hover:bg-purple-700 transition-colors" do %>
|
||||
<svg class="w-5 h-5 mr-2" fill="currentColor" viewBox="0 0 24 24">
|
||||
<path d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z"/>
|
||||
</svg>
|
||||
Network Analytics
|
||||
<% end %>
|
||||
|
||||
<%= link_to events_path, class: "flex items-center justify-center px-4 py-3 bg-orange-600 text-white rounded-md hover:bg-orange-700 transition-colors" do %>
|
||||
<svg class="w-5 h-5 mr-2" fill="currentColor" viewBox="0 0 24 24">
|
||||
<path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm1 15h-2v-2h2v2zm0-4h-2V7h2v6z"/>
|
||||
</svg>
|
||||
View Events
|
||||
<% end %>
|
||||
|
||||
<%= link_to rules_path, class: "flex items-center justify-center px-4 py-3 bg-orange-600 text-white rounded-md hover:bg-orange-700 transition-colors" do %>
|
||||
<%= link_to rules_path, class: "flex items-center justify-center px-4 py-3 bg-gray-600 text-white rounded-md hover:bg-gray-700 transition-colors" do %>
|
||||
<svg class="w-5 h-5 mr-2" fill="currentColor" viewBox="0 0 24 24">
|
||||
<path d="M12 1L3 5v6c0 5.55 3.84 10.74 9 12 5.16-1.26 9-6.45 9-12V5l-9-4z"/>
|
||||
</svg>
|
||||
|
||||
Reference in New Issue
Block a user