refactor: replace click tracker with direct spawn

This commit is contained in:
sjdonado
2025-03-20 13:02:38 +01:00
parent d1be283318
commit 6a151301b8
11 changed files with 143 additions and 269 deletions
+4
View File
@@ -3,3 +3,7 @@
/.shards/
/spec/
/sqlite/
/docs/
benchmark.cr
.env*
+3 -3
View File
@@ -2,9 +2,9 @@
[![Docker Stars](https://img.shields.io/docker/stars/sjdonado/bit.svg)](https://hub.docker.com/r/sjdonado/bit)
[![Docker Image Size](https://img.shields.io/docker/image-size/sjdonado/bit/latest)](https://hub.docker.com/r/sjdonado/bit)
Lightweight URL shortener API service with minimal resource requirements. Average memory consumption is under **20MiB** and single CPU core consumption under 20%.
Lightweight URL shortener API service with minimal resource requirements. Average memory consumption is under **40MiB** and single CPU core consumption under 40%.
Performance: Avg **3K reqs/sec**, latency 56ms (100K requests using 100 connections, [benchmark](docs/SETUP.md#benchmark)).
Performance: Avg **1K reqs/sec**, latency 8ms (100K requests using 100 connections, [benchmark](docs/SETUP.md#benchmark)).
Self-hosted with [Dokku](docs/SETUP.md#dokku) and [Docker Compose](docs/SETUP.md#docker-compose).
@@ -14,7 +14,7 @@ Images available on [Docker Hub](https://hub.docker.com/r/sjdonado/bit/tags).
It is feature-complete by design. Its strength lies in simplicity: reliable, without unnecessary bloat. Bug fixes will continue, but new features aren't planned.
- Minimal tracking setup: country, browser, OS, referer. No cookies or persistent tracking mechanisms are used beyond what's available from a basic client's request.
- Flexible request forwarding system passes client context to destinations via standard `X-Forwarded-For` and `User-Agent` headers, enabling advanced tracking and integration capabilities when needed.
- Standard `X-Forwarded-For` headers pass client context, enabling tracking and integration capabilities.
- Multiple users are supported via API key authentication. Create, list and delete via the [CLI](docs/SETUP.md#cli).
## Minimum Requirements
+57
View File
@@ -0,0 +1,57 @@
require "user_agent_parser"
UserAgent.load_regexes(File.read("data/uap_core_regexes.yaml"))
IpLookup.load_mmdb("data/GeoLite2-Country.mmdb")
module App::Controllers
  # Serves the public `/:slug` redirect and records one click per hit.
  class ClickController
    include App::Models
    include App::Lib
    include App::Services

    def initialize(@env : HTTP::Server::Context); end

    # Resolves the `slug` URL parameter to a link, answers with a 301 pointing
    # at the link's URL, flushes the response and then stores the click from a
    # spawned fiber.
    #
    # Raises App::NotFoundException when no link matches the slug.
    def redirect
      slug = @env.params.url["slug"]

      link_data = nil
      Database.raw_query("SELECT id, url FROM links WHERE slug = (?) LIMIT 1", slug) do |result|
        if result.move_next
          link_data = {result.read(Int64), result.read(String)}
        end
      end

      raise App::NotFoundException.new(@env) unless link_data
      link_id, url = link_data

      request = @env.request

      # `remote_address` is nilable: `nil.to_s` would pass "" to the parser
      # instead of nil, and a blank Cf-Connecting-Ip header must not win over
      # the socket address.
      remote_address = request.headers["Cf-Connecting-Ip"]?.try(&.presence) || request.remote_address.try(&.to_s)
      client_ip = IpLookup.ip_from_address(remote_address)

      @env.response.status_code = 301
      @env.response.headers["Location"] = url
      @env.response.headers["X-Forwarded-For"] = client_ip.to_s
      @env.response.headers["Connection"] = "close"
      @env.response.flush

      # Read everything the tracker needs before spawning, so the fiber only
      # works on captured locals.
      user_agent_str = request.headers["User-Agent"]?
      referer = request.headers["Referer"]?.try { |r| URI.parse(r).host rescue r } || @env.params.query["utm_source"]? || "Direct"

      spawn do
        begin
          ua_parser = user_agent_str.try { |ua| UserAgent.new(ua) }

          click = App::Models::Click.new
          click.link_id = link_id
          click.country = client_ip.try { |ip| IpLookup.new(ip).try(&.country.try(&.code)) }
          # Store the raw header value ("" when absent), not the parser
          # object's string representation.
          click.user_agent = user_agent_str.to_s
          click.browser = ua_parser.try(&.family)
          click.os = ua_parser.try(&.os.try(&.family))
          click.referer = referer

          changeset = Database.insert(click)
          if changeset.errors.any?
            Log.error { "Logging click event failed: #{changeset.errors}" }
          end
        rescue ex
          Log.error { "Click tracking error: #{ex.message}" }
        end
      end
    end
  end
end
-41
View File
@@ -5,7 +5,6 @@ module App::Controllers
include App::Services
def initialize(@env : HTTP::Server::Context)
ClickTracker.init
super(@env)
end
@@ -34,33 +33,6 @@ module App::Controllers
render_json({"data" => App::Serializers::Link.new(inserted_link)}, 201)
end
# Looks up the link for the `slug` URL parameter and answers with a 301
# redirect to its URL, then hands the click off to `track_click` in a fiber.
# Raises App::NotFoundException when the slug matches no row.
def redirect
slug = @env.params.url["slug"]
link_data = nil
# LIMIT 1 degrades performance on unique field searches
# slug autoindex has better performance than the covering index
Database.raw_query("SELECT id, url FROM links WHERE slug = (?)", slug) do |result|
if result.move_next
# Columns are read in SELECT order: id, then url
link_data = {result.read(Int64), result.read(String)}
end
end
raise App::NotFoundException.new(@env) unless link_data
# Prefer a non-blank Cf-Connecting-Ip header; otherwise use the socket's remote address
remote_address = @env.request.headers["Cf-Connecting-Ip"]?.try(&.presence) || @env.request.remote_address.try &.to_s
user_agent_str = @env.request.headers["User-Agent"]? || "Unknown"
client_ip = IpLookup.extract_ip(remote_address) || "Unknown"
@env.response.status_code = 301
@env.response.headers["Connection"] = "close"
@env.response.headers["Location"] = link_data[1]
# The resolved client IP and User-Agent are echoed back as response headers
@env.response.headers["X-Forwarded-For"] = client_ip
@env.response.headers["User-Agent"] = user_agent_str
# Click recording runs in its own fiber, separate from this handler
spawn track_click(link_data[0], client_ip, user_agent_str)
end
def list_all
limit, cursor = pagination_params
@@ -158,19 +130,6 @@ module App::Controllers
current_user.id.as(Int64)
end
# Builds the source/referer pair for the current request and forwards the
# click to ClickTracker. `source` is the `utm_source` query parameter or
# "Direct"; the referer is the Referer header's host (the raw header when it
# cannot be parsed), falling back to `source`.
private def track_click(link_id, client_ip, user_agent_str)
  source = @env.params.query["utm_source"]? || "Direct"

  referer_host = @env.request.headers["Referer"]?.try do |raw|
    begin
      URI.parse(raw).host
    rescue
      raw
    end
  end

  ClickTracker.track(
    link_id: link_id,
    client_ip: client_ip,
    user_agent: user_agent_str,
    source: source,
    referer: referer_host || source
  )
end
private def pagination_params
limit = (@env.params.query["limit"]? || "100").to_i32
cursor = @env.params.query["cursor"]?
+5 -5
View File
@@ -13,11 +13,11 @@ module App::Lib
separator = base_url.includes?("?") ? "&" : "?"
db_url = base_url + separator +
"pool_size=20" +
"&max_idle_pool_size=10" + # Keep connections ready
"&journal_mode=WAL" + # Write-Ahead Logging for concurrent reads
"&synchronous=NORMAL" + # Better performance with reasonable safety
"&foreign_keys=true"
"&journal_mode=WAL" +
"&synchronous=NORMAL" + # Better performance with reasonable safety
"&foreign_keys=true" +
"&cache_size=10000" + # Larger cache (10MB) for frequently accessed data
"&wal_autocheckpoint=10000" # Less frequent checkpoints
conf.uri = db_url
end
+1 -1
View File
@@ -36,7 +36,7 @@ class IpLookup
end
end
def self.extract_ip(address_string : String?) : String?
def self.ip_from_address(address_string : String?) : String?
return nil if address_string.nil?
if address_string.includes?('[') # IPv6 with port: [2001:db8::1]:8080
+1 -1
View File
@@ -19,7 +19,7 @@ module App
end
get "/:slug" do |env|
Controllers::LinkController.new(env).redirect
Controllers::ClickController.new(env).redirect
end
# Namespace /api
-53
View File
@@ -1,53 +0,0 @@
require "user_agent_parser"
UserAgent.load_regexes(File.read("data/uap_core_regexes.yaml"))
IpLookup.load_mmdb("data/GeoLite2-Country.mmdb")
module App::Services
# Queue-backed click recorder: `track` pushes click tuples onto a channel and
# a worker fiber started by `init` turns each tuple into a Click row.
class ClickTracker
# Tuple layout: {link_id, client_ip, user_agent, source, referer}; channel created with capacity 1000
@@queue = Channel(Tuple(Int64, String, String, String, String)).new(1000)
@@initialized = false
# Starts the worker fiber the first time it is called; later calls return immediately.
def self.init
return if @@initialized
@@initialized = true
# Single worker fiber to process the queue
spawn do
Log.info { "ClickTracker worker started" }
loop do
begin
link_id, client_ip, user_agent_str, source, referer = @@queue.receive
# "Unknown" is the placeholder for a missing IP / User-Agent; skip the lookups for it
ip_lookup = client_ip != "Unknown" ? IpLookup.new(client_ip) : nil
country = ip_lookup.try &.country.try &.code
user_agent = user_agent_str != "Unknown" ? UserAgent.new(user_agent_str) : nil
click = App::Models::Click.new
click.link_id = link_id
click.country = country
# The raw User-Agent string is stored; browser/os come from the parsed form
click.user_agent = user_agent_str
click.browser = user_agent.try &.family
click.os = user_agent.try &.os.try &.family
# Note: `source` is received from the queue but not written to the click
click.referer = referer
changeset = App::Lib::Database.insert(click)
if changeset.errors.any?
Log.error { "Logging click event failed: #{changeset.errors}" }
end
rescue ex
Log.error { "Error processing click: #{ex.message}" }
# Pause briefly after a failure before taking the next item
sleep 0.1.seconds
end
end
end
end
# Enqueues one click for the worker, starting the worker first if needed.
def self.track(link_id : Int64, client_ip : String, user_agent : String, source : String, referer : String)
init if !@@initialized
@@queue.send({link_id, client_ip, user_agent, source, referer})
end
end
end
+28 -35
View File
@@ -8,17 +8,14 @@ require "file_utils"
SERVER_URL = "http://localhost:4000"
API_URL = "#{SERVER_URL}/api/links"
API_KEY = "secure_api_key_1"
NUM_LINKS = 10_000
NUM_REQUESTS = 100_000
CONCURRENCY = 100
TIME = "60s"
RESOURCE_USAGE_INTERVAL = 1
CONTAINER_NAME = "bit"
STATS_FILE = "resource_usage.txt"
class ResourceMonitor
def initialize(@container_name : String, @interval : Int32)
def initialize(@container_name : String, @interval : Float64)
@running = false
@stats = [] of {timestamp: Time, cpu: Float64, memory: Float64}
end
@@ -76,7 +73,10 @@ class ResourceMonitor
parts = line.split(",")
if parts.size == 2
cpu_part = parts[0].gsub("%", "").to_f
mem_part = parts[1].split.first.to_f
# Extract the memory value properly by removing the "MiB" suffix
mem_string = parts[1].split.first
mem_part = mem_string.gsub(/[A-Za-z]+$/, "").to_f
return {timestamp: Time.utc, cpu: cpu_part, memory: mem_part}
end
@@ -124,7 +124,7 @@ def setup_containers
puts "Checking seed results..."
until begin
HTTP::Client.get(
"#{API_URL}",
"#{API_URL}?limit=1",
headers: HTTP::Headers{"X-Api-Key" => API_KEY}
).success?
rescue
@@ -142,6 +142,7 @@ def run_benchmark
headers: HTTP::Headers{"X-Api-Key" => API_KEY}
)
sleep 2.seconds
unless response.success?
puts "Failed to fetch links. Status: #{response.status_code}"
exit(1)
@@ -150,40 +151,27 @@ def run_benchmark
data = JSON.parse(response.body)
links = data["data"].as_a.map { |link| link["refer"].as_s }
if links.size != NUM_LINKS
puts "Error: Expected #{NUM_LINKS} links but found #{links.size}."
exit(1)
end
random_link = links.sample
puts "Selected link for benchmarking: #{random_link}"
puts "Starting benchmark with Bombardier..."
# Run bombardier for benchmarking
stdout = IO::Memory.new
stderr = IO::Memory.new
process = Process.run(
sleep 2.seconds
process = Process.new(
"bombardier",
["-c", CONCURRENCY.to_s, "-n", NUM_REQUESTS.to_s, random_link],
output: stdout,
error: stderr
["-d", TIME.to_s, "-l", "--fasthttp", random_link],
output: Process::Redirect::Inherit,
error: Process::Redirect::Inherit
)
unless process.success?
puts "Bombardier failed with error:"
puts stderr.to_s
status = process.wait
if status.success?
puts "Benchmark completed successfully."
else
puts "Bombardier failed with error code: #{status.exit_code}"
exit(1)
end
# Display bombardier results
puts stdout.to_s
puts "Benchmark completed."
# Save bombardier results to file
File.write("bombardier_results.txt", stdout.to_s)
puts "Bombardier results saved to bombardier_results.txt"
end
def analyze_resource_usage
@@ -198,6 +186,8 @@ def analyze_resource_usage
if lines.size > 0
total_cpu = 0.0
total_memory = 0.0
peak_cpu = 0.0
peak_memory = 0.0
lines.each do |line|
fields = line.split("\t")
@@ -208,6 +198,10 @@ def analyze_resource_usage
total_cpu += cpu
total_memory += memory
# Track peaks in a single pass
peak_cpu = cpu if cpu > peak_cpu
peak_memory = memory if memory > peak_memory
rescue
# Skip invalid lines
end
@@ -219,12 +213,12 @@ def analyze_resource_usage
# Format statistics summary
stats_summary = <<-STATS
**** Resource Usage Statistics ****
**** Resource Usage Statistics ****
Measurements: #{lines.size}
Average CPU Usage: #{avg_cpu.round(2)}%
Average Memory Usage: #{avg_memory.round(2)} MiB
Peak CPU Usage: #{lines.max_by { |l| l.split("\t")[1].to_f rescue 0.0 }.split("\t")[1].to_f rescue 0.0}%
Peak Memory Usage: #{lines.max_by { |l| l.split("\t")[2].to_f rescue 0.0 }.split("\t")[2].to_f rescue 0.0} MiB
Peak CPU Usage: #{peak_cpu.round(2)}%
Peak Memory Usage: #{peak_memory.round(2)} MiB
STATS
@@ -245,7 +239,6 @@ end
def cleanup
Process.run("docker", ["compose", "down"])
puts "Cleanup completed. Resource usage data saved in #{STATS_FILE}"
puts "Bombardier results saved in bombardier_results.txt"
end
def main
+26 -15
View File
@@ -3,12 +3,12 @@ VALUES
('User 1', 'secure_api_key_1'),
('User 2', 'secure_api_key_2');
-- Create 200,000 links (100,000 per user)
-- Create 20,000 links (10,000 per user)
WITH RECURSIVE link_numbers(n) AS (
SELECT 1
UNION ALL
SELECT n+1 FROM link_numbers
LIMIT 200000
LIMIT 20000
)
INSERT INTO links (user_id, slug, url)
SELECT
@@ -17,37 +17,44 @@ SELECT
'https://sjdonado.com/page/' || n
FROM link_numbers;
-- Create 200,000,000 clicks (1,000 per link)
WITH RECURSIVE link_numbers(link_id) AS (
SELECT id FROM links
),
click_batch(link_id, n) AS (
SELECT link_id, 1 FROM link_numbers
-- Create 1,000 clicks per link (20,000,000 total)
-- Using batched approach for better performance
CREATE TEMP TABLE link_ids AS SELECT id FROM links;
CREATE TEMP TABLE click_counts(link_id, count) AS
WITH RECURSIVE counts(n) AS (
SELECT 1
UNION ALL
SELECT link_id, n+1 FROM click_batch WHERE n < 1000
SELECT n+1 FROM counts
LIMIT 1000
)
SELECT l.id, c.n
FROM link_ids l
CROSS JOIN counts c;
-- Insert clicks from the count table
INSERT INTO clicks (link_id, user_agent, browser, os, referer, country)
SELECT
link_id,
CASE (n % 5)
CASE (count % 5)
WHEN 0 THEN 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
WHEN 1 THEN 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15)'
WHEN 2 THEN 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_0)'
WHEN 3 THEN 'Mozilla/5.0 (X11; Linux x86_64)'
ELSE 'Mozilla/5.0 (Android 11; Mobile)'
END,
CASE (n % 3)
CASE (count % 3)
WHEN 0 THEN 'Chrome'
WHEN 1 THEN 'Firefox'
ELSE 'Safari'
END,
CASE (n % 4)
CASE (count % 4)
WHEN 0 THEN 'Windows'
WHEN 1 THEN 'macOS'
WHEN 2 THEN 'iOS'
ELSE 'Android'
END,
CASE (n % 6)
CASE (count % 6)
WHEN 0 THEN 'https://sjdonado.com'
WHEN 1 THEN 'https://donado.co'
WHEN 2 THEN 'https://idonthavespotify.donado.co'
@@ -55,7 +62,7 @@ SELECT
WHEN 4 THEN 'https://github.com/sjdonado'
ELSE NULL
END,
CASE (n % 10)
CASE (count % 10)
WHEN 0 THEN 'US'
WHEN 1 THEN 'UK'
WHEN 2 THEN 'Canada'
@@ -67,4 +74,8 @@ SELECT
WHEN 8 THEN 'India'
ELSE 'China'
END
FROM click_batch;
FROM click_counts;
-- Clean up
DROP TABLE link_ids;
DROP TABLE click_counts;
+18 -115
View File
@@ -126,128 +126,31 @@ INFO[0001] starting ... context=vm
INFO[0076] provisioning ... context=docker
INFO[0077] starting ... context=docker
INFO[0077] done
~/p/bit> ./benchmark.sh
~/p/bit> ./benchmark.cr
Setting up...
[+] Running 3/3
✔ Network bit_default Created 0.0s
✔ Volume "bit_sqlite_data" Created 0.0s
✔ Container bit Started 0.1s
Captured API Key: xFWMNJndUzuAC5sSRqgbSA
Waiting for the application to be ready...
HTTP/1.1 200 OK
Connection: keep-alive
Content-Type: application/json
Date: Tue, 18 Mar 2025 10:04:32 GMT
Access-Control-Allow-Origin: *
Access-Control-Allow-Methods: GET, POST, PUT, DELETE, OPTIONS
Access-Control-Allow-Headers: Content-Type, Accept, Origin, X-Api-Key
Content-Length: 13
Starting resource usage monitoring...
Creating 10000 short links with 100 concurrent requests...
Link creation complete: 10000 links created using httpbin's anything endpoint.
Seeding the database...
Checking seed results...
Fetching all created links from /api/links...
Selected link for benchmarking: http://localhost:4000/4NRtcA
Selected link for benchmarking: http://localhost:4000/slug187082
Starting benchmark with Bombardier...
Bombarding http://localhost:4000/4NRtcA with 100000 request(s) using 100 connection(s)
100000 / 100000 [============================================================] 100.00% 1308/s 1m16s
Bombarding http://localhost:4000/slug187082 with 100000 request(s) using 100 connection(s)
100000 / 100000 [==============================================================] 100.00% 12180/s 8s
Done!
Statistics Avg Stdev Max
Reqs/sec 1328.47 1785.03 10390.15
Latency 76.19ms 29.43ms 702.17ms
Reqs/sec 12335.45 3288.95 20393.16
Latency 8.11ms 1.89ms 35.42ms
HTTP codes:
1xx - 0, 2xx - 0, 3xx - 100000, 4xx - 0, 5xx - 0
1xx - 0, 2xx - 0, 3xx - 0, 4xx - 0, 5xx - 100000
others - 0
Throughput: 563.57KB/s
Benchmark completed.
Throughput: 2.93MB/s
Benchmark completed successfully.
Analyzing resource usage...
**** Results ****
Average CPU Usage: 37.02%
Average Memory Usage: 31.78 MiB
./benchmark.sh: line 135: 44688 Terminated: 15 monitor_resource_usage
[+] Running 2/2
✔ Container bit Removed 10.1s
✔ Network bit_default Removed 0.0s
~/p/bit> ./benchmark.sh
Setting up...
[+] Running 2/2
✔ Network bit_default Created 0.0s
✔ Container bit Started 0.1s
Captured API Key: zmEqrjCMbOGzdOXoCZPPsw
Waiting for the application to be ready...
HTTP/1.1 200 OK
Connection: keep-alive
Content-Type: application/json
Date: Tue, 18 Mar 2025 10:07:11 GMT
Access-Control-Allow-Origin: *
Access-Control-Allow-Methods: GET, POST, PUT, DELETE, OPTIONS
Access-Control-Allow-Headers: Content-Type, Accept, Origin, X-Api-Key
Content-Length: 13
Starting resource usage monitoring...
Creating 10000 short links with 100 concurrent requests...
Link creation complete: 10000 links created using httpbin's anything endpoint.
Fetching all created links from /api/links...
Selected link for benchmarking: http://localhost:4000/kai6VA
Starting benchmark with Bombardier...
Bombarding http://localhost:4000/kai6VA with 100000 request(s) using 100 connection(s)
100000 / 100000 [============================================================] 100.00% 1336/s 1m14s
Done!
Statistics Avg Stdev Max
Reqs/sec 1357.19 1789.06 18041.44
Latency 74.68ms 14.50ms 304.69ms
HTTP codes:
1xx - 0, 2xx - 0, 3xx - 100000, 4xx - 0, 5xx - 0
others - 0
Throughput: 575.03KB/s
Benchmark completed.
Analyzing resource usage...
**** Results ****
Average CPU Usage: 38.00%
Average Memory Usage: 30.75 MiB
./benchmark.sh: line 135: 64339 Terminated: 15 monitor_resource_usage
[+] Running 2/2
✔ Container bit Removed 10.1s
✔ Network bit_default Removed 0.0s
~/p/bit> ./benchmark.sh
Setting up...
[+] Running 2/2
✔ Network bit_default Created 0.0s
✔ Container bit Started 0.1s
Captured API Key: fObPw7vIDCFBaxr8e9bI8g
Waiting for the application to be ready...
HTTP/1.1 200 OK
Connection: keep-alive
Content-Type: application/json
Date: Tue, 18 Mar 2025 10:08:57 GMT
Access-Control-Allow-Origin: *
Access-Control-Allow-Methods: GET, POST, PUT, DELETE, OPTIONS
Access-Control-Allow-Headers: Content-Type, Accept, Origin, X-Api-Key
Content-Length: 13
Starting resource usage monitoring...
Creating 10000 short links with 100 concurrent requests...
Link creation complete: 10000 links created using httpbin's anything endpoint.
Fetching all created links from /api/links...
Selected link for benchmarking: http://localhost:4000/oxmHow
Starting benchmark with Bombardier...
Bombarding http://localhost:4000/oxmHow with 100000 request(s) using 100 connection(s)
100000 / 100000 [============================================================] 100.00% 1388/s 1m12s
Done!
Statistics Avg Stdev Max
Reqs/sec 1407.03 1778.84 5866.74
Latency 71.85ms 9.42ms 175.45ms
HTTP codes:
1xx - 0, 2xx - 0, 3xx - 100000, 4xx - 0, 5xx - 0
others - 0
Throughput: 597.97KB/s
Benchmark completed.
Analyzing resource usage...
**** Results ****
Average CPU Usage: 38.49%
Average Memory Usage: 36.48 MiB
./benchmark.sh: line 135: 79562 Terminated: 15 monitor_resource_usage
[+] Running 2/2
✔ Container bit Removed 10.1s
✔ Network bit_default Removed 0.0s
**** Resource Usage Statistics ****
Measurements: 5
Average CPU Usage: 39.5%
Average Memory Usage: 35.25 MiB
Peak CPU Usage: 82.25%
Peak Memory Usage: 37.45 MiB
Cleanup completed. Resource usage data saved in resource_usage.txt
```