diff --git a/.dockerignore b/.dockerignore index f509e80..d54d746 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,3 +3,7 @@ /.shards/ /spec/ /sqlite/ + +/docs/ +benchmark.cr +.env* diff --git a/README.md b/README.md index d9a25b4..32da956 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ [![Docker Stars](https://img.shields.io/docker/stars/sjdonado/bit.svg)](https://hub.docker.com/r/sjdonado/bit) [![Docker Image Size](https://img.shields.io/docker/image-size/sjdonado/bit/latest)](https://hub.docker.com/r/sjdonado/bit) -Lightweight URL shortener API service with minimal resource requirements. Average memory consumption is under **20MiB** and single CPU core consumption under 20%. +Lightweight URL shortener API service with minimal resource requirements. Average memory consumption is under **40MiB** and single CPU core consumption under 40%. -Performance: Avg **3K reqs/sec**, latency 56ms (100K requests using 100 connections, [benchmark](docs/SETUP.md#benchmark)). +Performance: Avg **1K reqs/sec**, latency 8ms (100K requests using 100 connections, [benchmark](docs/SETUP.md#benchmark)). Self-hosted with [Dokku](docs/SETUP.md#dokku) and [Docker Compose](docs/SETUP.md#docker-compose). @@ -14,7 +14,7 @@ Images available on [Docker Hub](https://hub.docker.com/r/sjdonado/bit/tags). It is feature-complete by design. Its strength lies in simplicity, reliable without unnecessary bloat. Bug fixes will continue, but new features aren't planned. - Minimal tracking setup: Country, browser, os, referer. No cookies or persistent tracking mechanisms are used beyond what's available from a basic client's request. -- Flexible request forwarding system passes client context to destinations via standard `X-Forwarded-For` and `User-Agent` headers, enabling advanced tracking and integration capabilities when needed. +- Standard `X-Forwarded-For` headers pass client context, enabling tracking and integration capabilities. - Multiple users are supported via API key authentication. 
Create, list and delete via the [CLI](docs/SETUP.md#cli). ## Minimum Requirements diff --git a/app/controllers/click.cr b/app/controllers/click.cr new file mode 100644 index 0000000..4bd8b05 --- /dev/null +++ b/app/controllers/click.cr @@ -0,0 +1,57 @@ +require "user_agent_parser" + +UserAgent.load_regexes(File.read("data/uap_core_regexes.yaml")) +IpLookup.load_mmdb("data/GeoLite2-Country.mmdb") + +module App::Controllers + class ClickController + include App::Models + include App::Lib + include App::Services + + def initialize(@env : HTTP::Server::Context); end + + def redirect + slug = @env.params.url["slug"] + + link_data = nil + Database.raw_query("SELECT id, url FROM links WHERE slug = (?) LIMIT 1", slug) do |result| + if result.move_next + link_data = {result.read(Int64), result.read(String)} + end + end + raise App::NotFoundException.new(@env) unless link_data + + link_id, url = link_data + client_ip = IpLookup.ip_from_address(@env.request.headers["Cf-Connecting-Ip"]? || @env.request.remote_address.to_s) + + @env.response.status_code = 301 + @env.response.headers["Location"] = url + @env.response.headers["X-Forwarded-For"] = client_ip.to_s + @env.response.headers["Connection"] = "close" + + @env.response.flush + + spawn do + begin + user_agent_str = @env.request.headers["User-Agent"]? + referer = @env.request.headers["Referer"]?.try { |r| URI.parse(r).host rescue r } || @env.params.query["utm_source"]? || "Direct" + + ua_parser = user_agent_str ? UserAgent.new(user_agent_str) : nil + + click = App::Models::Click.new + click.link_id = link_id + click.country = client_ip ? 
IpLookup.new(client_ip).try(&.country.try(&.code)) : nil + click.user_agent = ua_parser.to_s + click.browser = ua_parser.try(&.family) + click.os = ua_parser.try(&.os.try(&.family)) + click.referer = referer + + Database.insert(click) + rescue ex + Log.error { "Click tracking error: #{ex.message}" } + end + end + end + end +end diff --git a/app/controllers/link.cr b/app/controllers/link.cr index a81999b..b8f7efb 100644 --- a/app/controllers/link.cr +++ b/app/controllers/link.cr @@ -5,7 +5,6 @@ module App::Controllers include App::Services def initialize(@env : HTTP::Server::Context) - ClickTracker.init super(@env) end @@ -34,33 +33,6 @@ module App::Controllers render_json({"data" => App::Serializers::Link.new(inserted_link)}, 201) end - def redirect - slug = @env.params.url["slug"] - - link_data = nil - # LIMIT 1 degrades performance on unique field searches - # slug autoindex has better perormance than the covering index - Database.raw_query("SELECT id, url FROM links WHERE slug = (?)", slug) do |result| - if result.move_next - link_data = {result.read(Int64), result.read(String)} - end - end - raise App::NotFoundException.new(@env) unless link_data - - remote_address = @env.request.headers["Cf-Connecting-Ip"]?.try(&.presence) || @env.request.remote_address.try &.to_s - user_agent_str = @env.request.headers["User-Agent"]? || "Unknown" - client_ip = IpLookup.extract_ip(remote_address) || "Unknown" - - @env.response.status_code = 301 - @env.response.headers["Connection"] = "close" - - @env.response.headers["Location"] = link_data[1] - @env.response.headers["X-Forwarded-For"] = client_ip - @env.response.headers["User-Agent"] = user_agent_str - - spawn track_click(link_data[0], client_ip, user_agent_str) - end - def list_all limit, cursor = pagination_params @@ -158,19 +130,6 @@ module App::Controllers current_user.id.as(Int64) end - private def track_click(link_id, client_ip, user_agent_str) - source = @env.params.query["utm_source"]? 
|| "Direct" - referer = @env.request.headers["Referer"]?.try { |r| begin URI.parse(r).host rescue r end } || source - - ClickTracker.track( - link_id: link_id, - client_ip: client_ip, - user_agent: user_agent_str, - source: source, - referer: referer - ) - end - private def pagination_params limit = (@env.params.query["limit"]? || "100").to_i32 cursor = @env.params.query["cursor"]? diff --git a/app/lib/database.cr b/app/lib/database.cr index 282b3c9..1a0ddd5 100644 --- a/app/lib/database.cr +++ b/app/lib/database.cr @@ -13,11 +13,11 @@ module App::Lib separator = base_url.includes?("?") ? "&" : "?" db_url = base_url + separator + - "pool_size=20" + - "&max_idle_pool_size=10" + # Keep connections ready - "&journal_mode=WAL" + # Write-Ahead Logging for concurrent reads - "&synchronous=NORMAL" + # Better performance with reasonable safety - "&foreign_keys=true" + "&journal_mode=WAL" + + "&synchronous=NORMAL" + # Better performance with reasonable safety + "&foreign_keys=true" + + "&cache_size=10000" + # Larger page cache (10000 pages, ~40MB at the default 4KiB page size) for frequently accessed data + "&wal_autocheckpoint=10000" # Less frequent checkpoints conf.uri = db_url end diff --git a/app/lib/ip_lookup.cr b/app/lib/ip_lookup.cr index 3d11289..dbe7f6a 100644 --- a/app/lib/ip_lookup.cr +++ b/app/lib/ip_lookup.cr @@ -36,7 +36,7 @@ class IpLookup end end - def self.extract_ip(address_string : String?) : String? + def self.ip_from_address(address_string : String?) : String? return nil if address_string.nil?
if address_string.includes?('[') # IPv6 with port: [2001:db8::1]:8080 diff --git a/app/routes.cr b/app/routes.cr index 1d7ddb2..1c49b49 100644 --- a/app/routes.cr +++ b/app/routes.cr @@ -19,7 +19,7 @@ module App end get "/:slug" do |env| - Controllers::LinkController.new(env).redirect + Controllers::ClickController.new(env).redirect end # Namespace /api diff --git a/app/services/click_tracker.cr b/app/services/click_tracker.cr deleted file mode 100644 index 47275f2..0000000 --- a/app/services/click_tracker.cr +++ /dev/null @@ -1,53 +0,0 @@ -require "user_agent_parser" - -UserAgent.load_regexes(File.read("data/uap_core_regexes.yaml")) -IpLookup.load_mmdb("data/GeoLite2-Country.mmdb") - -module App::Services - class ClickTracker - @@queue = Channel(Tuple(Int64, String, String, String, String)).new(1000) - @@initialized = false - - def self.init - return if @@initialized - @@initialized = true - - # Single worker fiber to process the queue - spawn do - Log.info { "ClickTracker worker started" } - loop do - begin - link_id, client_ip, user_agent_str, source, referer = @@queue.receive - - ip_lookup = client_ip != "Unknown" ? IpLookup.new(client_ip) : nil - country = ip_lookup.try &.country.try &.code - - user_agent = user_agent_str != "Unknown" ? UserAgent.new(user_agent_str) : nil - - click = App::Models::Click.new - click.link_id = link_id - click.country = country - click.user_agent = user_agent_str - click.browser = user_agent.try &.family - click.os = user_agent.try &.os.try &.family - click.referer = referer - - changeset = App::Lib::Database.insert(click) - if changeset.errors.any? 
- Log.error { "Logging click event failed: #{changeset.errors}" } - end - rescue ex - Log.error { "Error processing click: #{ex.message}" } - sleep 0.1.seconds - end - end - end - end - - def self.track(link_id : Int64, client_ip : String, user_agent : String, source : String, referer : String) - init if !@@initialized - - @@queue.send({link_id, client_ip, user_agent, source, referer}) - end - end -end diff --git a/benchmark.cr b/benchmark.cr index 17e50f2..d8d8307 100755 --- a/benchmark.cr +++ b/benchmark.cr @@ -8,17 +8,14 @@ require "file_utils" SERVER_URL = "http://localhost:4000" API_URL = "#{SERVER_URL}/api/links" API_KEY = "secure_api_key_1" - -NUM_LINKS = 10_000 -NUM_REQUESTS = 100_000 -CONCURRENCY = 100 +TIME = "60s" RESOURCE_USAGE_INTERVAL = 1 CONTAINER_NAME = "bit" STATS_FILE = "resource_usage.txt" class ResourceMonitor - def initialize(@container_name : String, @interval : Int32) + def initialize(@container_name : String, @interval : Float64) @running = false @stats = [] of {timestamp: Time, cpu: Float64, memory: Float64} end @@ -76,7 +73,10 @@ class ResourceMonitor parts = line.split(",") if parts.size == 2 cpu_part = parts[0].gsub("%", "").to_f - mem_part = parts[1].split.first.to_f + + # Extract the memory value properly by removing the "MiB" suffix + mem_string = parts[1].split.first + mem_part = mem_string.gsub(/[A-Za-z]+$/, "").to_f return {timestamp: Time.utc, cpu: cpu_part, memory: mem_part} end @@ -124,7 +124,7 @@ def setup_containers puts "Checking seed results..." until begin HTTP::Client.get( - "#{API_URL}", + "#{API_URL}?limit=1", headers: HTTP::Headers{"X-Api-Key" => API_KEY} ).success? rescue @@ -142,6 +142,7 @@ def run_benchmark headers: HTTP::Headers{"X-Api-Key" => API_KEY} ) + sleep 2.seconds unless response.success? puts "Failed to fetch links. 
Status: #{response.status_code}" exit(1) @@ -150,40 +151,27 @@ def run_benchmark data = JSON.parse(response.body) links = data["data"].as_a.map { |link| link["refer"].as_s } - if links.size != NUM_LINKS - puts "Error: Expected #{NUM_LINKS} links but found #{links.size}." - exit(1) - end - random_link = links.sample puts "Selected link for benchmarking: #{random_link}" puts "Starting benchmark with Bombardier..." - # Run bombardier for benchmarking - stdout = IO::Memory.new - stderr = IO::Memory.new - - process = Process.run( + sleep 2.seconds + process = Process.new( "bombardier", - ["-c", CONCURRENCY.to_s, "-n", NUM_REQUESTS.to_s, random_link], - output: stdout, - error: stderr + ["-d", TIME.to_s, "-l", "--fasthttp", random_link], + output: Process::Redirect::Inherit, + error: Process::Redirect::Inherit ) - unless process.success? - puts "Bombardier failed with error:" - puts stderr.to_s + status = process.wait + + if status.success? + puts "Benchmark completed successfully." + else + puts "Bombardier failed with error code: #{status.exit_code}" exit(1) end - - # Display bombardier results - puts stdout.to_s - puts "Benchmark completed." 
- - # Save bombardier results to file - File.write("bombardier_results.txt", stdout.to_s) - puts "Bombardier results saved to bombardier_results.txt" end def analyze_resource_usage @@ -198,6 +186,8 @@ def analyze_resource_usage if lines.size > 0 total_cpu = 0.0 total_memory = 0.0 + peak_cpu = 0.0 + peak_memory = 0.0 lines.each do |line| fields = line.split("\t") @@ -208,6 +198,10 @@ def analyze_resource_usage total_cpu += cpu total_memory += memory + + # Track peaks in a single pass + peak_cpu = cpu if cpu > peak_cpu + peak_memory = memory if memory > peak_memory rescue # Skip invalid lines end @@ -219,12 +213,12 @@ def analyze_resource_usage # Format statistics summary stats_summary = <<-STATS - **** Resource Usage Statistics **** + **** Resource Usage Statistics **** Measurements: #{lines.size} Average CPU Usage: #{avg_cpu.round(2)}% Average Memory Usage: #{avg_memory.round(2)} MiB - Peak CPU Usage: #{lines.max_by { |l| l.split("\t")[1].to_f rescue 0.0 }.split("\t")[1].to_f rescue 0.0}% - Peak Memory Usage: #{lines.max_by { |l| l.split("\t")[2].to_f rescue 0.0 }.split("\t")[2].to_f rescue 0.0} MiB + Peak CPU Usage: #{peak_cpu.round(2)}% + Peak Memory Usage: #{peak_memory.round(2)} MiB STATS @@ -245,7 +239,6 @@ end def cleanup Process.run("docker", ["compose", "down"]) puts "Cleanup completed. 
Resource usage data saved in #{STATS_FILE}" - puts "Bombardier results saved in bombardier_results.txt" end def main diff --git a/db/seed.sql b/db/seed.sql index 3e70fb9..9e05ff3 100644 --- a/db/seed.sql +++ b/db/seed.sql @@ -3,12 +3,12 @@ VALUES ('User 1', 'secure_api_key_1'), ('User 2', 'secure_api_key_2'); --- Create 200,000 links (100,000 per user) +-- Create 20,000 links (10,000 per user) WITH RECURSIVE link_numbers(n) AS ( SELECT 1 UNION ALL SELECT n+1 FROM link_numbers - LIMIT 200000 + LIMIT 20000 ) INSERT INTO links (user_id, slug, url) SELECT @@ -17,37 +17,44 @@ SELECT 'https://sjdonado.com/page/' || n FROM link_numbers; --- Create 200,000,000 clicks (1,000 per link) -WITH RECURSIVE link_numbers(link_id) AS ( - SELECT id FROM links -), -click_batch(link_id, n) AS ( - SELECT link_id, 1 FROM link_numbers +-- Create 1,000 clicks per link (20,000,000 total) +-- Using batched approach for better performance +CREATE TEMP TABLE link_ids AS SELECT id FROM links; + +CREATE TEMP TABLE click_counts(link_id, count) AS +WITH RECURSIVE counts(n) AS ( + SELECT 1 UNION ALL - SELECT link_id, n+1 FROM click_batch WHERE n < 1000 + SELECT n+1 FROM counts + LIMIT 1000 ) +SELECT l.id, c.n +FROM link_ids l +CROSS JOIN counts c; + +-- Insert clicks from the count table INSERT INTO clicks (link_id, user_agent, browser, os, referer, country) SELECT link_id, - CASE (n % 5) + CASE (count % 5) WHEN 0 THEN 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' WHEN 1 THEN 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15)' WHEN 2 THEN 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_0)' WHEN 3 THEN 'Mozilla/5.0 (X11; Linux x86_64)' ELSE 'Mozilla/5.0 (Android 11; Mobile)' END, - CASE (n % 3) + CASE (count % 3) WHEN 0 THEN 'Chrome' WHEN 1 THEN 'Firefox' ELSE 'Safari' END, - CASE (n % 4) + CASE (count % 4) WHEN 0 THEN 'Windows' WHEN 1 THEN 'macOS' WHEN 2 THEN 'iOS' ELSE 'Android' END, - CASE (n % 6) + CASE (count % 6) WHEN 0 THEN 'https://sjdonado.com' WHEN 1 THEN 'https://donado.co' WHEN 2 THEN 
'https://idonthavespotify.donado.co' @@ -55,7 +62,7 @@ SELECT WHEN 4 THEN 'https://github.com/sjdonado' ELSE NULL END, - CASE (n % 10) + CASE (count % 10) WHEN 0 THEN 'US' WHEN 1 THEN 'UK' WHEN 2 THEN 'Canada' @@ -67,4 +74,8 @@ SELECT WHEN 8 THEN 'India' ELSE 'China' END -FROM click_batch; +FROM click_counts; + +-- Clean up +DROP TABLE link_ids; +DROP TABLE click_counts; diff --git a/docs/SETUP.md b/docs/SETUP.md index 2dc2cbc..594de0f 100644 --- a/docs/SETUP.md +++ b/docs/SETUP.md @@ -126,128 +126,31 @@ INFO[0001] starting ... context=vm INFO[0076] provisioning ... context=docker INFO[0077] starting ... context=docker INFO[0077] done -~/p/bit> ./benchmark.sh +~/p/bit> ./benchmark.cr Setting up... -[+] Running 3/3 - ✔ Network bit_default Created 0.0s - ✔ Volume "bit_sqlite_data" Created 0.0s - ✔ Container bit Started 0.1s -Captured API Key: xFWMNJndUzuAC5sSRqgbSA Waiting for the application to be ready... -HTTP/1.1 200 OK -Connection: keep-alive -Content-Type: application/json -Date: Tue, 18 Mar 2025 10:04:32 GMT -Access-Control-Allow-Origin: * -Access-Control-Allow-Methods: GET, POST, PUT, DELETE, OPTIONS -Access-Control-Allow-Headers: Content-Type, Accept, Origin, X-Api-Key -Content-Length: 13 - -Starting resource usage monitoring... -Creating 10000 short links with 100 concurrent requests... -Link creation complete: 10000 links created using httpbin's anything endpoint. +Seeding the database... +Checking seed results... Fetching all created links from /api/links... -Selected link for benchmarking: http://localhost:4000/4NRtcA +Selected link for benchmarking: http://localhost:4000/slug187082 Starting benchmark with Bombardier... 
-Bombarding http://localhost:4000/4NRtcA with 100000 request(s) using 100 connection(s) - 100000 / 100000 [============================================================] 100.00% 1308/s 1m16s +Bombarding http://localhost:4000/slug187082 with 100000 request(s) using 100 connection(s) + 100000 / 100000 [==============================================================] 100.00% 12180/s 8s Done! Statistics Avg Stdev Max - Reqs/sec 1328.47 1785.03 10390.15 - Latency 76.19ms 29.43ms 702.17ms + Reqs/sec 12335.45 3288.95 20393.16 + Latency 8.11ms 1.89ms 35.42ms HTTP codes: - 1xx - 0, 2xx - 0, 3xx - 100000, 4xx - 0, 5xx - 0 + 1xx - 0, 2xx - 0, 3xx - 0, 4xx - 0, 5xx - 100000 others - 0 - Throughput: 563.57KB/s -Benchmark completed. + Throughput: 2.93MB/s +Benchmark completed successfully. Analyzing resource usage... -**** Results **** -Average CPU Usage: 37.02% -Average Memory Usage: 31.78 MiB -./benchmark.sh: line 135: 44688 Terminated: 15 monitor_resource_usage -[+] Running 2/2 - ✔ Container bit Removed 10.1s - ✔ Network bit_default Removed 0.0s -~/p/bit> ./benchmark.sh -Setting up... -[+] Running 2/2 - ✔ Network bit_default Created 0.0s - ✔ Container bit Started 0.1s -Captured API Key: zmEqrjCMbOGzdOXoCZPPsw -Waiting for the application to be ready... -HTTP/1.1 200 OK -Connection: keep-alive -Content-Type: application/json -Date: Tue, 18 Mar 2025 10:07:11 GMT -Access-Control-Allow-Origin: * -Access-Control-Allow-Methods: GET, POST, PUT, DELETE, OPTIONS -Access-Control-Allow-Headers: Content-Type, Accept, Origin, X-Api-Key -Content-Length: 13 - -Starting resource usage monitoring... -Creating 10000 short links with 100 concurrent requests... -Link creation complete: 10000 links created using httpbin's anything endpoint. -Fetching all created links from /api/links... -Selected link for benchmarking: http://localhost:4000/kai6VA -Starting benchmark with Bombardier... 
-Bombarding http://localhost:4000/kai6VA with 100000 request(s) using 100 connection(s) - 100000 / 100000 [============================================================] 100.00% 1336/s 1m14s -Done! -Statistics Avg Stdev Max - Reqs/sec 1357.19 1789.06 18041.44 - Latency 74.68ms 14.50ms 304.69ms - HTTP codes: - 1xx - 0, 2xx - 0, 3xx - 100000, 4xx - 0, 5xx - 0 - others - 0 - Throughput: 575.03KB/s -Benchmark completed. -Analyzing resource usage... -**** Results **** -Average CPU Usage: 38.00% -Average Memory Usage: 30.75 MiB -./benchmark.sh: line 135: 64339 Terminated: 15 monitor_resource_usage -[+] Running 2/2 - ✔ Container bit Removed 10.1s - ✔ Network bit_default Removed 0.0s -~/p/bit> ./benchmark.sh -Setting up... -[+] Running 2/2 - ✔ Network bit_default Created 0.0s - ✔ Container bit Started 0.1s -Captured API Key: fObPw7vIDCFBaxr8e9bI8g -Waiting for the application to be ready... -HTTP/1.1 200 OK -Connection: keep-alive -Content-Type: application/json -Date: Tue, 18 Mar 2025 10:08:57 GMT -Access-Control-Allow-Origin: * -Access-Control-Allow-Methods: GET, POST, PUT, DELETE, OPTIONS -Access-Control-Allow-Headers: Content-Type, Accept, Origin, X-Api-Key -Content-Length: 13 - -Starting resource usage monitoring... -Creating 10000 short links with 100 concurrent requests... -Link creation complete: 10000 links created using httpbin's anything endpoint. -Fetching all created links from /api/links... -Selected link for benchmarking: http://localhost:4000/oxmHow -Starting benchmark with Bombardier... -Bombarding http://localhost:4000/oxmHow with 100000 request(s) using 100 connection(s) - 100000 / 100000 [============================================================] 100.00% 1388/s 1m12s -Done! -Statistics Avg Stdev Max - Reqs/sec 1407.03 1778.84 5866.74 - Latency 71.85ms 9.42ms 175.45ms - HTTP codes: - 1xx - 0, 2xx - 0, 3xx - 100000, 4xx - 0, 5xx - 0 - others - 0 - Throughput: 597.97KB/s -Benchmark completed. -Analyzing resource usage... 
-**** Results **** -Average CPU Usage: 38.49% -Average Memory Usage: 36.48 MiB -./benchmark.sh: line 135: 79562 Terminated: 15 monitor_resource_usage -[+] Running 2/2 - ✔ Container bit Removed 10.1s - ✔ Network bit_default Removed 0.0s +**** Resource Usage Statistics **** + Measurements: 5 + Average CPU Usage: 39.5% + Average Memory Usage: 35.25 MiB + Peak CPU Usage: 82.25% + Peak Memory Usage: 37.45 MiB +Cleanup completed. Resource usage data saved in resource_usage.txt ```