From 660d53661814470b83ebcd9d6e30cdfc2b9716e5 Mon Sep 17 00:00:00 2001
From: sjdonado
Date: Sun, 23 Mar 2025 12:27:52 +0100
Subject: [PATCH] fix: replace user_agent_parser with UserAgent + pre compiled regexes

---
 app/controllers/click.cr |  10 +---
 app/lib/ua_parser.cr     | 140 +++++++++++++++++++++++++++++++++++++++
 shard.lock               |   8 ---
 shard.yml                |   2 -
 4 files changed, 143 insertions(+), 17 deletions(-)
 create mode 100644 app/lib/ua_parser.cr

diff --git a/app/controllers/click.cr b/app/controllers/click.cr
index a71f413..db9e3e9 100644
--- a/app/controllers/click.cr
+++ b/app/controllers/click.cr
@@ -1,7 +1,3 @@
-require "user_agent_parser"
-
-UserAgent.load_regexes(File.read("data/uap_core_regexes.yaml"))
-
 module App::Controllers
   struct ClickController
     include App::Models
@@ -26,14 +22,14 @@ module App::Controllers
       user_agent_str = env.request.headers["User-Agent"]?
       referer = env.request.headers["Referer"]?.try { |r| URI.parse(r).host rescue r } || env.params.query["utm_source"]? || "Direct"
 
-      ua_parser = user_agent_str ? UserAgent.new(user_agent_str) : nil
+      family, _, _, os = user_agent_str ? UserAgent.parse(user_agent_str) : {nil, nil, nil, nil}
 
       click = App::Models::Click.new
       click.link_id = link_id
       click.country = client_ip ? IpLookup.country(client_ip) : nil
       click.user_agent = user_agent_str
-      click.browser = ua_parser.try(&.family)
-      click.os = ua_parser.try(&.os.try(&.family))
+      click.browser = family
+      click.os = os.try &.[0] # Access the first element of the os tuple (the family)
       click.referer = referer
 
       Database.insert(click)
diff --git a/app/lib/ua_parser.cr b/app/lib/ua_parser.cr
new file mode 100644
index 0000000..7143243
--- /dev/null
+++ b/app/lib/ua_parser.cr
@@ -0,0 +1,140 @@
+require "yaml"
+require "semantic_version"
+
+module App::Lib
+  # Parses User-Agent strings with the regex definitions stored in REGEXES_PATH.
+  #
+  # The YAML file is read and its regexes are compiled lazily, the first time
+  # `parse` needs them; subsequent calls reuse the compiled list.
+  struct UserAgent
+    REGEXES_PATH = "data/uap_core_regexes.yaml"
+
+    @@regexes_cache : YAML::Any? = nil
+    @@compiled_regexes = {} of String => Array(Tuple(Regex, YAML::Any))
+    @@mutex = Mutex.new
+
+    # Reads REGEXES_PATH and compiles every parser regex; does nothing once the
+    # cache is populated. On failure both caches are reset to empty values.
+    private def self.load_regexes
+      @@mutex.synchronize do
+        if @@regexes_cache.nil?
+          begin
+            regexes_yaml = File.read(REGEXES_PATH)
+            @@regexes_cache = YAML.parse(regexes_yaml)
+
+            # Pre-compile all regexes for better performance
+            ["user_agent_parsers", "os_parsers", "device_parsers"].each do |parser_type|
+              @@compiled_regexes[parser_type] = [] of Tuple(Regex, YAML::Any)
+
+              @@regexes_cache.not_nil![parser_type].as_a.each do |parser|
+                regex_str = parser["regex"].as_s
+                options = parser["regex_flag"]?.try(&.as_s) == "i" ?
+                  Regex::Options::IGNORE_CASE : Regex::Options::None
+
+                begin
+                  compiled_regex = Regex.new(regex_str, options)
+                  @@compiled_regexes[parser_type] << {compiled_regex, parser}
+                rescue
+                  # Skip invalid regexes
+                end
+              end
+            end
+          rescue
+            # If loading fails, set an empty cache to prevent repeated failures
+            @@regexes_cache = YAML.parse("{}")
+            @@compiled_regexes = {} of String => Array(Tuple(Regex, YAML::Any))
+          end
+        end
+      end
+    end
+
+    # Parses *user_agent_string* and returns `{family, version, device, os}`:
+    #
+    # * family  - browser family, String or nil
+    # * version - browser SemanticVersion or nil
+    # * device  - `{model, brand, device_name}` or nil
+    # * os      - `{os_family, SemanticVersion}` or nil
+    #
+    # Every element is nil for an empty string or when no parser matches.
+    # Version captures that are missing or not plain integers count as 0.
+    def self.parse(user_agent_string : String)
+      return {nil, nil, nil, nil} if user_agent_string.empty?
+
+      # Load regexes only once and cache them
+      load_regexes
+
+      family = nil
+      version = nil
+      device = nil
+      os = nil
+
+      @@compiled_regexes["user_agent_parsers"]?.try &.each do |regex_tuple|
+        regex, parser = regex_tuple
+        match = regex.match(user_agent_string)
+        next unless match
+
+        family = match[1]?
+        # to_i? rather than to_i: a non-numeric capture must not raise
+        v1 = match[2]?.try(&.to_i?) || 0
+        v2 = match[3]?.try(&.to_i?) || 0
+        v3 = match[4]?.try(&.to_i?) || 0
+
+        # Apply replacements if defined
+        if replacement = parser["family_replacement"]?
+          family = replacement.as_s.gsub("$1", family.to_s)
+        end
+
+        version = SemanticVersion.new(v1, v2, v3)
+        break
+      end
+
+      @@compiled_regexes["os_parsers"]?.try &.each do |regex_tuple|
+        regex, parser = regex_tuple
+        match = regex.match(user_agent_string)
+        next unless match
+
+        os_family = match[1]?
+        # to_i? rather than to_i: a non-numeric capture must not raise
+        os_v1 = match[2]?.try(&.to_i?) || 0
+        os_v2 = match[3]?.try(&.to_i?) || 0
+        os_v3 = match[4]?.try(&.to_i?) || 0
+
+        # Apply replacements if defined
+        if replacement = parser["os_replacement"]?
+          os_family = replacement.as_s.gsub("$1", os_family.to_s)
+        end
+
+        os = {os_family, SemanticVersion.new(os_v1, os_v2, os_v3)}
+        break
+      end
+
+      @@compiled_regexes["device_parsers"]?.try &.each do |regex_tuple|
+        regex, parser = regex_tuple
+        match = regex.match(user_agent_string)
+        next unless match
+
+        model = match[1]?
+        device_name = model
+        brand = nil
+
+        # Apply replacements if defined
+        if device_replacement = parser["device_replacement"]?
+          device_name = device_replacement.as_s.gsub("$1", device_name.to_s)
+        end
+
+        if model_replacement = parser["model_replacement"]?
+          model = model_replacement.as_s.gsub("$1", model.to_s)
+        end
+
+        if brand_replacement = parser["brand_replacement"]?
+          brand = brand_replacement.as_s
+        end
+
+        device = {model, brand, device_name}
+        break
+      end
+
+      {family, version, device, os}
+    end
+  end
+end
diff --git a/shard.lock b/shard.lock
index b7276aa..36d2245 100644
--- a/shard.lock
+++ b/shard.lock
@@ -1,9 +1,5 @@
 version: 2.0
 shards:
-  async:
-    git: https://github.com/protoncr/async.git
-    version: 0.2.0
-
   backtracer:
     git: https://github.com/sija/backtracer.cr.git
     version: 1.2.2
@@ -60,7 +56,3 @@ shards:
     git: https://github.com/crystal-lang/crystal-sqlite3.git
     version: 0.19.0
 
-  user_agent_parser:
-    git: https://github.com/busyloop/user_agent_parser.git
-    version: 2.0.1
-
diff --git a/shard.yml b/shard.yml
index 2dac443..2093fcb 100644
--- a/shard.yml
+++ b/shard.yml
@@ -20,8 +20,6 @@ dependencies:
   micrate:
     github: amberframework/micrate
     version: 0.15.1
-  user_agent_parser:
-    github: busyloop/user_agent_parser
   maxminddb:
     github: delef/maxminddb.cr
 