diff --git a/app/controllers/link.cr b/app/controllers/link.cr index 8afc14e..c70f0fe 100644 --- a/app/controllers/link.cr +++ b/app/controllers/link.cr @@ -1,9 +1,11 @@ require "uuid" require "user_agent_parser" -UserAgent.load_regexes(File.read("data/regexes.yaml")) - require "../lib/controller.cr" +require "../lib/ip_lookup" + +UserAgent.load_regexes(File.read("data/regexes.yaml")) +IpLookup.load_mmdb("data/GeoLite2-Country.mmdb") module App::Controllers::Link class Create < App::Lib::BaseController @@ -51,12 +53,10 @@ module App::Controllers::Link link = Database.get_by(Link, slug: slug) raise App::NotFoundException.new(env) if !link - client_ip = env.request.remote_address.try &.to_s || "Unknown" + remote_address = env.request.remote_address.try &.to_s user_agent_str = env.request.headers["User-Agent"]? || "Unknown" - user_agent = user_agent_str != "Unknown" ? UserAgent.new(user_agent_str) : nil - language_header = env.request.headers["Accept-Language"]? || "Unknown" - language = language_header.split(',').first.split(';').first - referer = env.request.headers["Referer"]? + + client_ip = IpLookup.extract_ip(remote_address) || "Unknown" env.response.status_code = 301 env.response.headers["Location"] = link.url! @@ -65,14 +65,22 @@ module App::Controllers::Link env.response.headers["X-Forwarded-User-Agent"] = user_agent_str spawn do + ip_lookup = client_ip != "Unknown" ? IpLookup.new(client_ip) : nil + country = ip_lookup.try &.country.try &.code + + user_agent = user_agent_str != "Unknown" ? UserAgent.new(user_agent_str) : nil + + source = env.params.query["utm_source"]? || "Direct" + referer_host = env.request.headers["Referer"]?.try { |r| begin URI.parse(r).host rescue r end } || source + click = Click.new click.id = UUID.v4.to_s click.link = link - click.language = language + click.country = country click.user_agent = user_agent_str - click.browser = user_agent ? user_agent.family : "Unknown" - click.os = user_agent ? 
(user_agent.os.try &.family || "Unknown") : "Unknown" - click.referer = referer ? URI.parse(referer).host : "Unknown" + click.browser = user_agent.try &.family + click.os = user_agent.try &.os.try &.family + click.referer = referer_host changeset = Database.insert(click) if changeset.errors.any? diff --git a/app/lib/ip_lookup.cr b/app/lib/ip_lookup.cr new file mode 100644 index 0000000..3d11289 --- /dev/null +++ b/app/lib/ip_lookup.cr @@ -0,0 +1,54 @@ +require "maxminddb" + +class IpLookup + @@instance : MaxMindDB::Reader? = nil + + record Country, code : String? = nil, name : String? = nil + + getter ip : String + getter country : Country? + + def self.load_mmdb(mmdb_file_path : String) + @@instance = MaxMindDB.open(mmdb_file_path) + end + + def initialize(ip_address : String) + @ip = ip_address + @country = nil + + return if @@instance.nil? || ip_address == "Unknown" || ip_address.empty? + + begin + lookup = @@instance.not_nil!.get(ip_address) + + country_code = lookup["country"]?.try &.["iso_code"]?.try &.as_s + country_name = lookup["country"]?.try &.["names"]?.try &.["en"]?.try &.as_s + + if country_code || country_name + @country = Country.new( + code: country_code, + name: country_name + ) + end + rescue ex + # Silently handle lookup errors + Log.error { "IP lookup failed: #{ex.message}" } + end + end + + def self.extract_ip(address_string : String?) : String? + return nil if address_string.nil? 
+ + if address_string.includes?('[') # IPv6 with port: [2001:db8::1]:8080 + address_string.split(']').first.sub('[', "") # drop the opening '[' so only the bare IPv6 address remains + elsif address_string.includes?(':') + if address_string.count(':') > 1 # IPv6 without port + address_string + else # IPv4 with port: 192.168.1.1:8080 + address_string.split(':').first + end + else # Address without port + address_string + end + end +end diff --git a/app/models/click.cr b/app/models/click.cr index 7a49301..2b3c8fe 100644 --- a/app/models/click.cr +++ b/app/models/click.cr @@ -13,6 +13,6 @@ module App::Models belongs_to :link, Link end - validate_required [:user_agent, :country, :referer] + validate_required [:user_agent, :referer] end end diff --git a/data/GeoLite2-Country.mmdb b/data/GeoLite2-Country.mmdb new file mode 100644 index 0000000..bcda987 Binary files /dev/null and b/data/GeoLite2-Country.mmdb differ diff --git a/db/migrations/20250316111734_replace_unkwown_with_null.sql b/db/migrations/20250316111734_replace_unkwown_with_null.sql new file mode 100644 index 0000000..c63babc --- /dev/null +++ b/db/migrations/20250316111734_replace_unkwown_with_null.sql @@ -0,0 +1,13 @@ +-- +micrate Up +-- SQL in section 'Up' is executed when this migration is applied +UPDATE clicks SET user_agent = NULL WHERE user_agent = 'Unknown'; +UPDATE clicks SET browser = NULL WHERE browser = 'Unknown'; +UPDATE clicks SET os = NULL WHERE os = 'Unknown'; +UPDATE clicks SET referer = NULL WHERE referer = 'Unknown'; + +-- +micrate Down +-- SQL section 'Down' is executed when this migration is rolled back +UPDATE clicks SET user_agent = 'Unknown' WHERE user_agent IS NULL; +UPDATE clicks SET browser = 'Unknown' WHERE browser IS NULL; +UPDATE clicks SET os = 'Unknown' WHERE os IS NULL; +UPDATE clicks SET referer = 'Unknown' WHERE referer IS NULL; diff --git a/shard.lock b/shard.lock index 58fe7e7..1703678 100644 --- a/shard.lock +++ b/shard.lock @@ -20,10 +20,18 @@ shards: git: https://github.com/crystal-loot/exception_page.git version: 0.4.1 + 
ipaddress: + git: https://github.com/sija/ipaddress.cr.git + version: 0.2.3 + kemal: git: https://github.com/kemalcr/kemal.git version: 1.5.0 + maxminddb: + git: https://github.com/delef/maxminddb.cr.git + version: 1.5.0 + micrate: git: https://github.com/amberframework/micrate.git version: 0.15.1 diff --git a/shard.yml b/shard.yml index 4b8d71d..83e1661 100644 --- a/shard.yml +++ b/shard.yml @@ -22,7 +22,8 @@ dependencies: version: 0.15.1 user_agent_parser: github: busyloop/user_agent_parser - version: 2.0.1 + maxminddb: + github: delef/maxminddb.cr development_dependencies: dotenv: diff --git a/spec/integration/link_spec.cr b/spec/integration/link_spec.cr index 588bddf..0126998 100644 --- a/spec/integration/link_spec.cr +++ b/spec/integration/link_spec.cr @@ -82,33 +82,71 @@ describe "App::Controllers::Link" do end describe "Index" do - it "should redirect to origin domain" do + it "should redirect to origin domain with forwarded headers" do link = "https://test.com" test_user = create_test_user() test_link = create_test_link(test_user, link) serialized_link = App::Serializers::Link.new(test_link) - get(serialized_link.refer, headers: HTTP::Headers{"X-Api-Key" => test_user.api_key.to_s}) + user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:127.0) Gecko/20100101 Firefox/127.0" + + get(serialized_link.refer, headers: HTTP::Headers{ + "X-Api-Key" => test_user.api_key.to_s, + "User-Agent" => user_agent + }) response.headers["Location"].should eq(link) + response.headers["X-Forwarded-User-Agent"].should eq(user_agent) + response.headers.has_key?("X-Forwarded-For").should be_true end - it "should create a new click after redirect" do + it "should create a new click after redirect with proper information" do link = "https://sjdonado.com" test_user = create_test_user() test_link = create_test_link(test_user, link) serialized_link = App::Serializers::Link.new(test_link) - get(serialized_link.refer, headers: HTTP::Headers{"User-Agent" => "Mozilla/5.0 
(Macintosh; Intel Mac OS X 10.15; rv:127.0) Gecko/20100101 Firefox/127.0"}) + user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:127.0) Gecko/20100101 Firefox/127.0" + referer = "https://example.com/page" + + get(serialized_link.refer, headers: HTTP::Headers{ + "User-Agent" => user_agent, + "Referer" => referer + }) Fiber.yield # replace yield with sleep 5 to debug errors response.headers["Location"].should eq(link) + # Verify that the click was recorded updated_test_link = get_test_link(test_link.id) updated_test_link.clicks.size.should eq(test_link.clicks.size + 1) + + # Verify click details + latest_click = updated_test_link.clicks.last + latest_click.user_agent.should eq(user_agent) + latest_click.browser.should eq("Firefox") + latest_click.os.should eq("Mac OS X") + latest_click.referer.should eq("example.com") # Should extract host from the referer + end + + it "should create a click with utm_source when no referer is provided" do + link = "https://sjdonado.com" + test_user = create_test_user() + + test_link = create_test_link(test_user, link) + serialized_link = App::Serializers::Link.new(test_link) + + # Add utm_source parameter + get("#{serialized_link.refer}?utm_source=email_campaign") + + Fiber.yield + + updated_test_link = get_test_link(test_link.id) + latest_click = updated_test_link.clicks.last + latest_click.referer.should eq("email_campaign") end it "should return 404 - link does not exist" do