feat(cli): --update-data download UA Parser and GeoLite2
This commit is contained in:
@@ -4,7 +4,7 @@ require "user_agent_parser"
|
|||||||
require "../lib/controller.cr"
|
require "../lib/controller.cr"
|
||||||
require "../lib/ip_lookup"
|
require "../lib/ip_lookup"
|
||||||
|
|
||||||
UserAgent.load_regexes(File.read("data/regexes.yaml"))
|
UserAgent.load_regexes(File.read("data/uap_core_regexes.yaml"))
|
||||||
IpLookup.load_mmdb("data/GeoLite2-Country.mmdb")
|
IpLookup.load_mmdb("data/GeoLite2-Country.mmdb")
|
||||||
|
|
||||||
module App::Controllers::Link
|
module App::Controllers::Link
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
require "file_utils"
|
||||||
|
require "http/client"
|
||||||
|
|
||||||
require "../config/*"
|
require "../config/*"
|
||||||
require "../lib/*"
|
require "../lib/*"
|
||||||
require "../models/*"
|
require "../models/*"
|
||||||
@@ -53,4 +56,86 @@ module App::Services::Cli
|
|||||||
puts "Admin setup skipped: Missing ADMIN_NAME or ADMIN_API_KEY environment variables."
|
puts "Admin setup skipped: Missing ADMIN_NAME or ADMIN_API_KEY environment variables."
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Fetches the uap-core `regexes.yaml` file and stores it as
# `data/uap_core_regexes.yaml`, creating the `data` directory when needed.
#
# Download or write errors are reported on stdout and are not re-raised.
def self.update_uap_regexes
  puts "Downloading User-Agent Parser core regexes..."

  source_url = "https://raw.githubusercontent.com/ua-parser/uap-core/master/regexes.yaml"
  destination = "data/uap_core_regexes.yaml"
  FileUtils.mkdir_p("data")

  begin
    http_get_with_redirect(source_url) do |response|
      # Read the complete body first; the file is only written afterwards.
      payload = response.body_io.gets_to_end
      File.write(destination, payload)
    end
    puts "User-Agent regexes downloaded to #{destination}"
  rescue e
    puts "Error: Failed to download UAP core regexes: #{e.message}"
  end
end
|
||||||
|
|
||||||
|
# Downloads the GeoLite2 Country database to `data/GeoLite2-Country.mmdb`,
# creating the `data` directory when needed.
#
# The response is streamed into a temporary sibling file which is renamed
# over the real database only after the transfer completed. A failed or
# interrupted download therefore never truncates a database that is already
# installed.
#
# Download errors are reported on stdout and are not re-raised.
def self.download_geolite_db
  puts "Downloading GeoLite2 Country database..."

  FileUtils.mkdir_p("data")
  url = "https://github.com/P3TERX/GeoLite.mmdb/raw/download/GeoLite2-Country.mmdb"
  output_file = "data/GeoLite2-Country.mmdb"
  # Same directory as the target so the final rename stays on one filesystem.
  temp_file = "#{output_file}.download"

  begin
    File.open(temp_file, "wb") do |file|
      http_get_with_redirect(url) do |response|
        IO.copy(response.body_io, file)
      end
    end
    # Replace the previous database only once the new one is fully written.
    File.rename(temp_file, output_file)
    puts "GeoLite2 database downloaded to #{output_file}"
  rescue e
    # Drop the partial download; the previously installed database is untouched.
    File.delete(temp_file) if File.exists?(temp_file)
    puts "Error: Failed to download GeoLite2 database: #{e.message}"
  end
end
|
||||||
|
|
||||||
|
# Performs an HTTP GET on *url*, following redirects, and yields the
# streaming response to the given block once a 200 response is received.
#
# * *url* - absolute URL to request.
# * *max_redirects* - maximum number of redirects that are followed before
#   giving up.
#
# Redirect statuses 301, 302, 303, 307 and 308 are followed. A relative
# `Location` header is resolved against the URL of the request that
# produced it.
#
# Raises when a redirect response carries no `Location` header, when a
# response has any other non-200 status, or when more than *max_redirects*
# redirects are encountered.
private def self.http_get_with_redirect(url : String, max_redirects = 5)
  uri = URI.parse(url)
  redirects = 0

  # `<=` so that exactly `max_redirects` redirects may be followed: the
  # first request is not a redirect and must not count against the limit.
  while redirects <= max_redirects
    client = HTTP::Client.new(uri)
    success = false
    next_uri = nil

    begin
      client.get(uri.request_target) do |response|
        case response.status_code
        when 200
          yield response
          success = true
        when 301, 302, 303, 307, 308
          if new_location = response.headers["Location"]?
            puts "Following redirect to: #{new_location}"
            # Location may be relative; resolve it against the current URL.
            next_uri = uri.resolve(new_location)
          else
            raise "Received redirect status but no Location header"
          end
        else
          raise "Failed request with status code: #{response.status_code}"
        end
      end
    ensure
      # Each hop uses its own client; always release the connection.
      client.close
    end

    return if success

    if target = next_uri
      uri = target
      redirects += 1
    else
      break
    end
  end

  raise "Too many redirects (#{max_redirects})"
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -148,11 +148,11 @@ user_agent_parsers:
|
|||||||
family_replacement: 'Pinterestbot'
|
family_replacement: 'Pinterestbot'
|
||||||
|
|
||||||
# Bots
|
# Bots
|
||||||
- regex: '(CSimpleSpider|Cityreview Robot|CrawlDaddy|CrawlFire|Finderbots|Index crawler|Job Roboter|KiwiStatus Spider|Lijit Crawler|QuerySeekerSpider|ScollSpider|Trends Crawler|USyd-NLP-Spider|SiteCat Webbot|BotName\/\$BotVersion|123metaspider-Bot|1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]{1,30}-Agent|AdsBot-Google(?:-[a-z]{1,30}|)|altavista|AppEngine-Google|archive.{0,30}\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|BingPreview|blitzbot|BlogBridge|Bloglovin|BoardReader Blog Indexer|BoardReader Favicon Fetcher|boitho.com-dc|BotSeer|BUbiNG|\b\w{0,30}favicon\w{0,30}\b|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher|)|Feed Seeker Bot|Feedbin|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|)|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile|)|IconSurf|IlTrovatore(?:-Setaccio|)|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]{1,30}Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masscan|masidani_bot|Mediapartners-Google|Microsoft .{0,30} Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media {0,2}|)|msrbot|Mtps Feed Aggregation System|netresearch|Netvibes|NewsGator[^/]{0,30}|^NING|Nutch[^/]{0,30}|Nymesis|ObjectsSearch|OgScrper|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PHPCrawl|PlantyNet_WebRobot|Pompos|Qwantify|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|SemrushBot|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slackbot-LinkExpanding|Slack-ImgProxy|Slurp|snappy|Speedy Spider|Squrl 
Java|Stringer|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|Tiny Tiny RSS|Twitterbot|WhatsApp|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]{1,30}|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s|) Link Sleuth|Xerka [A-z]{1,30}Bot|yacy(?:bot|)|YahooSeeker|Yahoo! Slurp|Yandex\w{1,30}|YodaoBot(?:-[A-z]{1,30}|)|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg|ArcGIS Hub Indexer)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+)|)|)|)'
|
- regex: '(CSimpleSpider|Cityreview Robot|CrawlDaddy|CrawlFire|Finderbots|Index crawler|Job Roboter|KiwiStatus Spider|Lijit Crawler|QuerySeekerSpider|ScollSpider|Trends Crawler|USyd-NLP-Spider|SiteCat Webbot|BotName\/\$BotVersion|123metaspider-Bot|1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]{1,30}-Agent|AdsBot-Google(?:-[a-z]{1,30}|)|altavista|AppEngine-Google|archive.{0,30}\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|BingPreview|blitzbot|BlogBridge|Bloglovin|BoardReader Blog Indexer|BoardReader Favicon Fetcher|boitho.com-dc|BotSeer|BUbiNG|\b\w{0,30}favicon\w{0,30}\b|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher|)|Feed Seeker Bot|Feedbin|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|)|GoogleOther|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile|)|IconSurf|IlTrovatore(?:-Setaccio|)|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]{1,30}Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masscan|masidani_bot|Mediapartners-Google|Microsoft .{0,30} Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media {0,2}|)|msrbot|Mtps Feed Aggregation System|netresearch|Netvibes|NewsGator[^/]{0,30}|^NING|Nutch[^/]{0,30}|Nymesis|ObjectsSearch|OgScrper|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PHPCrawl|PlantyNet_WebRobot|Pompos|Qwantify|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|SemrushBot|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slackbot-LinkExpanding|Slack-ImgProxy|Slurp|snappy|Speedy Spider|Squrl 
Java|Stringer|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|Tiny Tiny RSS|Twitterbot|WhatsApp|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]{1,30}|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s|) Link Sleuth|Xerka [A-z]{1,30}Bot|yacy(?:bot|)|YahooSeeker|Yahoo! Slurp|Yandex\w{1,30}|YodaoBot(?:-[A-z]{1,30}|)|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg|ArcGIS Hub Indexer|GPTBot)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+)|)|)|)'
|
||||||
|
|
||||||
# AWS S3 Clients
|
# AWS S3 Clients
|
||||||
# must come before "Bots General matcher" to catch "boto"/"boto3" before "bot"
|
# must come before "Bots General matcher" to catch "boto"/"boto3" before "bot"
|
||||||
- regex: '\b(Boto3?|JetS3t|aws-(?:cli|sdk-(?:cpp|go|java|nodejs|ruby2?|dotnet-(?:\d{1,2}|core)))|s3fs)/(\d+)\.(\d+)(?:\.(\d+)|)'
|
- regex: '\b(Boto3?|JetS3t|aws-(?:cli|sdk-(?:cpp|go|go-v\d|java|nodejs|ruby2?|dotnet-(?:\d{1,2}|core)))|s3fs)/(\d+)\.(\d+)(?:\.(\d+)|)'
|
||||||
|
|
||||||
# SAFE FME
|
# SAFE FME
|
||||||
- regex: '(FME)\/(\d+\.\d+)\.(\d+)\.(\d+)'
|
- regex: '(FME)\/(\d+\.\d+)\.(\d+)\.(\d+)'
|
||||||
@@ -179,6 +179,9 @@ user_agent_parsers:
|
|||||||
- regex: '\[FB.{0,300};'
|
- regex: '\[FB.{0,300};'
|
||||||
family_replacement: 'Facebook'
|
family_replacement: 'Facebook'
|
||||||
|
|
||||||
|
# RecipeRadar crawler
|
||||||
|
- regex: '(RecipeRadar)/(\d+)\.(\d+)(?:\.(\d+)|)'
|
||||||
|
|
||||||
# Bots General matcher 'name/0.0'
|
# Bots General matcher 'name/0.0'
|
||||||
- regex: '^.{0,200}?(?:\/[A-Za-z0-9\.]{0,50}|) {0,2}([A-Za-z0-9 \-_\!\[\]:]{0,50}(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]{0,50}))[/ ](\d+)(?:\.(\d+)(?:\.(\d+)|)|)'
|
- regex: '^.{0,200}?(?:\/[A-Za-z0-9\.]{0,50}|) {0,2}([A-Za-z0-9 \-_\!\[\]:]{0,50}(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]{0,50}))[/ ](\d+)(?:\.(\d+)(?:\.(\d+)|)|)'
|
||||||
# Bots containing bot(but not CUBOT)
|
# Bots containing bot(but not CUBOT)
|
||||||
@@ -215,6 +218,16 @@ user_agent_parsers:
|
|||||||
# Twitter
|
# Twitter
|
||||||
- regex: '(Twitter for (?:iPhone|iPad)|TwitterAndroid)(?:\/(\d+)\.(\d+)|)'
|
- regex: '(Twitter for (?:iPhone|iPad)|TwitterAndroid)(?:\/(\d+)\.(\d+)|)'
|
||||||
family_replacement: 'Twitter'
|
family_replacement: 'Twitter'
|
||||||
|
# TikTok
|
||||||
|
- regex: '(musical_ly) app_version\/(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'TikTok'
|
||||||
|
- regex: '(musical_ly_)(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'TikTok'
|
||||||
|
- regex: '(BytedanceWebview)\/[a-z0-9]+'
|
||||||
|
family_replacement: 'TikTok'
|
||||||
|
# KakaoTalk
|
||||||
|
- regex: 'Mozilla.{1,200}Mobile.{1,100}(KAKAOTALK)/(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'KakaoTalk'
|
||||||
|
|
||||||
# Phantom app
|
# Phantom app
|
||||||
- regex: 'Mozilla.{1,200}Mobile.{1,100}(Phantom\/ios|Phantom\/android).(\d+)\.(\d+)\.(\d+)'
|
- regex: 'Mozilla.{1,200}Mobile.{1,100}(Phantom\/ios|Phantom\/android).(\d+)\.(\d+)\.(\d+)'
|
||||||
@@ -394,7 +407,7 @@ user_agent_parsers:
|
|||||||
- regex: '(Instabridge)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)'
|
- regex: '(Instabridge)/(\d+)(?:\.(\d+)|)(?:\.(\d+)|)'
|
||||||
|
|
||||||
# Aloha Browser
|
# Aloha Browser
|
||||||
- regex: '(AlohaBrowser)/(\d+)\.(\d+)\.(\d+)(?:\.(\d+)|)'
|
- regex: '(AlohaBrowser|ABB)/(\d+)\.(\d+)\.(\d+)(?:\.(\d+)|)'
|
||||||
family_replacement: 'Aloha Browser'
|
family_replacement: 'Aloha Browser'
|
||||||
|
|
||||||
# Brave Browser https://brave.com/ , should go before Safari and Chrome Mobile
|
# Brave Browser https://brave.com/ , should go before Safari and Chrome Mobile
|
||||||
@@ -415,7 +428,7 @@ user_agent_parsers:
|
|||||||
family_replacement: 'Edge Mobile'
|
family_replacement: 'Edge Mobile'
|
||||||
|
|
||||||
# Oculus Browser, should go before Samsung Internet
|
# Oculus Browser, should go before Samsung Internet
|
||||||
- regex: '(OculusBrowser)/(\d+)\.(\d+).0.0(?:\.([0-9\-]+)|)'
|
- regex: '(OculusBrowser)/(\d+)\.(\d+)(?:\.([0-9\-]+)|)'
|
||||||
family_replacement: 'Oculus Browser'
|
family_replacement: 'Oculus Browser'
|
||||||
|
|
||||||
# Samsung Internet (based on Chrome, but lacking some features)
|
# Samsung Internet (based on Chrome, but lacking some features)
|
||||||
@@ -492,6 +505,12 @@ user_agent_parsers:
|
|||||||
- regex: '(Ecosia) android@(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)'
|
- regex: '(Ecosia) android@(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)'
|
||||||
family_replacement: 'Ecosia Android'
|
family_replacement: 'Ecosia Android'
|
||||||
|
|
||||||
|
# VivoBrowser
|
||||||
|
- regex: '(VivoBrowser)\/(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
||||||
|
|
||||||
|
# HiBrowser
|
||||||
|
- regex: '(HiBrowser)\/v(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
||||||
|
|
||||||
# Chrome Mobile
|
# Chrome Mobile
|
||||||
- regex: 'Version/.{1,300}(Chrome)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
- regex: 'Version/.{1,300}(Chrome)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
||||||
family_replacement: 'Chrome Mobile WebView'
|
family_replacement: 'Chrome Mobile WebView'
|
||||||
@@ -569,6 +588,85 @@ user_agent_parsers:
|
|||||||
- regex: '^(surveyon)/(\d+)\.(\d+)\.(\d+)'
|
- regex: '^(surveyon)/(\d+)\.(\d+)\.(\d+)'
|
||||||
family_replacement: 'Surveyon'
|
family_replacement: 'Surveyon'
|
||||||
|
|
||||||
|
# 115 Browser
|
||||||
|
- regex: '(115Browser)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: '115 Browser'
|
||||||
|
|
||||||
|
# Avira
|
||||||
|
- regex: '(Avira)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'Avira'
|
||||||
|
|
||||||
|
# CCleaner Browser
|
||||||
|
- regex: '(CCleaner)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'CCleaner'
|
||||||
|
|
||||||
|
# Norton
|
||||||
|
- regex: '(Norton)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'Norton'
|
||||||
|
|
||||||
|
# Quark
|
||||||
|
- regex: '(Quark)/(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'Quark'
|
||||||
|
# Quark PC
|
||||||
|
- regex: '(QuarkPC)/(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'Quark PC'
|
||||||
|
|
||||||
|
# Smart Lenovo Browser
|
||||||
|
- regex: '(SLBrowser)/(\d+)\.(\d+)\.(\d+)\.(\d+) SLBChan/(\d+)'
|
||||||
|
family_replacement: 'Smart Lenovo Browser'
|
||||||
|
|
||||||
|
# Atom Browser
|
||||||
|
- regex: '(Atom)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'Atom Browser'
|
||||||
|
|
||||||
|
# 360 Secure Browser
|
||||||
|
- regex: '(Chrome)/\d+\.\d+\.\d+\.\d+ .* QIHU 360(?:SEi18n|ENT)'
|
||||||
|
family_replacement: '360 Secure Browser'
|
||||||
|
|
||||||
|
# Decentr Web3 Browser
|
||||||
|
- regex: '(Decentr)'
|
||||||
|
family_replacement: 'Decentr Web3 Browser'
|
||||||
|
|
||||||
|
# Sparrow Browser
|
||||||
|
- regex: '(Sparrow)'
|
||||||
|
family_replacement: 'Sparrow Browser'
|
||||||
|
|
||||||
|
# Chromium GOST Browser
|
||||||
|
- regex: '(Chromium GOST)'
|
||||||
|
family_replacement: 'Chromium GOST Browser'
|
||||||
|
|
||||||
|
# AOL Shield Browser
|
||||||
|
- regex: '(AOLShield)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'AOL Shield Browser'
|
||||||
|
|
||||||
|
# Hola Browser
|
||||||
|
- regex: '(Hola)/(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'Hola Browser'
|
||||||
|
|
||||||
|
# Craving Explorer Browser
|
||||||
|
- regex: '(CravingExplorer)/(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'Craving Explorer Browser'
|
||||||
|
|
||||||
|
# Talon Cyber Security Browser
|
||||||
|
- regex: '(Talon)'
|
||||||
|
family_replacement: 'Talon Cyber Security Browser'
|
||||||
|
|
||||||
|
# QAX Browser
|
||||||
|
- regex: '(Qaxbrowser)'
|
||||||
|
family_replacement: 'QAX Browser'
|
||||||
|
|
||||||
|
# AOL Desktop Gold Browser
|
||||||
|
- regex: '(ADG)/(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'AOL Desktop Gold Browser'
|
||||||
|
|
||||||
|
# Sber Browser
|
||||||
|
- regex: '(SberBrowser)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'Sber Browser'
|
||||||
|
|
||||||
|
# JiSu Browser
|
||||||
|
- regex: '(JiSu)/(\d+)\.(\d+)\.(\d+)'
|
||||||
|
family_replacement: 'JiSu Browser'
|
||||||
|
|
||||||
#### END SPECIAL CASES TOP ####
|
#### END SPECIAL CASES TOP ####
|
||||||
|
|
||||||
#### MAIN CASES - this catches > 50% of all browsers ####
|
#### MAIN CASES - this catches > 50% of all browsers ####
|
||||||
@@ -1387,6 +1485,13 @@ os_parsers:
|
|||||||
# Box Drive and Box Sync on Mac OS X use OSX version numbers, not Darwin
|
# Box Drive and Box Sync on Mac OS X use OSX version numbers, not Darwin
|
||||||
- regex: '^Box.{0,200};(Darwin)/(10)\.(1\d)(?:\.(\d+)|)'
|
- regex: '^Box.{0,200};(Darwin)/(10)\.(1\d)(?:\.(\d+)|)'
|
||||||
os_replacement: 'Mac OS X'
|
os_replacement: 'Mac OS X'
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Hashicorp API
|
||||||
|
# APN/1.0 HashiCorp/1.0 Terraform/1.8.0 (+https://www.terraform.io) terraform-provider-aws/4.67.0 (+https://registry.terraform.io/providers/hashicorp/aws) aws-sdk-go/1.44.261 (go1.19.8; darwin; arm64)
|
||||||
|
##########
|
||||||
|
- regex: 'darwin; arm64'
|
||||||
|
os_replacement: 'Mac OS X'
|
||||||
|
|
||||||
##########
|
##########
|
||||||
# iOS
|
# iOS
|
||||||
@@ -1672,29 +1777,27 @@ os_parsers:
|
|||||||
- regex: 'CFNetwork/.{0,100} Darwin/(21)\.\d+'
|
- regex: 'CFNetwork/.{0,100} Darwin/(21)\.\d+'
|
||||||
os_replacement: 'iOS'
|
os_replacement: 'iOS'
|
||||||
os_v1_replacement: '15'
|
os_v1_replacement: '15'
|
||||||
- regex: 'CFNetwork/.{0,100} Darwin/22\.0\.\d+'
|
- regex: 'CFNetwork/.{0,100} Darwin/22\.([0-5])\.\d+'
|
||||||
os_replacement: 'iOS'
|
os_replacement: 'iOS'
|
||||||
os_v1_replacement: '16'
|
os_v1_replacement: '16'
|
||||||
os_v2_replacement: '0'
|
os_v2_replacement: '$1'
|
||||||
- regex: 'CFNetwork/.{0,100} Darwin/22\.1\.\d+'
|
|
||||||
os_replacement: 'iOS'
|
|
||||||
os_v1_replacement: '16'
|
|
||||||
os_v2_replacement: '1'
|
|
||||||
- regex: 'CFNetwork/.{0,100} Darwin/22\.2\.\d+'
|
|
||||||
os_replacement: 'iOS'
|
|
||||||
os_v1_replacement: '16'
|
|
||||||
os_v2_replacement: '2'
|
|
||||||
- regex: 'CFNetwork/.{0,100} Darwin/22\.3\.\d+'
|
|
||||||
os_replacement: 'iOS'
|
|
||||||
os_v1_replacement: '16'
|
|
||||||
os_v2_replacement: '3'
|
|
||||||
- regex: 'CFNetwork/.{0,100} Darwin/22\.4\.\d+'
|
|
||||||
os_replacement: 'iOS'
|
|
||||||
os_v1_replacement: '16'
|
|
||||||
os_v2_replacement: '4'
|
|
||||||
- regex: 'CFNetwork/.{0,100} Darwin/(22)\.\d+'
|
- regex: 'CFNetwork/.{0,100} Darwin/(22)\.\d+'
|
||||||
os_replacement: 'iOS'
|
os_replacement: 'iOS'
|
||||||
os_v1_replacement: '16'
|
os_v1_replacement: '16'
|
||||||
|
- regex: 'CFNetwork/.{0,100} Darwin/23\.([0-5])\.\d+'
|
||||||
|
os_replacement: 'iOS'
|
||||||
|
os_v1_replacement: '17'
|
||||||
|
os_v2_replacement: '$1'
|
||||||
|
- regex: 'CFNetwork/.{0,100} Darwin/(23)\.\d+'
|
||||||
|
os_replacement: 'iOS'
|
||||||
|
os_v1_replacement: '17'
|
||||||
|
- regex: 'CFNetwork/.{0,100} Darwin/24\.([0-5])\.\d+'
|
||||||
|
os_replacement: 'iOS'
|
||||||
|
os_v1_replacement: '18'
|
||||||
|
os_v2_replacement: '$1'
|
||||||
|
- regex: 'CFNetwork/.{0,100} Darwin/(24)\.\d+'
|
||||||
|
os_replacement: 'iOS'
|
||||||
|
os_v1_replacement: '18'
|
||||||
- regex: 'CFNetwork/.{0,100} Darwin/'
|
- regex: 'CFNetwork/.{0,100} Darwin/'
|
||||||
os_replacement: 'iOS'
|
os_replacement: 'iOS'
|
||||||
|
|
||||||
@@ -1889,6 +1992,21 @@ os_parsers:
|
|||||||
# Roku Digital-Video-Players https://www.roku.com/
|
# Roku Digital-Video-Players https://www.roku.com/
|
||||||
- regex: '^(Roku)/DVP-(\d+)\.(\d+)'
|
- regex: '^(Roku)/DVP-(\d+)\.(\d+)'
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Amazon S3 client boto3
|
||||||
|
# HashiCorp API
|
||||||
|
# Boto3/1.28.62 md/Botocore#1.31.62 ua/2.0 os/macos#22.4.0 md/arch#arm64 lang/python#3.11.6 md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.31.62
|
||||||
|
# APN/1.0 HashiCorp/1.0 Terraform/1.8.1 (+https://www.terraform.io) terraform-provider-aws/4.67.0 (+https://registry.terraform.io/providers/hashicorp/aws) aws-sdk-go-v2/1.18.0 os/macos lang/go/1.19.8 md/GOOS/darwin md/GOARCH/arm64 api/identitystore/1.16.11
|
||||||
|
##########
|
||||||
|
- regex: 'os\/macos[#]?(\d*)[.]?(\d*)[.]?(\d*)'
|
||||||
|
os_replacement: 'Mac OS X'
|
||||||
|
os_v1_replacement: '$1'
|
||||||
|
os_v2_replacement: '$2'
|
||||||
|
os_v3_replacement: '$3'
|
||||||
|
|
||||||
|
# Huawei HarmonyOS
|
||||||
|
- regex: '(HarmonyOS)[\s;]+(\d+|)\.?(\d+|)\.?(\d+|)'
|
||||||
|
|
||||||
device_parsers:
|
device_parsers:
|
||||||
|
|
||||||
#########
|
#########
|
||||||
@@ -5905,7 +6023,7 @@ device_parsers:
|
|||||||
##########
|
##########
|
||||||
# Spiders (this is a hack...)
|
# Spiders (this is a hack...)
|
||||||
##########
|
##########
|
||||||
- regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper)'
|
- regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper|RecipeRadar|GPTBot)'
|
||||||
regex_flag: 'i'
|
regex_flag: 'i'
|
||||||
device_replacement: 'Spider'
|
device_replacement: 'Spider'
|
||||||
brand_replacement: 'Spider'
|
brand_replacement: 'Spider'
|
||||||
+12
-3
@@ -19,11 +19,20 @@ OptionParser.parse do |parser|
|
|||||||
exit
|
exit
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# `--update-data`: refreshes the User-Agent regexes and the GeoLite2
# database, then exits.
parser.on("--update-data", "Download all required data files (UA Parser and GeoLite2)") do
  puts "=== Starting data files update ==="
  App::Services::Cli.update_uap_regexes
  App::Services::Cli.download_geolite_db
  # Both download helpers rescue their own errors and only print them, so
  # this line is reached after a failed download too. Do not claim success.
  puts "=== Data files update finished (see messages above for any errors) ==="
  exit
end
|
||||||
|
|
||||||
if ARGV.empty?
|
if ARGV.empty?
|
||||||
puts "Usage: ./cli [options]"
|
puts "Usage: ./cli [options]"
|
||||||
puts "Options:"
|
puts "Options:"
|
||||||
puts " --create-user=NAME Create a new user with the given name"
|
puts " --create-user=NAME Create a new user with the given name"
|
||||||
puts " --list-users List all users"
|
puts " --list-users List all users"
|
||||||
puts " --delete-user=USER_ID Delete a user by ID"
|
puts " --delete-user=USER_ID Delete a user by ID"
|
||||||
|
puts " --update-data Download all required data files"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user