chore: remote waf fingerprinting base

2025-10-04 23:54:29 -04:00
14 changed files with 407 additions and 579 deletions
@@ -65,7 +65,7 @@ The Actor provides three types of outputs:
 | Field | Type | Required | Description |
 |-------|------|----------|-------------|
 | `username` | string | Yes | Username the search was conducted for |
-| `links` | array | Yes | Array with found links to the social media |
+| `links` | array | Yes | Array with found links to the social media |
 | `links[]`| string | No | URL to the account

 ### Example Dataset Item (JSON)
@@ -27,12 +27,12 @@ on:
 jobs:
  tox-lint:
    runs-on: ubuntu-latest
-    # Linting is run through tox to ensure that the same linter
+    # Linting is run through tox to ensure that the same linter
    # is used by local runners
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v4
      - name: Set up linting environment
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      - name: Install tox and related dependencies
@@ -44,7 +44,7 @@ jobs:
  tox-matrix:
    runs-on: ${{ matrix.os }}
    strategy:
-      # We want to know what specific versions it fails on
+      # We want to know what specific versions it fails on
      fail-fast: false
      matrix:
        os: [
@@ -57,13 +57,11 @@ jobs:
          '3.11',
          '3.12',
          '3.13',
-          '3.14',
-          '3.14t',
        ]
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v4
      - name: Set up environment ${{ matrix.python-version }}
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install tox and related dependencies
@@ -77,7 +75,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@v6
+        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Get version from pyproject.toml
@@ -17,41 +17,29 @@ jobs:
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
-          # Checkout the base branch but fetch all history to avoid a second fetch call
          ref: ${{ github.base_ref }}
-          fetch-depth: 0
-          persist-credentials: false
+          fetch-depth: 1

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
-          python-version: "3.13"
+          python-version: '3.13'

      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
-          poetry-version: "latest"
+          poetry-version: 'latest'

      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev

-      - name: Prepare JSON versions for comparison
+      - name: Drop in place updated manifest from base
        run: |
-          # Fetch only the PR's branch head (single network call in this step)
-          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
-
-          # Find the merge-base commit between the target branch and the PR branch
-          MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
-          echo "Comparing PR head against merge-base commit: $MERGE_BASE"
-
-          # Safely extract the file from the PR's head and the merge-base commit
-          git show pr:sherlock_project/resources/data.json > data.json.head
-          git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
-
-          # CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
-          # This ensures that pytest runs against the new, updated file.
-          cp data.json.head sherlock_project/resources/data.json
+          cp sherlock_project/resources/data.json data.json.base
+          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1
+          git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json
+          cp sherlock_project/resources/data.json data.json.head

      - name: Discover modified targets
        id: discover-modified
@@ -59,16 +47,8 @@ jobs:
          CHANGED=$(
            python - <<'EOF'
          import json
-          import sys
-          try:
-              with open("data.json.base") as f: base = json.load(f)
-              with open("data.json.head") as f: head = json.load(f)
-          except FileNotFoundError as e:
-              print(f"Error: Could not find {e.filename}", file=sys.stderr)
-              sys.exit(1)
-          except json.JSONDecodeError as e:
-              print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
-              sys.exit(1)
+          with open("data.json.base") as f: base = json.load(f)
+          with open("data.json.head") as f: head = json.load(f)

          changed = []
          for k, v in head.items():
@@ -83,19 +63,12 @@ jobs:
          echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
          echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"

-      - name: Validate remote manifest against local schema
-        if: steps.discover-modified.outputs.changed_targets != ''
-        run: |
-          poetry run pytest tests/test_manifest.py::test_validate_manifest_against_local_schema
-
-      # --- The rest of the steps below are unchanged ---
-
      - name: Validate modified targets
-        env:
-          CHANGED_TARGETS: ${{ steps.discover-modified.outputs.changed_targets }}
+        if: steps.discover-modified.outputs.changed_targets != ''
+        continue-on-error: true
        run: |
          poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
-            --chunked-sites "$CHANGED_TARGETS" \
+            --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" \
            --junitxml=validation_results.xml

      - name: Prepare validation summary
@@ -1,45 +1,39 @@
 #!/usr/bin/env python
 # This module generates the listing of supported sites which can be found in
-# sites.mdx. It also organizes all the sites in alphanumeric order
+# sites.mdx. It also organizes all the sites in alphanumeric order
 import json
 import os

+
 DATA_REL_URI: str = "sherlock_project/resources/data.json"

-DEFAULT_ENCODING = "utf-8"
-
 # Read the data.json file
-with open(DATA_REL_URI, "r", encoding=DEFAULT_ENCODING) as data_file:
+with open(DATA_REL_URI, "r", encoding="utf-8") as data_file:
    data: dict = json.load(data_file)

 # Removes schema-specific keywords for proper processing
-social_networks = data.copy()
+social_networks: dict = dict(data)
 social_networks.pop('$schema', None)

 # Sort the social networks in alphanumeric order
-social_networks = sorted(social_networks.items())
+social_networks: list = sorted(social_networks.items())

 # Make output dir where the site list will be written
 os.mkdir("output")

-# Write the list of supported sites to sites.mdx
-with open("output/sites.mdx", "w", encoding=DEFAULT_ENCODING) as site_file:
-    site_file.write("---\n")
-    site_file.write("title: 'List of supported sites'\n")
-    site_file.write("sidebarTitle: 'Supported sites'\n")
-    site_file.write("icon: 'globe'\n")
-    site_file.write("description: 'Sherlock currently supports **400+** sites'\n")
-    site_file.write("---\n\n")
-
+# Write the list of supported sites to sites.mdx
+with open("output/sites.mdx", "w") as site_file:
+    site_file.write("---\ntitle: 'List of supported sites'\nsidebarTitle: 'Supported sites'\nicon: 'globe'\ndescription: 'Sherlock currently supports **400+** sites'\n---\n\n")
    for social_network, info in social_networks:
        url_main = info["urlMain"]
        is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
        site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")

 # Overwrite the data.json file with sorted data
-with open(DATA_REL_URI, "w", encoding=DEFAULT_ENCODING) as data_file:
+with open(DATA_REL_URI, "w") as data_file:
    sorted_data = json.dumps(data, indent=2, sort_keys=True)
    data_file.write(sorted_data)
-    data_file.write("\n")  # Keep the newline after writing data
+    data_file.write("\n")

 print("Finished updating supported site listing!")
+
@@ -23,17 +23,17 @@

 > [!WARNING]  
 > Packages for ParrotOS and Ubuntu 24.04, maintained by a third party, appear to be __broken__.  
-> Users of these systems should defer to [`uv`](https://docs.astral.sh/uv/)/`pipx`/`pip` or Docker.
+> Users of these systems should defer to pipx/pip or Docker.

 | Method | Notes |
 | - | - |
-| `pipx install sherlock-project` | `pip` or [`uv`](https://docs.astral.sh/uv/) may be used in place of `pipx` |
+| `pipx install sherlock-project` | `pip` may be used in place of `pipx` |
 | `docker run -it --rm sherlock/sherlock` |
 | `dnf install sherlock-project` | |

 Community-maintained packages are available for Debian (>= 13), Ubuntu (>= 22.10), Homebrew, Kali, and BlackArch. These packages are not directly supported or maintained by the Sherlock Project.

-See all alternative installation methods [here](https://sherlockproject.xyz/installation).
+See all alternative installation methods [here](https://sherlockproject.xyz/installation).

 ## General usage

@@ -51,42 +51,70 @@ Accounts found will be stored in an individual text file with the corresponding

 ```console
 $ sherlock --help
-usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT] [--output OUTPUT] [--csv] [--xlsx] [--site SITE_NAME] [--proxy PROXY_URL] [--dump-response]
-                [--json JSON_FILE] [--timeout TIMEOUT] [--print-all] [--print-found] [--no-color] [--browse] [--local] [--nsfw] [--txt] [--ignore-exclusions]
+usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT]
+                [--output OUTPUT] [--tor] [--unique-tor] [--csv] [--xlsx]
+                [--site SITE_NAME] [--proxy PROXY_URL] [--json JSON_FILE]
+                [--timeout TIMEOUT] [--print-all] [--print-found] [--no-color]
+                [--browse] [--local] [--nsfw]
                USERNAMES [USERNAMES ...]

-Sherlock: Find Usernames Across Social Networks (Version 0.16.0)
+Sherlock: Find Usernames Across Social Networks (Version 0.14.3)

 positional arguments:
-  USERNAMES             One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').
+  USERNAMES             One or more usernames to check with social networks.
+                        Check similar usernames using {?} (replace to '_', '-', '.').

-options:
+optional arguments:
  -h, --help            show this help message and exit
  --version             Display version information and dependencies.
  --verbose, -v, -d, --debug
                        Display extra debugging information and metrics.
  --folderoutput FOLDEROUTPUT, -fo FOLDEROUTPUT
-                        If using multiple usernames, the output of the results will be saved to this folder.
+                        If using multiple usernames, the output of the results will be
+                        saved to this folder.
  --output OUTPUT, -o OUTPUT
-                        If using single username, the output of the result will be saved to this file.
+                        If using single username, the output of the result will be saved
+                        to this file.
+  --tor, -t             Make requests over Tor; increases runtime; requires Tor to be
+                        installed and in system path.
+  --unique-tor, -u      Make requests over Tor with new Tor circuit after each request;
+                        increases runtime; requires Tor to be installed and in system
+                        path.
  --csv                 Create Comma-Separated Values (CSV) File.
-  --xlsx                Create the standard file for the modern Microsoft Excel spreadsheet (xlsx).
-  --site SITE_NAME      Limit analysis to just the listed sites. Add multiple options to specify more than one site.
+  --xlsx                Create the standard file for the modern Microsoft Excel
+                        spreadsheet (xlsx).
+  --site SITE_NAME      Limit analysis to just the listed sites. Add multiple options to
+                        specify more than one site.
  --proxy PROXY_URL, -p PROXY_URL
                        Make requests over a proxy. e.g. socks5://127.0.0.1:1080
-  --dump-response       Dump the HTTP response to stdout for targeted debugging.
  --json JSON_FILE, -j JSON_FILE
-                        Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.
+                        Load data from a JSON file or an online, valid, JSON file.
  --timeout TIMEOUT     Time (in seconds) to wait for response to requests (Default: 60)
  --print-all           Output sites where the username was not found.
-  --print-found         Output sites where the username was found (also if exported as file).
+  --print-found         Output sites where the username was found.
  --no-color            Don't color terminal output
  --browse, -b          Browse to all results on default browser.
  --local, -l           Force the use of the local data.json file.
  --nsfw                Include checking of NSFW sites from default list.
-  --txt                 Enable creation of a txt file
-  --ignore-exclusions   Ignore upstream exclusions (may return more false positives)
 ```
+## Apify Actor Usage [![Sherlock Actor](https://apify.com/actor-badge?actor=netmilk/sherlock)](https://apify.com/netmilk/sherlock?fpr=sherlock)
+
+<a href="https://apify.com/netmilk/sherlock?fpr=sherlock"><img src="https://apify.com/ext/run-on-apify.png" alt="Run Sherlock Actor on Apify" width="176" height="39" /></a>
+
+You can run Sherlock in the cloud without installation using the [Sherlock Actor](https://apify.com/netmilk/sherlock?fpr=sherlock) on [Apify](https://apify.com?fpr=sherlock) free of charge.
+
+``` bash
+$ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock
+[{
+  "username": "user123",
+  "links": [
+    "https://www.1337x.to/user/user123/",
+    ...
+  ]
+}]
+```
+
+Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).

 ## Credits

@@ -96,7 +124,7 @@ Thank you to everyone who has contributed to Sherlock! ❤️
  <img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
 </a>

-## Star History
+## Star history

 <picture>
  <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
@@ -107,7 +135,7 @@ Thank you to everyone who has contributed to Sherlock! ❤️
 ## License

 MIT © Sherlock Project<br/>
-Creator - [Siddharth Dushantha](https://github.com/sdushantha)
+Original Creator - [Siddharth Dushantha](https://github.com/sdushantha)

 <!-- Reference Links -->

@@ -8,7 +8,7 @@ source = "init"

 [tool.poetry]
 name = "sherlock-project"
-version = "0.16.1"
+version = "0.16.0"
 description = "Hunt down social media accounts by username across social networks"
 license = "MIT"
 authors = [
@@ -29,10 +29,6 @@ classifiers = [
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
-    "Programming Language :: Python :: 3.13",
    "Topic :: Security"
 ]
 homepage = "https://sherlockproject.xyz/"
@@ -53,6 +49,7 @@ stem = "^1.8.0"
 pandas = "^2.2.1"
 openpyxl = "^3.0.10"
 tomli = "^2.2.1"
+pyyaml = "^6.0.3"

 [tool.poetry.group.dev.dependencies]
 jsonschema = "^4.0.0"
@@ -37,6 +37,7 @@ class QueryNotify:

        self.result = result

+        # return

    def start(self, message=None):
        """Notify Start.
@@ -55,6 +56,7 @@ class QueryNotify:
        Nothing.
        """

+        # return

    def update(self, result):
        """Notify Update.
@@ -73,6 +75,7 @@ class QueryNotify:

        self.result = result

+        # return

    def finish(self, message=None):
        """Notify Finish.
@@ -91,6 +94,7 @@ class QueryNotify:
        Nothing.
        """

+        # return

    def __str__(self):
        """Convert Object To String.
@@ -133,6 +137,7 @@ class QueryNotifyPrint(QueryNotify):
        self.print_all = print_all
        self.browse = browse

+        return

    def start(self, message):
        """Notify Start.
@@ -158,6 +163,7 @@ class QueryNotifyPrint(QueryNotify):
        # An empty line between first line and the result(more clear output)
        print('\r')

+        return

    def countResults(self):
        """This function counts the number of results. Every time the function is called,
@@ -232,7 +238,7 @@ class QueryNotifyPrint(QueryNotify):
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.YELLOW + f" {msg}")
-
+                
        elif result.status == QueryStatus.WAF:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
@@ -248,9 +254,10 @@ class QueryNotifyPrint(QueryNotify):
                f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'"
            )

+        return

    def finish(self, message="The processing has been finished."):
-        """Notify Finish.
+        """Notify Finish.
        Will print the last line to the standard output.
        Keyword Arguments:
        self                   -- This object.
@@ -79,13 +79,13 @@
    "username_claimed": "pink"
  },
  "AllMyLinks": {
-    "errorMsg": "Page not found",
-    "errorType": "message",
-    "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
-    "url": "https://allmylinks.com/{}",
-    "urlMain": "https://allmylinks.com/",
-    "username_claimed": "blue"
-  },
+  "errorMsg": "Page not found",
+  "errorType": "message",
+  "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
+  "url": "https://allmylinks.com/{}",
+  "urlMain": "https://allmylinks.com/",
+  "username_claimed": "blue"
+},
  "AniWorld": {
    "errorMsg": "Dieses Profil ist nicht verf\u00fcgbar",
    "errorType": "message",
@@ -149,12 +149,6 @@
    "urlProbe": "https://archive.org/details/@{}?noscript=true",
    "username_claimed": "blue"
  },
-  "Arduino Forum": {
-    "errorType": "status_code",
-    "url": "https://forum.arduino.cc/u/{}/summary",
-    "urlMain": "https://forum.arduino.cc/",
-    "username_claimed": "system"
-  },
  "ArtStation": {
    "errorType": "status_code",
    "url": "https://www.artstation.com/{}",
@@ -179,12 +173,6 @@
    "urlMain": "https://atcoder.jp/",
    "username_claimed": "ksun48"
  },
-  "Vjudge": {
-    "errorType": "status_code",
-    "url": "https://VJudge.net/user/{}",
-    "urlMain": "https://VJudge.net/",
-    "username_claimed": "tokitsukaze"
-  },
  "Audiojungle": {
    "errorType": "status_code",
    "regexCheck": "^[a-zA-Z0-9_]+$",
@@ -205,13 +193,6 @@
    "urlMain": "https://www.avizo.cz/",
    "username_claimed": "blue"
  },
-  "AWS Skills Profile": {
-    "errorType": "message",
-    "errorMsg": "shareProfileAccepted\":false",
-    "url": "https://skillsprofile.skillbuilder.aws/user/{}/",
-    "urlMain": "https://skillsprofile.skillbuilder.aws",
-    "username_claimed": "mayank04pant"
-  },
  "BOOTH": {
    "errorType": "response_url",
    "errorUrl": "https://booth.pm/",
@@ -278,8 +259,7 @@
    "username_claimed": "blue"
  },
  "Blitz Tactics": {
-    "errorMsg": "That page doesn't exist",
-    "errorType": "message",
+    "errorType": "status_code",
    "url": "https://blitztactics.com/{}",
    "urlMain": "https://blitztactics.com/",
    "username_claimed": "Lance5500"
@@ -298,6 +278,14 @@
    "urlMain": "https://bsky.app/",
    "username_claimed": "mcuban"
  },
+  "BoardGameGeek": {
+    "errorType": "message",
+    "regexCheck": "^[a-zA-Z0-9_]*$",
+    "errorMsg": "User not found",
+    "url": "https://boardgamegeek.com/user/{}",
+    "urlMain": "https://boardgamegeek.com",
+    "username_claimed": "blue"
+  },
  "BongaCams": {
    "errorType": "status_code",
    "isNSFW": true,
@@ -311,27 +299,12 @@
    "urlMain": "https://www.bookcrossing.com/",
    "username_claimed": "blue"
  },
-  "BoardGameGeek": {
-    "errorMsg": "\"isValid\":true",
-    "errorType": "message",
-    "url": "https://boardgamegeek.com/user/{}",
-    "urlMain": "https://boardgamegeek.com/",
-    "urlProbe": "https://api.geekdo.com/api/accounts/validate/username?username={}",
-    "username_claimed": "blue"
-  },
  "BraveCommunity": {
    "errorType": "status_code",
    "url": "https://community.brave.com/u/{}/",
    "urlMain": "https://community.brave.com/",
    "username_claimed": "blue"
  },
-  "BreachSta.rs Forum": {
-    "errorMsg": "<title>Error - BreachStars</title>",
-    "errorType": "message",
-    "url": "https://breachsta.rs/profile/{}",
-    "urlMain": "https://breachsta.rs/",
-    "username_claimed": "Sleepybubble"
-  },
  "BugCrowd": {
    "errorType": "status_code",
    "url": "https://bugcrowd.com/{}",
@@ -352,12 +325,6 @@
    "urlMain": "https://buzzfeed.com/",
    "username_claimed": "blue"
  },
-  "Cfx.re Forum": {
-    "errorType": "status_code",
-    "url": "https://forum.cfx.re/u/{}/summary",
-    "urlMain": "https://forum.cfx.re",
-    "username_claimed": "hightowerlssd"
-  },
  "CGTrader": {
    "errorType": "status_code",
    "regexCheck": "^[^.]*?$",
@@ -447,7 +414,7 @@
  "Chess": {
    "errorMsg": "Username is valid",
    "errorType": "message",
-    "regexCheck": "^[a-zA-Z0-9_]{3,25}$",
+    "regexCheck": "^[a-z1-9]{3,25}$",
    "url": "https://www.chess.com/member/{}",
    "urlMain": "https://www.chess.com/",
    "urlProbe": "https://www.chess.com/callback/user/valid?username={}",
@@ -538,29 +505,12 @@
    "urlMain": "https://coderwall.com",
    "username_claimed": "hacker"
  },
-  "CodeSandbox": {
-    "errorType": "message",
-    "errorMsg": "Could not find user with username",
-    "regexCheck": "^[a-zA-Z0-9_-]{3,30}$",
-    "url": "https://codesandbox.io/u/{}",
-    "urlProbe": "https://codesandbox.io/api/v1/users/{}",
-    "urlMain": "https://codesandbox.io",
-    "username_claimed": "icyjoseph"
-  },
  "Codewars": {
    "errorType": "status_code",
    "url": "https://www.codewars.com/users/{}",
    "urlMain": "https://www.codewars.com",
    "username_claimed": "example"
  },
-  "Codolio": {
-    "errorType": "message",
-    "errorMsg": "<title>Page Not Found | Codolio</title>",
-    "url": "https://codolio.com/profile/{}",
-    "urlMain": "https://codolio.com/",
-    "username_claimed": "testuser",
-    "regexCheck": "^[a-zA-Z0-9_-]{3,30}$"
-  },
  "Coinvote": {
    "errorType": "status_code",
    "url": "https://coinvote.cc/profile/{}",
@@ -587,13 +537,6 @@
    "urlMain": "https://coroflot.com/",
    "username_claimed": "blue"
  },
-  "Cplusplus": {
-    "errorType": "message",
-    "errorMsg": "<title>404 Page Not Found</title>",
-    "url": "https://cplusplus.com/user/{}",
-    "urlMain": "https://cplusplus.com",
-    "username_claimed": "mbozzi"
-  },
  "Cracked": {
    "errorType": "response_url",
    "errorUrl": "https://www.cracked.com/",
@@ -601,19 +544,6 @@
    "urlMain": "https://www.cracked.com/",
    "username_claimed": "blue"
  },
-  "Cracked Forum": {
-    "errorMsg": "The member you specified is either invalid or doesn't exist",
-    "errorType": "message",
-    "url": "https://cracked.sh/{}",
-    "urlMain": "https://cracked.sh/",
-    "username_claimed": "Blue"
-  },
-  "Credly": {
-    "errorType": "status_code",
-    "url": "https://www.credly.com/users/{}",
-    "urlMain": "https://www.credly.com/",
-    "username_claimed": "credly"
-  },
  "Crevado": {
    "errorType": "status_code",
    "regexCheck": "^[\\w@-]+?$",
@@ -628,13 +558,6 @@
    "urlMain": "https://crowdin.com/",
    "username_claimed": "blue"
  },
-  "CryptoHack": {
-    "errorType": "response_url",
-    "errorUrl": "https://cryptohack.org/",
-    "url": "https://cryptohack.org/user/{}/",
-    "urlMain": "https://cryptohack.org/",
-    "username_claimed": "blue"
-  },
  "Cryptomator Forum": {
    "errorType": "status_code",
    "url": "https://community.cryptomator.org/u/{}",
@@ -690,21 +613,21 @@
    "urlMain": "https://www.dealabs.com/",
    "username_claimed": "blue"
  },
-  "DeviantArt": {
-    "errorType": "message",
-    "errorMsg": "Llama Not Found",
-    "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
-    "url": "https://www.deviantart.com/{}",
-    "urlMain": "https://www.deviantart.com/",
-    "username_claimed": "blue"
-  },
+ "DeviantArt": {
+  "errorType": "message",
+  "errorMsg": "Llama Not Found",
+  "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
+  "url": "https://www.deviantart.com/{}",
+  "urlMain": "https://www.deviantart.com/",
+  "username_claimed": "blue"
+},
  "DigitalSpy": {
-    "errorMsg": "The page you were looking for could not be found.",
-    "errorType": "message",
-    "url": "https://forums.digitalspy.com/profile/{}",
-    "urlMain": "https://forums.digitalspy.com/",
-    "username_claimed": "blue",
-    "regexCheck": "^\\w{3,20}$"
+      "errorMsg": "The page you were looking for could not be found.",
+      "errorType": "message",
+      "url": "https://forums.digitalspy.com/profile/{}",
+      "urlMain": "https://forums.digitalspy.com/",
+      "username_claimed": "blue",
+      "regexCheck": "^\\w{3,20}$"
  },
  "Discogs": {
    "errorType": "status_code",
@@ -717,7 +640,10 @@
    "url": "https://discord.com",
    "urlMain": "https://discord.com/",
    "urlProbe": "https://discord.com/api/v9/unique-username/username-attempt-unauthed",
-    "errorMsg": ["{\"taken\":false}", "The resource is being rate limited"],
+    "errorMsg": [
+      "{\"taken\":false}",
+      "The resource is being rate limited"
+    ],
    "request_method": "POST",
    "request_payload": {
      "username": "{}"
@@ -727,28 +653,12 @@
    },
    "username_claimed": "blue"
  },
-  "Discord.bio": {
-    "errorType": "message",
-    "errorMsg": "<title>Server Error (500)</title>",
-    "url": "https://discords.com/api-v2/bio/details/{}",
-    "urlMain": "https://discord.bio/",
-    "username_claimed": "robert"
-  },
  "Discuss.Elastic.co": {
    "errorType": "status_code",
    "url": "https://discuss.elastic.co/u/{}",
    "urlMain": "https://discuss.elastic.co/",
    "username_claimed": "blue"
  },
-  "Diskusjon.no": {
-    "errorMsg": "{\"result\":\"ok\"}",
-    "errorType": "message",
-    "regexCheck": "^[a-zA-Z0-9_.-]{3,40}$",
-    "urlProbe": "https://www.diskusjon.no/?app=core&module=system&controller=ajax&do=usernameExists&input={}",
-    "url": "https://www.diskusjon.no",
-    "urlMain": "https://www.diskusjon.no",
-    "username_claimed": "blue"
-  },
  "Disqus": {
    "errorType": "status_code",
    "url": "https://disqus.com/{}",
@@ -773,6 +683,7 @@
  "Duolingo": {
    "errorMsg": "{\"users\":[]}",
    "errorType": "message",
+
    "url": "https://www.duolingo.com/profile/{}",
    "urlMain": "https://duolingo.com/",
    "urlProbe": "https://www.duolingo.com/2017-06-30/users?username={}",
@@ -945,12 +856,6 @@
    "urlMain": "https://www.gamespot.com/",
    "username_claimed": "blue"
  },
-  "GameFAQs": {
-    "errorType": "status_code",
-    "url": "https://gamefaqs.gamespot.com/community/{}",
-    "urlMain": "https://gamefaqs.gamespot.com",
-    "username_claimed": "blue"
-  },
  "GeeksforGeeks": {
    "errorType": "status_code",
    "url": "https://auth.geeksforgeeks.org/user/{}",
@@ -1010,14 +915,6 @@
    "urlMain": "https://www.github.com/",
    "username_claimed": "blue"
  },
-  "Warframe Market": {
-    "errorType": "status_code",
-    "request_method": "GET",
-    "url": "https://warframe.market/profile/{}",
-    "urlMain": "https://warframe.market/",
-    "urlProbe": "https://api.warframe.market/v2/user/{}",
-    "username_claimed": "kaiallalone"
-  },
  "GitLab": {
    "errorMsg": "[]",
    "errorType": "message",
@@ -1107,14 +1004,18 @@
    "username_claimed": "blazezaria"
  },
  "HackerEarth": {
-    "errorType": "status_code",
+    "errorMsg": "404. URL not found.",
+    "errorType": "message",
    "url": "https://hackerearth.com/@{}",
    "urlMain": "https://hackerearth.com/",
    "username_claimed": "naveennamani877"
  },
  "HackerNews": {
    "__comment__": "First errMsg invalid, second errMsg rate limited. Not ideal. Adjust for better rate limit filtering.",
-    "errorMsg": ["No such user.", "Sorry."],
+    "errorMsg": [
+      "No such user.",
+      "Sorry."
+    ],
    "errorType": "message",
    "url": "https://news.ycombinator.com/user?id={}",
    "urlMain": "https://news.ycombinator.com/",
@@ -1135,18 +1036,6 @@
    "urlMain": "https://hackerrank.com/",
    "username_claimed": "satznova"
  },
-  "HackerSploit": {
-    "errorType": "status_code",
-    "url": "https://forum.hackersploit.org/u/{}",
-    "urlMain": "https://forum.hackersploit.org/",
-    "username_claimed": "hackersploit"
-  },
-  "HackMD": {
-    "errorType": "status_code",
-    "url": "https://hackmd.io/@{}",
-    "urlMain": "https://hackmd.io/",
-    "username_claimed": "blue"
-  },
  "Harvard Scholar": {
    "errorType": "status_code",
    "url": "https://scholar.harvard.edu/{}",
@@ -1167,13 +1056,6 @@
    "urlMain": "https://www.heavy-r.com/",
    "username_claimed": "kilroy222"
  },
-  "Hive Blog": {
-    "errorMsg": "<title>User Not Found - Hive</title>",
-    "errorType": "message",
-    "url": "https://hive.blog/@{}",
-    "urlMain": "https://hive.blog/",
-    "username_claimed": "mango-juice"
-  },
  "Holopin": {
    "errorMsg": "true",
    "errorType": "message",
@@ -1225,12 +1107,6 @@
    "urlMain": "https://www.ifttt.com/",
    "username_claimed": "blue"
  },
-  "Ifunny": {
-    "errorType": "status_code",
-    "url": "https://ifunny.co/user/{}",
-    "urlMain": "https://ifunny.co/",
-    "username_claimed": "agua"
-  },
  "IRC-Galleria": {
    "errorType": "response_url",
    "errorUrl": "https://irc-galleria.net/users/search?username={}",
@@ -1265,12 +1141,6 @@
    "urlProbe": "https://api.imgur.com/account/v1/accounts/{}?client_id=546c25a59c58ad7",
    "username_claimed": "blue"
  },
-  "imood": {
-    "errorType": "status_code",
-    "url": "https://www.imood.com/users/{}",
-    "urlMain": "https://www.imood.com/",
-    "username_claimed": "blue"
-  },
  "Instagram": {
    "errorType": "status_code",
    "url": "https://instagram.com/{}",
@@ -1347,13 +1217,6 @@
    "urlMain": "https://discourse.joplinapp.org/",
    "username_claimed": "laurent"
  },
-  "Jupyter Community Forum": {
-    "errorMsg": "Oops! That page doesn’t exist or is private.",
-    "errorType": "message",
-    "url": "https://discourse.jupyter.org/u/{}/summary",
-    "urlMain": "https://discourse.jupyter.org",
-    "username_claimed": "choldgraf"
-  },
  "Kaggle": {
    "errorType": "status_code",
    "url": "https://www.kaggle.com/{}",
@@ -1363,9 +1226,7 @@
  "kaskus": {
    "errorType": "status_code",
    "url": "https://www.kaskus.co.id/@{}",
-    "urlMain": "https://www.kaskus.co.id",
-    "urlProbe": "https://www.kaskus.co.id/api/users?username={}",
-    "request_method": "GET",
+    "urlMain": "https://www.kaskus.co.id/",
    "username_claimed": "l0mbart"
  },
  "Keybase": {
@@ -1400,15 +1261,6 @@
    "urlMain": "https://www.kongregate.com/",
    "username_claimed": "blue"
  },
-  "Kvinneguiden": {
-    "errorMsg": "{\"result\":\"ok\"}",
-    "errorType": "message",
-    "regexCheck": "^[a-zA-Z0-9_.-]{3,18}$",
-    "urlProbe": "https://forum.kvinneguiden.no/?app=core&module=system&controller=ajax&do=usernameExists&input={}",
-    "url": "https://forum.kvinneguiden.no",
-    "urlMain": "https://forum.kvinneguiden.no",
-    "username_claimed": "blue"
-  },
  "LOR": {
    "errorType": "status_code",
    "url": "https://www.linux.org.ru/people/{}/profile",
@@ -1416,7 +1268,7 @@
    "username_claimed": "red"
  },
  "Laracast": {
-    "errorType": "status_code",
+    "errorType":"status_code",
    "url": "https://laracasts.com/@{}",
    "urlMain": "https://laracasts.com/",
    "regexCheck": "^[a-zA-Z0-9_-]{3,}$",
@@ -1434,19 +1286,11 @@
    "urlMain": "https://leetcode.com/",
    "username_claimed": "blue"
  },
-  "LemmyWorld": {
-    "errorType": "message",
-    "errorMsg": "<h1>Error!</h1>",
-    "url": "https://lemmy.world/u/{}",
-    "urlMain": "https://lemmy.world",
-    "username_claimed": "blue"
-  },
  "LessWrong": {
-    "url": "https://www.lesswrong.com/users/{}",
+    "errorType": "status_code",
+    "url": "https://www.lesswrong.com/users/@{}",
    "urlMain": "https://www.lesswrong.com/",
-    "errorType": "response_url",
-    "errorUrl": "https://www.lesswrong.com/",
-    "username_claimed": "habryka"
+    "username_claimed": "blue"
  },
  "Letterboxd": {
    "errorMsg": "Sorry, we can\u2019t find the page you\u2019ve requested.",
@@ -1471,13 +1315,8 @@
    "urlMain": "https://lichess.org",
    "username_claimed": "john"
  },
- "LinkedIn": {
+  "LinkedIn": {
    "errorType": "status_code",
-    "headers": {
-      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-      "Accept-Language": "en-US,en;q=0.9",
-      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
-    },
    "regexCheck": "^[a-zA-Z0-9]{3,100}$",
    "request_method": "GET",
    "url": "https://linkedin.com/in/{}",
@@ -1562,7 +1401,6 @@
  "Minecraft": {
    "errorMsg": "Couldn't find any profile with name",
    "errorType": "message",
-    "regexCheck": "^.{1,25}$",
    "url": "https://api.mojang.com/users/profiles/minecraft/{}",
    "urlMain": "https://minecraft.net/",
    "username_claimed": "blue"
@@ -1595,13 +1433,6 @@
    "urlMain": "https://www.motorradfrage.net/",
    "username_claimed": "gutefrage"
  },
-  "MuseScore": {
-    "errorType": "status_code",
-    "url": "https://musescore.com/{}",
-    "urlMain": "https://musescore.com/",
-    "username_claimed": "arrangeme",
-    "request_method": "GET"
-  },
  "MyAnimeList": {
    "errorType": "status_code",
    "url": "https://myanimelist.net/profile/{}",
@@ -1615,12 +1446,12 @@
    "username_claimed": "blue"
  },
  "Mydramalist": {
-    "errorMsg": "The requested page was not found",
-    "errorType": "message",
-    "url": "https://www.mydramalist.com/profile/{}",
-    "urlMain": "https://mydramalist.com",
-    "username_claimed": "elhadidy12398"
-  },
+  "errorMsg": "The requested page was not found",
+  "errorType": "message",
+  "url": "https://www.mydramalist.com/profile/{}",
+  "urlMain": "https://mydramalist.com",
+  "username_claimed": "elhadidy12398"
+},
  "Myspace": {
    "errorType": "status_code",
    "url": "https://myspace.com/{}",
@@ -1715,36 +1546,18 @@
    "urlProbe": "https://notabug.org/{}/followers",
    "username_claimed": "red"
  },
-  "Nothing Community": {
-    "errorType": "status_code",
-    "url": "https://nothing.community/u/{}",
-    "urlMain": "https://nothing.community/",
-    "username_claimed": "Carl"
-  },
  "Nyaa.si": {
    "errorType": "status_code",
    "url": "https://nyaa.si/user/{}",
    "urlMain": "https://nyaa.si/",
    "username_claimed": "blue"
  },
-  "ObservableHQ": {
-    "errorType": "message",
-    "errorMsg": "Page not found",
-    "url": "https://observablehq.com/@{}",
-    "urlMain": "https://observablehq.com/",
-    "username_claimed": "mbostock"
-  },
  "Open Collective": {
-    "errorType": "status_code",
+    "errorMsg": "Oops! Page not found",
+    "errorType": "message",
    "url": "https://opencollective.com/{}",
    "urlMain": "https://opencollective.com/",
-    "username_claimed": "sindresorhus"
-  },
-  "OpenGameArt": {
-    "errorType": "status_code",
-    "url": "https://opengameart.org/users/{}",
-    "urlMain": "https://opengameart.org",
-    "username_claimed": "ski"
+    "username_claimed": "pylapp"
  },
  "OpenStreetMap": {
    "errorType": "status_code",
@@ -1753,13 +1566,6 @@
    "urlMain": "https://www.openstreetmap.org/",
    "username_claimed": "blue"
  },
-  "Odysee": {
-    "errorMsg": "<link rel=\"canonical\" content=\"odysee.com\"/>",
-    "errorType": "message",
-    "url": "https://odysee.com/@{}",
-    "urlMain": "https://odysee.com/",
-    "username_claimed": "Odysee"
-  },
  "Opensource": {
    "errorType": "status_code",
    "url": "https://opensource.com/users/{}",
@@ -1808,13 +1614,6 @@
    "urlMain": "https://pastebin.com/",
    "username_claimed": "blue"
  },
-  "Patched": {
-    "errorMsg": "The member you specified is either invalid or doesn't exist.",
-    "errorType": "message",
-    "url": "https://patched.sh/User/{}",
-    "urlMain": "https://patched.sh/",
-    "username_claimed": "blue"
-  },
  "Patreon": {
    "errorType": "status_code",
    "url": "https://www.patreon.com/{}",
@@ -1828,68 +1627,12 @@
    "urlMain": "https://pentesterlab.com/",
    "username_claimed": "0day"
  },
-  "HotUKdeals": {
-    "errorType": "status_code",
-    "url": "https://www.hotukdeals.com/profile/{}",
-    "urlMain": "https://www.hotukdeals.com/",
-    "username_claimed": "Blue",
-    "request_method": "GET"
-  },
-  "Mydealz": {
-    "errorType": "status_code",
-    "url": "https://www.mydealz.de/profile/{}",
-    "urlMain": "https://www.mydealz.de/",
-    "username_claimed": "blue",
-    "request_method": "GET"
-  },
-  "Chollometro": {
-    "errorType": "status_code",
-    "url": "https://www.chollometro.com/profile/{}",
-    "urlMain": "https://www.chollometro.com/",
-    "username_claimed": "blue",
-    "request_method": "GET"
-  },
-  "PepperNL": {
-    "errorType": "status_code",
-    "url": "https://nl.pepper.com/profile/{}",
-    "urlMain": "https://nl.pepper.com/",
-    "username_claimed": "Dynaw",
-    "request_method": "GET"
-  },
-  "PepperPL": {
-    "errorType": "status_code",
-    "url": "https://www.pepper.pl/profile/{}",
-    "urlMain": "https://www.pepper.pl/",
-    "username_claimed": "FireChicken",
-    "request_method": "GET"
-  },
-  "Preisjaeger": {
-    "errorType": "status_code",
-    "url": "https://www.preisjaeger.at/profile/{}",
-    "urlMain": "https://www.preisjaeger.at/",
-    "username_claimed": "Stefan",
-    "request_method": "GET"
-  },
-  "Pepperdeals": {
-    "errorType": "status_code",
-    "url": "https://www.pepperdeals.se/profile/{}",
-    "urlMain": "https://www.pepperdeals.se/",
-    "username_claimed": "Mark",
-    "request_method": "GET"
-  },
-  "PepperealsUS": {
-    "errorType": "status_code",
-    "url": "https://www.pepperdeals.com/profile/{}",
-    "urlMain": "https://www.pepperdeals.com/",
-    "username_claimed": "Stepan",
-    "request_method": "GET"
-  },
-  "Promodescuentos": {
-    "errorType": "status_code",
-    "url": "https://www.promodescuentos.com/profile/{}",
-    "urlMain": "https://www.promodescuentos.com/",
-    "username_claimed": "blue",
-    "request_method": "GET"
+  "PepperIT": {
+    "errorMsg": "La pagina che hai provato a raggiungere non si trova qui",
+    "errorType": "message",
+    "url": "https://www.pepper.it/profile/{}/overview",
+    "urlMain": "https://www.pepper.it",
+    "username_claimed": "asoluinostrisca"
  },
  "Periscope": {
    "errorType": "status_code",
@@ -1984,11 +1727,11 @@
    "username_claimed": "pylapp"
  },
  "Pychess": {
-    "errorType": "message",
-    "errorMsg": "404",
-    "url": "https://www.pychess.org/@/{}",
-    "urlMain": "https://www.pychess.org",
-    "username_claimed": "gbtami"
+  "errorType": "message",
+  "errorMsg": "404",
+  "url": "https://www.pychess.org/@/{}",
+  "urlMain": "https://www.pychess.org",
+  "username_claimed": "gbtami"
  },
  "PromoDJ": {
    "errorType": "status_code",
@@ -1996,12 +1739,6 @@
    "urlMain": "http://promodj.com/",
    "username_claimed": "blue"
  },
-  "Pronouns.page": {
-    "errorType": "status_code",
-    "url": "https://pronouns.page/@{}",
-    "urlMain": "https://pronouns.page/",
-    "username_claimed": "andrea"
-  },
  "PyPi": {
    "errorType": "status_code",
    "url": "https://pypi.org/user/{}",
@@ -2009,13 +1746,6 @@
    "urlMain": "https://pypi.org",
    "username_claimed": "Blue"
  },
-  "Python.org Discussions": {
-    "errorMsg": "Oops! That page doesn’t exist or is private.",
-    "errorType": "message",
-    "url": "https://discuss.python.org/u/{}/summary",
-    "urlMain": "https://discuss.python.org",
-    "username_claimed": "pablogsal"
-  },
  "Rajce.net": {
    "errorType": "status_code",
    "regexCheck": "^[\\w@-]+?$",
@@ -2064,13 +1794,6 @@
    "urlMain": "https://www.reddit.com/",
    "username_claimed": "blue"
  },
-  "Realmeye": {
-    "errorMsg": "Sorry, but we either:",
-    "errorType": "message",
-    "url": "https://www.realmeye.com/player/{}",
-    "urlMain": "https://www.realmeye.com/",
-    "username_claimed": "rotmg"
-  },
  "Reisefrage": {
    "errorType": "status_code",
    "url": "https://www.reisefrage.net/nutzer/{}",
@@ -2118,13 +1841,6 @@
    "urlMain": "https://royalcams.com",
    "username_claimed": "asuna-black"
  },
-  "Ruby Forums": {
-    "errorMsg": "Oops! That page doesn’t exist or is private.",
-    "errorType": "message",
-    "url": "https://ruby-forum.com/u/{}/summary",
-    "urlMain": "https://ruby-forums.com",
-    "username_claimed": "rishard"
-  },
  "RubyGems": {
    "errorType": "status_code",
    "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]{1,40}",
@@ -2172,18 +1888,6 @@
    "urlMain": "https://www.scribd.com/",
    "username_claimed": "blue"
  },
-  "SEOForum": {
-    "errorType": "status_code",
-    "url": "https://seoforum.com/@{}",
-    "urlMain": "https://www.seoforum.com/",
-    "username_claimed": "ko"
-  },
-  "Shelf": {
-    "errorType": "status_code",
-    "url": "https://www.shelf.im/{}",
-    "urlMain": "https://www.shelf.im/",
-    "username_claimed": "blue"
-  },
  "ShitpostBot5000": {
    "errorType": "status_code",
    "url": "https://www.shitpostbot.com/user/{}",
@@ -2337,6 +2041,7 @@
  },
  "Spotify": {
    "errorType": "status_code",
+
    "url": "https://open.spotify.com/user/{}",
    "urlMain": "https://open.spotify.com/",
    "username_claimed": "blue"
@@ -2348,13 +2053,6 @@
    "urlMain": "https://robertsspaceindustries.com/",
    "username_claimed": "blue"
  },
-  "Status Cafe": {
-    "errorMsg": "Page Not Found",
-    "errorType": "message",
-    "url": "https://status.cafe/users/{}",
-    "urlMain": "https://status.cafe/",
-    "username_claimed": "blue"
-  },
  "Steam Community (Group)": {
    "errorMsg": "No group could be retrieved for the given URL",
    "errorType": "message",
@@ -2390,22 +2088,6 @@
    "urlProbe": "https://ch.tetr.io/api/users/{}",
    "username_claimed": "osk"
  },
-  "TheMovieDB": {
-    "errorType": "status_code",
-    "url": "https://www.themoviedb.org/u/{}",
-    "urlMain": "https://www.themoviedb.org/",
-    "username_claimed": "blue"
-  },
-  "TikTok": {
-    "url": "https://www.tiktok.com/@{}",
-    "urlMain": "https://www.tiktok.com",
-    "errorType": "message",
-    "errorMsg": [
-      "\"statusCode\":10221",
-      "Govt. of India decided to block 59 apps"
-    ],
-    "username_claimed": "charlidamelio"
-  },
  "Tiendanube": {
    "url": "https://{}.mitiendanube.com/",
    "urlMain": "https://www.tiendanube.com/",
@@ -2418,13 +2100,7 @@
    "urlMain": "https://topcoder.com/",
    "username_claimed": "USER",
    "urlProbe": "https://api.topcoder.com/v5/members/{}",
-    "regexCheck": "^[a-zA-Z0-9_.]+$"
-  },
-  "Topmate": {
-    "errorType": "status_code",
-    "url": "https://topmate.io/{}",
-    "urlMain": "https://topmate.io/",
-    "username_claimed": "blue"
+    "regexCheck": "[a-zA-Z0-9 ]"
  },
  "TRAKTRAIN": {
    "errorType": "status_code",
@@ -2456,13 +2132,6 @@
    "urlMain": "https://tenor.com/",
    "username_claimed": "red"
  },
-  "Terraria Forums": {
-    "errorMsg": "The following members could not be found",
-    "errorType": "message",
-    "url": "https://forums.terraria.org/index.php?search/42798315/&c[users]={}&o=relevance",
-    "urlMain": "https://forums.terraria.org/index.php",
-    "username_claimed": "blue"
-  },
  "ThemeForest": {
    "errorType": "status_code",
    "url": "https://themeforest.net/user/{}",
@@ -2539,21 +2208,6 @@
    "urlMain": "https://tweakers.net",
    "username_claimed": "femme"
  },
-  "Twitch": {
-    "errorMsg": "content='Twitch is the world&#39;s leading video platform and community for gamers.'",
-    "errorType": "message",
-    "url": "https://www.twitch.tv/{}",
-    "urlMain": "https://www.twitch.tv",
-    "username_claimed": "xqc"
-  },
-
-  "Trovo": {
-    "errorMsg": "Uh Ohhh...",
-    "errorType": "message",
-    "url": "https://trovo.live/s/{}/",
-    "urlMain": "https://trovo.live",
-    "username_claimed": "Aimilios"
-  },
  "Twitter": {
    "errorMsg": [
      "<div class=\"error-panel\"><span>User ",
@@ -2592,13 +2246,6 @@
    "urlMain": "https://untappd.com/",
    "username_claimed": "untappd"
  },
-  "Valorant Forums": {
-    "errorMsg": "The page you requested could not be found.",
-    "errorType": "message",
-    "url": "https://valorantforums.com/u/{}",
-    "urlMain": "https://valorantforums.com",
-    "username_claimed": "Wolves"
-  },
  "VK": {
    "errorType": "response_url",
    "errorUrl": "https://www.quora.com/profile/{}",
@@ -2626,7 +2273,9 @@
    "username_claimed": "red"
  },
  "Venmo": {
-    "errorMsg": ["Venmo | Page Not Found"],
+    "errorMsg": [
+      "Venmo | Page Not Found"
+    ],
    "errorType": "message",
    "headers": {
      "Host": "account.venmo.com"
@@ -2671,12 +2320,6 @@
    "urlMain": "https://discourse.wicg.io/",
    "username_claimed": "stefano"
  },
-  "Wakatime": {
-    "errorType": "status_code",
-    "url": "https://wakatime.com/@{}",
-    "urlMain": "https://wakatime.com/",
-    "username_claimed": "blue"
-  },
  "Warrior Forum": {
    "errorType": "status_code",
    "url": "https://www.warriorforum.com/members/{}.html",
@@ -2821,6 +2464,7 @@
  },
  "YouTube": {
    "errorType": "status_code",
+
    "url": "https://www.youtube.com/@{}",
    "urlMain": "https://www.youtube.com/",
    "username_claimed": "youtube"
@@ -3043,12 +2687,6 @@
    "urlMain": "https://mastodon.xyz/",
    "username_claimed": "TheKinrar"
  },
-  "mstdn.social": {
-    "errorType": "status_code",
-    "url": "https://mstdn.social/@{}",
-    "urlMain": "https://mstdn.social/",
-    "username_claimed": "MagicLike"
-  },
  "mercadolivre": {
    "errorType": "status_code",
    "url": "https://www.mercadolivre.com.br/perfil/{}",
@@ -3081,12 +2719,6 @@
    "urlMain": "https://www.nairaland.com/",
    "username_claimed": "red"
  },
-  "n8n Community": {
-    "errorType": "status_code",
-    "url": "https://community.n8n.io/u/{}/summary",
-    "urlMain": "https://community.n8n.io/",
-    "username_claimed": "n8n"
-  },
  "nnRU": {
    "errorType": "status_code",
    "regexCheck": "^[\\w@-]+?$",
@@ -3141,14 +2773,6 @@
    "urlMain": "https://pikabu.ru/",
    "username_claimed": "blue"
  },
-  "Pinterest": {
-    "errorType": "status_code",
-    "errorUrl": "https://www.pinterest.com/",
-    "url": "https://www.pinterest.com/{}/",
-    "urlProbe": "https://www.pinterest.com/oembed.json?url=https://www.pinterest.com/{}/",
-    "urlMain": "https://www.pinterest.com/",
-    "username_claimed": "blue"
-  },
  "pr0gramm": {
    "errorType": "status_code",
    "url": "https://pr0gramm.com/user/{}",
@@ -3214,7 +2838,7 @@
    "url": "https://{}.tumblr.com/",
    "urlMain": "https://www.tumblr.com/",
    "username_claimed": "goku"
-  },
+},
  "uid": {
    "errorType": "status_code",
    "url": "http://uid.me/{}",
@@ -3241,6 +2865,13 @@
    "urlMain": "https://znanylekarz.pl",
    "username_claimed": "janusz-nowak"
  },
+  "Bluesky": {
+    "errorType": "status_code",
+    "url": "https://bsky.app/profile/{}.bsky.social",
+    "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social",
+    "urlMain": "https://bsky.app/",
+    "username_claimed": "mcuban"
+  },
  "Platzi": {
    "errorType": "status_code",
    "errorCode": 404,
@@ -3248,36 +2879,5 @@
    "urlMain": "https://platzi.com/",
    "username_claimed": "freddier",
    "request_method": "GET"
-  },
-  "BabyRu": {
-    "url": "https://www.baby.ru/u/{}",
-    "urlMain": "https://www.baby.ru/",
-    "errorType": "message",
-    "errorMsg": [
-      "\u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430, \u043a\u043e\u0442\u043e\u0440\u0443\u044e \u0432\u044b \u0438\u0441\u043a\u0430\u043b\u0438, \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430",
-      "\u0414\u043e\u0441\u0442\u0443\u043f \u0441 \u0432\u0430\u0448\u0435\u0433\u043e IP-\u0430\u0434\u0440\u0435\u0441\u0430 \u0432\u0440\u0435\u043c\u0435\u043d\u043d\u043e \u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d"
-    ],
-    "username_claimed": "example"
-  },
-  "Wowhead": {
-    "url": "https://wowhead.com/user={}",
-    "urlMain": "https://wowhead.com/",
-    "errorType": "status_code",
-    "errorCode": 404,
-    "username_claimed": "blue"
-  },
-  "addons.wago.io": {
-    "url": "https://addons.wago.io/user/{}",
-    "urlMain": "https://addons.wago.io/",
-    "errorType": "status_code",
-    "errorCode": 404,
-    "username_claimed": "blue"
-  },
-  "CurseForge": {
-    "url": "https://www.curseforge.com/members/{}/projects",
-    "urlMain": "https://www.curseforge.com.",
-    "errorType": "status_code",
-    "errorCode": 404,
-    "username_claimed": "blue"
  }
 }
@@ -675,6 +675,16 @@ def main():
        help="Include checking of NSFW sites from default list.",
    )

+    # TODO deprecated in favor of --txt, retained for workflow compatibility, to be removed
+    # in future release
+    parser.add_argument(
+        "--no-txt",
+        action="store_true",
+        dest="no_txt",
+        default=False,
+        help="Disable creation of a txt file - WILL BE DEPRECATED",
+    )
+
    parser.add_argument(
        "--txt",
        action="store_true",
@@ -915,8 +925,8 @@ def main():
                {
                    "username": usernames,
                    "name": names,
-                    "url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main],
-                    "url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user],
+                    "url_main": url_main,
+                    "url_user": url_user,
                    "exists": exists,
                    "http_status": http_status,
                    "response_time_s": response_time_s,
@@ -8,7 +8,7 @@ import requests
 import secrets


-MANIFEST_URL = "https://data.sherlockproject.xyz"
+MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
 EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"

 class SiteInformation:
@@ -121,6 +121,11 @@ class SitesInformation:
            # users from creating issue about false positives which has already been fixed or having outdated data
            data_file_path = MANIFEST_URL

+        # Ensure that specified data file has correct extension.
+        if not data_file_path.lower().endswith(".json"):
+            raise FileNotFoundError(f"Incorrect JSON file extension for data file '{data_file_path}'.")
+
+        # if "http://"  == data_file_path[:7].lower() or "https://" == data_file_path[:8].lower():
        if data_file_path.lower().startswith("http"):
            # Reference is to a URL.
            try:
@@ -0,0 +1,84 @@
+import requests
+import yaml
+
+
+NUCLEI_FINGERPRINT_URL: str = "https://raw.githubusercontent.com/projectdiscovery/nuclei-templates/refs/heads/main/http/global-matchers/global-waf-detect.yaml"
+
+def _check_nuclei_regex(matcher: dict[str,str|list[str]], response: requests.Response) -> bool:
+    import re
+
+    and_cond: bool = matcher.get('condition', '') == 'and'
+
+    target_text: str
+    if matcher['part'] == 'body':
+        target_text = response.text
+    elif matcher['part'] == 'header':
+        target_text = str(response.headers)
+    else:
+        target_text = response.text + str(response.headers)
+
+    for regex in matcher['regex']:
+        if re.search(regex, target_text):
+            if not and_cond:
+                return True
+        else:
+            break
+    else:
+        # `and` conditions will cycle, resulting in this default return True
+        # unless an early failed detection breaks the loop (resulting in False)
+        return True
+    return False
+
+def _check_nuclei_words(matcher: dict[str,str|list[str]], response: requests.Response) -> bool:
+    and_cond: bool = matcher.get('condition', '') == 'and'
+
+    target_text: str
+    if matcher['part'] == 'body':
+        target_text = response.text
+    elif matcher['part'] == 'header':
+        target_text = str(response.headers)
+    else:
+        target_text = response.text + str(response.headers)
+
+    for word in matcher['words']:
+        if word in target_text:
+            if not and_cond:
+                return True
+        else:
+            break
+    else:
+        # `and` conditions will cycle, resulting in this default return True
+        # unless an early failed detection breaks the loop (resulting in False)
+        return True
+    return False
+
+def fetch_nuclei_fingerprints() -> list[dict[str,str|list[str]]] | None:
+    """Fetch the latest Nuclei WAF fingerprints from the official repository."""
+    try:
+        response = requests.get(NUCLEI_FINGERPRINT_URL, timeout=10)
+        response.raise_for_status()
+        raw = yaml.safe_load(response.text)
+        fingerprints: list[dict[str,str|list[str]]] = raw['http'][0]['matchers']
+        return fingerprints
+    except requests.RequestException as e:
+        print(f"Error fetching Nuclei fingerprints: {e}")
+        return None
+    except yaml.YAMLError as e:
+        print(f"Error parsing YAML data: {e}")
+        return None
+
+def nuclei_check(response: requests.Response, fingerprints: list[dict[str,str|list[str]]]) -> bool:
+    """Check if the response matches any of the WAF fingerprints.
+
+    Keyword arguments:
+    response -- The HTTP response to check.
+    fingerprints -- The list of Nuclei WAF fingerprints to check against.
+
+    Returns True if a WAF is detected, False otherwise.
+    """
+    for matcher in fingerprints:
+        if matcher['type'] == 'word':
+            return _check_nuclei_words(matcher, response)
+        elif matcher['type'] == 'regex':
+            return _check_nuclei_regex(matcher, response)
+    return False
@@ -0,0 +1,26 @@
+id: global-waf-detect  # presumably the local fixture served by the WAF-check tests - confirm
+http:
+  - global-matchers: true
+    matchers-condition: or
+    matchers:  # one entry per matcher; `type` is either regex or word here
+      - type: regex
+        name: regexSite
+        regex:
+          - '(?i)access.to.this.page.has.been.denied'
+          - '(?i)http(s)?://(www.)?anotheroneblocked.\w+.whywasiblocked'
+        condition: or
+        part: response  # a value other than body/header
+
+      - type: word
+        name: wordSiteBody
+        part: body
+        words:
+          - "bad_text_in_body"
+
+      - type: word
+        name: wordSiteHead
+        part: header
+        condition: or
+        words:
+          - "text_in_head"
+          - "other_in_head"
@@ -16,7 +16,6 @@ def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUAN
    """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`."""
    def replace_upper_bound(match: re.Match) -> str: # type: ignore
        lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore
-        nonlocal upper_bound
        upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore  # noqa: F823
        return f'{{{lower_bound},{upper_bound}}}'

@@ -0,0 +1,107 @@
+import os
+import unittest
+from unittest.mock import patch, Mock
+import requests
+from requests.structures import CaseInsensitiveDict
+import yaml
+
+from sherlock_project import waf_check
+
+
+TEMPLATE_BODY_PATH: str = os.path.join(os.path.dirname(__file__), 'mocks', 'global_waf_detect.yaml')
+
+def side_effect(url, **kwargs) -> Mock:
+    if url == waf_check.NUCLEI_FINGERPRINT_URL:
+        with open(TEMPLATE_BODY_PATH, 'r', encoding='utf-8') as file:
+            template_body: str = file.read()
+        mock_response: Mock = Mock()
+        mock_response.status_code = 200
+        mock_response.text = template_body
+        return mock_response
+    raise RuntimeError("Unexpected URL")
+
+class TestWafCheck(unittest.TestCase):
+
+    @patch('sherlock_project.waf_check.requests.get')
+    def test_fetch_nuclei_fingerprints(self, mock_requests_get): # type: ignore
+        mock_requests_get.side_effect = side_effect
+
+        result = waf_check.fetch_nuclei_fingerprints()
+
+        with open(TEMPLATE_BODY_PATH, 'r', encoding='utf-8') as file:
+            template_body: str = file.read()
+
+        expected: list[dict[str, str | list[str]]] = yaml.safe_load(template_body)['http'][0]['matchers']
+        self.assertEqual(result, expected)
+
+    def test_nuclei_regex_check(self):
+        mock_res: requests.Response = requests.Response()
+        mock_res.status_code = 200
+        mock_res._content = b"This is a test response with Test-Regex in the body."
+        mock_res.headers = CaseInsensitiveDict({
+            'Content-Type': 'text/html',
+            'Server': 'TestServer'
+        })
+        matcher: dict[str, str | list[str]] = {
+            'type': 'regex',
+            'name': 'test-regex',
+            'part': 'body',
+            'regex': [r'(?i)not-present'],
+            'condition': 'or'
+        }
+        self.assertFalse(waf_check._check_nuclei_regex(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['regex'] = [r'(?i)TeSt-REgEx']
+        self.assertTrue(waf_check._check_nuclei_regex(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['regex'] = [r'(?i)TeSt-REgEx', r'(?i)Not-Present']
+        self.assertTrue(waf_check._check_nuclei_regex(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['condition'] = 'and'
+        self.assertFalse(waf_check._check_nuclei_regex(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['part'] = 'header'
+        matcher['regex'] = [r'(?i)testserver']
+        self.assertTrue(waf_check._check_nuclei_regex(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['part'] = 'response'
+        self.assertTrue(waf_check._check_nuclei_regex(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['regex'] = [r'(?i)not-present']
+        self.assertFalse(waf_check._check_nuclei_regex(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+    def test_nuclei_words_check(self):
+        mock_res: requests.Response = requests.Response()
+        mock_res.status_code = 200
+        mock_res._content = b"This is a test response with test-words in the body."
+        mock_res.headers = CaseInsensitiveDict({
+            'Content-Type': 'text/html',
+            'Server': 'TestServer'
+        })
+        matcher: dict[str, str | list[str]] = {
+            'type': 'word',
+            'name': 'test-word',
+            'part': 'body',
+            'words': ['not-present'],
+            'condition': 'or'
+        }
+        self.assertFalse(waf_check._check_nuclei_words(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['words'] = ['test-word']
+        self.assertTrue(waf_check._check_nuclei_words(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['words'] = ['test-word', 'Not-Present']
+        self.assertTrue(waf_check._check_nuclei_words(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['condition'] = 'and'
+        self.assertFalse(waf_check._check_nuclei_words(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['part'] = 'header'
+        matcher['words'] = ['testserver']
+        self.assertFalse(waf_check._check_nuclei_words(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['words'] = ['TestServer']
+        self.assertTrue(waf_check._check_nuclei_words(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]
+
+        matcher['part'] = 'response'
+        self.assertTrue(waf_check._check_nuclei_words(matcher, mock_res)) # pyright: ignore[reportPrivateUsage]