fix: mastodon.cloud

Co-authored-by: dollaransh17 <dollaransh17@users.noreply.github.com>
2025-10-05 14:32:54 -04:00
16 changed files with 380 additions and 908 deletions
@@ -65,7 +65,7 @@ The Actor provides three types of outputs:
 | Field | Type | Required | Description |
 |-------|------|----------|-------------|
 | `username` | string | Yes | Username the search was conducted for |
-| `links` | array | Yes | Array with found links to the social media |
+| `links` | array | Yes | Array with found links to the social media |
 | `links[]`| string | No | URL to the account

 ### Example Dataset Item (JSON)
@@ -1,5 +1,5 @@
 ### REPOSITORY
-/.github/CODEOWNERS @sdushantha @ppfeister
+/.github/CODEOWNERS @sdushantha
 /.github/FUNDING.yml @sdushantha
 /LICENSE @sdushantha

@@ -11,7 +11,6 @@ on:
      - '**/*.py'
      - '**/*.ini'
      - '**/*.toml'
-      - 'Dockerfile'
  push:
    branches:
      - master
@@ -22,17 +21,15 @@ on:
      - '**/*.py'
      - '**/*.ini'
      - '**/*.toml'
-      - 'Dockerfile'

 jobs:
  tox-lint:
+    # Linting is run through tox to ensure that the same linter is used by local runners
    runs-on: ubuntu-latest
-    # Linting is run through tox to ensure that the same linter
-    # is used by local runners
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v4
      - name: Set up linting environment
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      - name: Install tox and related dependencies
@@ -44,8 +41,7 @@ jobs:
  tox-matrix:
    runs-on: ${{ matrix.os }}
    strategy:
-      # We want to know what specific versions it fails on
-      fail-fast: false
+      fail-fast: false # We want to know what specific versions it fails on
      matrix:
        os: [
          ubuntu-latest,
@@ -57,13 +53,11 @@ jobs:
          '3.11',
          '3.12',
          '3.13',
-          '3.14',
-          '3.14t',
        ]
    steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v4
      - name: Set up environment ${{ matrix.python-version }}
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install tox and related dependencies
@@ -73,22 +67,3 @@ jobs:
          pip install tox-gh-actions
      - name: Run tox
        run: tox
-  docker-build-test:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v6
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Get version from pyproject.toml
-        id: get-version
-        run: |
-          VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
-          echo "version=$VERSION" >> $GITHUB_OUTPUT
-      - name: Build Docker image
-        run: |
-          docker build \
-            --build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
-            -t sherlock-test:latest .
-      - name: Test Docker image runs
-        run: docker run --rm sherlock-test:latest --version
@@ -17,41 +17,29 @@ jobs:
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
-          # Checkout the base branch but fetch all history to avoid a second fetch call
          ref: ${{ github.base_ref }}
-          fetch-depth: 0
-          persist-credentials: false
+          fetch-depth: 1

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
-          python-version: "3.13"
+          python-version: '3.13'

      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
-          poetry-version: "latest"
+          poetry-version: 'latest'

      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev

-      - name: Prepare JSON versions for comparison
+      - name: Drop in place updated manifest from base
        run: |
-          # Fetch only the PR's branch head (single network call in this step)
-          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
-
-          # Find the merge-base commit between the target branch and the PR branch
-          MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
-          echo "Comparing PR head against merge-base commit: $MERGE_BASE"
-
-          # Safely extract the file from the PR's head and the merge-base commit
-          git show pr:sherlock_project/resources/data.json > data.json.head
-          git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
-
-          # CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
-          # This ensures that pytest runs against the new, updated file.
-          cp data.json.head sherlock_project/resources/data.json
+          cp sherlock_project/resources/data.json data.json.base
+          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1
+          git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json
+          cp sherlock_project/resources/data.json data.json.head

      - name: Discover modified targets
        id: discover-modified
@@ -59,16 +47,8 @@ jobs:
          CHANGED=$(
            python - <<'EOF'
          import json
-          import sys
-          try:
-              with open("data.json.base") as f: base = json.load(f)
-              with open("data.json.head") as f: head = json.load(f)
-          except FileNotFoundError as e:
-              print(f"Error: Could not find {e.filename}", file=sys.stderr)
-              sys.exit(1)
-          except json.JSONDecodeError as e:
-              print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
-              sys.exit(1)
+          with open("data.json.base") as f: base = json.load(f)
+          with open("data.json.head") as f: head = json.load(f)

          changed = []
          for k, v in head.items():
@@ -83,19 +63,12 @@ jobs:
          echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
          echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"

-      - name: Validate remote manifest against local schema
-        if: steps.discover-modified.outputs.changed_targets != ''
-        run: |
-          poetry run pytest tests/test_manifest.py::test_validate_manifest_against_local_schema
-
-      # --- The rest of the steps below are unchanged ---
-
      - name: Validate modified targets
-        env:
-          CHANGED_TARGETS: ${{ steps.discover-modified.outputs.changed_targets }}
+        if: steps.discover-modified.outputs.changed_targets != ''
+        continue-on-error: true
        run: |
          poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
-            --chunked-sites "$CHANGED_TARGETS" \
+            --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" \
            --junitxml=validation_results.xml

      - name: Prepare validation summary
@@ -4,7 +4,7 @@
  # 3. Build image with BOTH latest and version tags
    # i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`

-FROM python:3.12-slim-bullseye AS build
+FROM python:3.12-slim-bullseye as build
 WORKDIR /sherlock

 RUN pip3 install --no-cache-dir --upgrade pip
@@ -1,45 +1,39 @@
 #!/usr/bin/env python
 # This module generates the listing of supported sites which can be found in
-# sites.mdx. It also organizes all the sites in alphanumeric order
+# sites.mdx. It also organizes all the sites in alphanumeric order
 import json
 import os

+
 DATA_REL_URI: str = "sherlock_project/resources/data.json"

-DEFAULT_ENCODING = "utf-8"
-
 # Read the data.json file
-with open(DATA_REL_URI, "r", encoding=DEFAULT_ENCODING) as data_file:
+with open(DATA_REL_URI, "r", encoding="utf-8") as data_file:
    data: dict = json.load(data_file)

 # Removes schema-specific keywords for proper processing
-social_networks = data.copy()
+social_networks: dict = dict(data)
 social_networks.pop('$schema', None)

 # Sort the social networks in alphanumeric order
-social_networks = sorted(social_networks.items())
+social_networks: list = sorted(social_networks.items())

 # Make output dir where the site list will be written
 os.mkdir("output")

-# Write the list of supported sites to sites.mdx
-with open("output/sites.mdx", "w", encoding=DEFAULT_ENCODING) as site_file:
-    site_file.write("---\n")
-    site_file.write("title: 'List of supported sites'\n")
-    site_file.write("sidebarTitle: 'Supported sites'\n")
-    site_file.write("icon: 'globe'\n")
-    site_file.write("description: 'Sherlock currently supports **400+** sites'\n")
-    site_file.write("---\n\n")
-
+# Write the list of supported sites to sites.mdx
+with open("output/sites.mdx", "w") as site_file:
+    site_file.write("---\ntitle: 'List of supported sites'\nsidebarTitle: 'Supported sites'\nicon: 'globe'\ndescription: 'Sherlock currently supports **400+** sites'\n---\n\n")
    for social_network, info in social_networks:
        url_main = info["urlMain"]
        is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
        site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")

 # Overwrite the data.json file with sorted data
-with open(DATA_REL_URI, "w", encoding=DEFAULT_ENCODING) as data_file:
+with open(DATA_REL_URI, "w") as data_file:
    sorted_data = json.dumps(data, indent=2, sort_keys=True)
    data_file.write(sorted_data)
-    data_file.write("\n")  # Keep the newline after writing data
+    data_file.write("\n")

 print("Finished updating supported site listing!")
+
@@ -23,17 +23,17 @@

 > [!WARNING]  
 > Packages for ParrotOS and Ubuntu 24.04, maintained by a third party, appear to be __broken__.  
-> Users of these systems should defer to [`uv`](https://docs.astral.sh/uv/)/`pipx`/`pip` or Docker.
+> Users of these systems should defer to pipx/pip or Docker.

 | Method | Notes |
 | - | - |
-| `pipx install sherlock-project` | `pip` or [`uv`](https://docs.astral.sh/uv/) may be used in place of `pipx` |
+| `pipx install sherlock-project` | `pip` may be used in place of `pipx` |
 | `docker run -it --rm sherlock/sherlock` |
 | `dnf install sherlock-project` | |

 Community-maintained packages are available for Debian (>= 13), Ubuntu (>= 22.10), Homebrew, Kali, and BlackArch. These packages are not directly supported or maintained by the Sherlock Project.

-See all alternative installation methods [here](https://sherlockproject.xyz/installation).
+See all alternative installation methods [here](https://sherlockproject.xyz/installation).

 ## General usage

@@ -51,42 +51,70 @@ Accounts found will be stored in an individual text file with the corresponding

 ```console
 $ sherlock --help
-usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT] [--output OUTPUT] [--csv] [--xlsx] [--site SITE_NAME] [--proxy PROXY_URL] [--dump-response]
-                [--json JSON_FILE] [--timeout TIMEOUT] [--print-all] [--print-found] [--no-color] [--browse] [--local] [--nsfw] [--txt] [--ignore-exclusions]
+usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT]
+                [--output OUTPUT] [--tor] [--unique-tor] [--csv] [--xlsx]
+                [--site SITE_NAME] [--proxy PROXY_URL] [--json JSON_FILE]
+                [--timeout TIMEOUT] [--print-all] [--print-found] [--no-color]
+                [--browse] [--local] [--nsfw]
                USERNAMES [USERNAMES ...]

-Sherlock: Find Usernames Across Social Networks (Version 0.16.0)
+Sherlock: Find Usernames Across Social Networks (Version 0.14.3)

 positional arguments:
-  USERNAMES             One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').
+  USERNAMES             One or more usernames to check with social networks.
+                        Check similar usernames using {?} (replace to '_', '-', '.').

-options:
+optional arguments:
  -h, --help            show this help message and exit
  --version             Display version information and dependencies.
  --verbose, -v, -d, --debug
                        Display extra debugging information and metrics.
  --folderoutput FOLDEROUTPUT, -fo FOLDEROUTPUT
-                        If using multiple usernames, the output of the results will be saved to this folder.
+                        If using multiple usernames, the output of the results will be
+                        saved to this folder.
  --output OUTPUT, -o OUTPUT
-                        If using single username, the output of the result will be saved to this file.
+                        If using single username, the output of the result will be saved
+                        to this file.
+  --tor, -t             Make requests over Tor; increases runtime; requires Tor to be
+                        installed and in system path.
+  --unique-tor, -u      Make requests over Tor with new Tor circuit after each request;
+                        increases runtime; requires Tor to be installed and in system
+                        path.
  --csv                 Create Comma-Separated Values (CSV) File.
-  --xlsx                Create the standard file for the modern Microsoft Excel spreadsheet (xlsx).
-  --site SITE_NAME      Limit analysis to just the listed sites. Add multiple options to specify more than one site.
+  --xlsx                Create the standard file for the modern Microsoft Excel
+                        spreadsheet (xlsx).
+  --site SITE_NAME      Limit analysis to just the listed sites. Add multiple options to
+                        specify more than one site.
  --proxy PROXY_URL, -p PROXY_URL
                        Make requests over a proxy. e.g. socks5://127.0.0.1:1080
-  --dump-response       Dump the HTTP response to stdout for targeted debugging.
  --json JSON_FILE, -j JSON_FILE
-                        Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.
+                        Load data from a JSON file or an online, valid, JSON file.
  --timeout TIMEOUT     Time (in seconds) to wait for response to requests (Default: 60)
  --print-all           Output sites where the username was not found.
-  --print-found         Output sites where the username was found (also if exported as file).
+  --print-found         Output sites where the username was found.
  --no-color            Don't color terminal output
  --browse, -b          Browse to all results on default browser.
  --local, -l           Force the use of the local data.json file.
  --nsfw                Include checking of NSFW sites from default list.
-  --txt                 Enable creation of a txt file
-  --ignore-exclusions   Ignore upstream exclusions (may return more false positives)
 ```
+## Apify Actor Usage [![Sherlock Actor](https://apify.com/actor-badge?actor=netmilk/sherlock)](https://apify.com/netmilk/sherlock?fpr=sherlock)
+
+<a href="https://apify.com/netmilk/sherlock?fpr=sherlock"><img src="https://apify.com/ext/run-on-apify.png" alt="Run Sherlock Actor on Apify" width="176" height="39" /></a>
+
+You can run Sherlock in the cloud without installation using the [Sherlock Actor](https://apify.com/netmilk/sherlock?fpr=sherlock) on [Apify](https://apify.com?fpr=sherlock) free of charge.
+
+``` bash
+$ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock
+[{
+  "username": "user123",
+  "links": [
+    "https://www.1337x.to/user/user123/",
+    ...
+  ]
+}]
+```
+
+Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).

 ## Credits

@@ -96,7 +124,7 @@ Thank you to everyone who has contributed to Sherlock! ❤️
  <img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
 </a>

-## Star History
+## Star history

 <picture>
  <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
@@ -107,7 +135,7 @@ Thank you to everyone who has contributed to Sherlock! ❤️
 ## License

 MIT © Sherlock Project<br/>
-Creator - [Siddharth Dushantha](https://github.com/sdushantha)
+Original Creator - [Siddharth Dushantha](https://github.com/sdushantha)

 <!-- Reference Links -->

@@ -8,7 +8,7 @@ source = "init"

 [tool.poetry]
 name = "sherlock-project"
-version = "0.16.1"
+version = "0.16.0"
 description = "Hunt down social media accounts by username across social networks"
 license = "MIT"
 authors = [
@@ -29,10 +29,6 @@ classifiers = [
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
-    "Programming Language :: Python :: 3.13",
    "Topic :: Security"
 ]
 homepage = "https://sherlockproject.xyz/"
@@ -50,10 +46,14 @@ PySocks = "^1.7.0"
 requests = "^2.22.0"
 requests-futures = "^1.0.0"
 stem = "^1.8.0"
+torrequest = "^0.1.0"
 pandas = "^2.2.1"
 openpyxl = "^3.0.10"
 tomli = "^2.2.1"

+[tool.poetry.extras]
+tor = ["torrequest"]
+
 [tool.poetry.group.dev.dependencies]
 jsonschema = "^4.0.0"
 rstr = "^3.2.2"
@@ -37,6 +37,7 @@ class QueryNotify:

        self.result = result

+        # return

    def start(self, message=None):
        """Notify Start.
@@ -55,6 +56,7 @@ class QueryNotify:
        Nothing.
        """

+        # return

    def update(self, result):
        """Notify Update.
@@ -73,6 +75,7 @@ class QueryNotify:

        self.result = result

+        # return

    def finish(self, message=None):
        """Notify Finish.
@@ -91,6 +94,7 @@ class QueryNotify:
        Nothing.
        """

+        # return

    def __str__(self):
        """Convert Object To String.
@@ -133,6 +137,7 @@ class QueryNotifyPrint(QueryNotify):
        self.print_all = print_all
        self.browse = browse

+        return

    def start(self, message):
        """Notify Start.
@@ -158,6 +163,7 @@ class QueryNotifyPrint(QueryNotify):
        # An empty line between first line and the result(more clear output)
        print('\r')

+        return

    def countResults(self):
        """This function counts the number of results. Every time the function is called,
@@ -232,7 +238,7 @@ class QueryNotifyPrint(QueryNotify):
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.YELLOW + f" {msg}")
-
+                
        elif result.status == QueryStatus.WAF:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
@@ -248,9 +254,10 @@ class QueryNotifyPrint(QueryNotify):
                f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'"
            )

+        return

    def finish(self, message="The processing has been finished."):
-        """Notify Finish.
+        """Notify Finish.
        Will print the last line to the standard output.
        Keyword Arguments:
        self                   -- This object.
@@ -1,149 +1,80 @@
 {
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "title": "Sherlock Target Manifest",
-  "description": "Social media targets to probe for the existence of known usernames",
-  "type": "object",
-  "properties": {
-    "$schema": { "type": "string" }
-  },
-  "patternProperties": {
-    "^(?!\\$).*?$": {
-      "type": "object",
-      "description": "Target name and associated information (key should be human readable name)",
-      "required": ["url", "urlMain", "errorType", "username_claimed"],
-      "properties": {
-        "url": { "type": "string" },
-        "urlMain": { "type": "string" },
-        "urlProbe": { "type": "string" },
-        "username_claimed": { "type": "string" },
-        "regexCheck": { "type": "string" },
-        "isNSFW": { "type": "boolean" },
-        "headers": { "type": "object" },
-        "request_payload": { "type": "object" },
-        "__comment__": {
-          "type": "string",
-          "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
-        },
-        "tags": {
-          "oneOf": [
-            { "$ref": "#/$defs/tag" },
-            { "type": "array", "items": { "$ref": "#/$defs/tag" } }
-          ]
-        },
-        "request_method": {
-          "type": "string",
-          "enum": ["GET", "POST", "HEAD", "PUT"]
-        },
-        "errorType": {
-          "oneOf": [
-            {
-              "type": "string",
-              "enum": ["message", "response_url", "status_code"]
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "title": "Sherlock Target Manifest",
+    "description": "Social media targets to probe for the existence of known usernames",
+    "type": "object",
+    "properties": {
+        "$schema": { "type": "string" }
+    },
+    "patternProperties": {
+        "^(?!\\$).*?$": {
+            "type": "object",
+            "description": "Target name and associated information (key should be human readable name)",
+            "required": [ "url", "urlMain", "errorType", "username_claimed" ],
+            "properties": {
+                "url": { "type": "string" },
+                "urlMain": { "type": "string" },
+                "urlProbe": { "type": "string" },
+                "username_claimed": { "type": "string" },
+                "regexCheck": { "type": "string" },
+                "isNSFW": { "type": "boolean" },
+                "headers": { "type": "object" },
+                "request_payload": { "type": "object" },
+                "__comment__": {
+                    "type": "string",
+                    "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
+                },
+                "tags": {
+                    "oneOf": [
+                        { "$ref": "#/$defs/tag" },
+                        { "type": "array", "items": { "$ref": "#/$defs/tag" } }
+                    ]
+                },
+                "request_method": {
+                    "type": "string",
+                    "enum": [ "GET", "POST", "HEAD", "PUT" ]
+                },
+                "errorType": {
+                    "type": "string",
+                    "enum": [ "message", "response_url", "status_code" ]
+                },
+                "errorMsg": {
+                    "oneOf": [
+                        { "type": "string" },
+                        { "type": "array", "items": { "type": "string" } }
+                    ]
+                },
+                "errorCode": {
+                    "oneOf": [
+                        { "type": "integer" },
+                        { "type": "array", "items": { "type": "integer" } }
+                    ]
+                },
+                "errorUrl": { "type": "string" },
+                "response_url": { "type": "string" }
            },
-            {
-              "type": "array",
-              "items": {
-                "type": "string",
-                "enum": ["message", "response_url", "status_code"]
-              }
-            }
-          ]
-        },
-        "errorMsg": {
-          "oneOf": [
-            { "type": "string" },
-            { "type": "array", "items": { "type": "string" } }
-          ]
-        },
-        "errorCode": {
-          "oneOf": [
-            { "type": "integer" },
-            { "type": "array", "items": { "type": "integer" } }
-          ]
-        },
-        "errorUrl": { "type": "string" },
-        "response_url": { "type": "string" }
-      },
-      "dependencies": {
-        "errorMsg": {
-          "oneOf": [
-            { "properties": { "errorType": { "const": "message" } } },
-            {
-              "properties": {
-                "errorType": {
-                  "type": "array",
-                  "contains": { "const": "message" }
+            "dependencies": {
+                "errorMsg": {
+                    "properties" : { "errorType": { "const": "message" } }
+                },
+                "errorUrl": {
+                    "properties": { "errorType": { "const": "response_url" } }
+                },
+                "errorCode": {
+                    "properties": { "errorType": { "const": "status_code" } }
                }
-              }
-            }
-          ]
-        },
-        "errorUrl": {
-          "oneOf": [
-            { "properties": { "errorType": { "const": "response_url" } } },
-            {
-              "properties": {
-                "errorType": {
-                  "type": "array",
-                  "contains": { "const": "response_url" }
-                }
-              }
-            }
-          ]
-        },
-        "errorCode": {
-          "oneOf": [
-            { "properties": { "errorType": { "const": "status_code" } } },
-            {
-              "properties": {
-                "errorType": {
-                  "type": "array",
-                  "contains": { "const": "status_code" }
-                }
-              }
-            }
-          ]
+            },
+            "if": { "properties": { "errorType": { "const": "message" } } },
+            "then": { "required": [ "errorMsg" ] },
+            "else": {
+                "if": { "properties": { "errorType": { "const": "response_url" } } },
+                "then": { "required": [ "errorUrl" ] }
+            },
+            "additionalProperties": false
        }
-      },
-      "allOf": [
-        {
-          "if": {
-            "anyOf": [
-              { "properties": { "errorType": { "const": "message" } } },
-              {
-                "properties": {
-                  "errorType": {
-                    "type": "array",
-                    "contains": { "const": "message" }
-                  }
-                }
-              }
-            ]
-          },
-          "then": { "required": ["errorMsg"] }
-        },
-        {
-          "if": {
-            "anyOf": [
-              { "properties": { "errorType": { "const": "response_url" } } },
-              {
-                "properties": {
-                  "errorType": {
-                    "type": "array",
-                    "contains": { "const": "response_url" }
-                  }
-                }
-              }
-            ]
-          },
-          "then": { "required": ["errorUrl"] }
-        }
-      ],
-      "additionalProperties": false
+    },
+    "additionalProperties": false,
+    "$defs": {
+        "tag": { "type": "string", "enum": [ "adult", "gaming" ] }
    }
-  },
-  "additionalProperties": false,
-  "$defs": {
-    "tag": { "type": "string", "enum": ["adult", "gaming"] }
-  }
 }
@@ -136,9 +136,6 @@ def get_response(request_future, error_type, social_network):
    except requests.exceptions.RequestException as err:
        error_context = "Unknown Error"
        exception_text = str(err)
-    except UnicodeError as err:
-        error_context = "Encoding Error"
-        exception_text = str(err)

    return response, error_context, exception_text

@@ -174,6 +171,8 @@ def sherlock(
    username: str,
    site_data: dict[str, dict[str, str]],
    query_notify: QueryNotify,
+    tor: bool = False,
+    unique_tor: bool = False,
    dump_response: bool = False,
    proxy: Optional[str] = None,
    timeout: int = 60,
@@ -189,6 +188,8 @@ def sherlock(
    query_notify           -- Object with base type of QueryNotify().
                              This will be used to notify the caller about
                              query results.
+    tor                    -- Boolean indicating whether to use a tor circuit for the requests.
+    unique_tor             -- Boolean indicating whether to use a new tor circuit for each request.
    proxy                  -- String indicating the proxy URL
    timeout                -- Time in seconds to wait before timing out request.
                              Default is 60 seconds.
@@ -209,9 +210,32 @@ def sherlock(

    # Notify caller that we are starting the query.
    query_notify.start(username)
+    # Create session based on request methodology
+    if tor or unique_tor:
+        try:
+            from torrequest import TorRequest  # noqa: E402
+        except ImportError:
+            print("Important!")
+            print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
+            print("> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`.")
+            print("> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n")
+            sys.exit(query_notify.finish())

-    # Normal requests
-    underlying_session = requests.session()
+        print("Important!")
+        print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
+
+        # Requests using Tor obfuscation
+        try:
+            underlying_request = TorRequest()
+        except OSError:
+            print("Tor not found in system path. Unable to continue.\n")
+            sys.exit(query_notify.finish())
+
+        underlying_session = underlying_request.session
+    else:
+        # Normal requests
+        underlying_session = requests.session()
+        underlying_request = requests.Request()

    # Limit number of workers to 20.
    # This is probably vastly overkill.
@@ -335,10 +359,15 @@ def sherlock(
            # Store future in data for access later
            net_info["request_future"] = future

+            # Reset identity for tor (if needed)
+            if unique_tor:
+                underlying_request.reset_identity()
+
        # Add this site's results into final dictionary with all the other results.
        results_total[social_network] = results_site

    # Open the file containing account links
+    # Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
    for social_network, net_info in site_data.items():
        # Retrieve results again
        results_site = results_total.get(social_network)
@@ -352,8 +381,6 @@ def sherlock(

        # Get the expected error type
        error_type = net_info["errorType"]
-        if isinstance(error_type, str):
-            error_type: list[str] = [error_type]

        # Retrieve future and ensure it has finished
        future = net_info["request_future"]
@@ -398,60 +425,58 @@ def sherlock(
        elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
            query_status = QueryStatus.WAF

-        else:
-            if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
-                error_context = f"Unknown error type '{error_type}' for {social_network}"
-                query_status = QueryStatus.UNKNOWN
+        elif error_type == "message":
+            # error_flag True denotes no error found in the HTML
+            # error_flag False denotes error found in the HTML
+            error_flag = True
+            errors = net_info.get("errorMsg")
+            # errors holds the error message(s) for this site;
+            # it can be either a string or a list.
+            # isinstance() tells us which one we have:
+            # a string is checked directly against the response text,
+            # and a list is iterated so each message is checked in turn.
+            if isinstance(errors, str):
+                # Checks if the error message is in the HTML
+                # if error is present we will set flag to False
+                if errors in r.text:
+                    error_flag = False
            else:
-                if "message" in error_type:
-                    # error_flag True denotes no error found in the HTML
-                    # error_flag False denotes error found in the HTML
-                    error_flag = True
-                    errors = net_info.get("errorMsg")
-                    # errors will hold the error message
-                    # it can be string or list
-                    # by isinstance method we can detect that
-                    # and handle the case for strings as normal procedure
-                    # and if its list we can iterate the errors
-                    if isinstance(errors, str):
-                        # Checks if the error message is in the HTML
-                        # if error is present we will set flag to False
-                        if errors in r.text:
-                            error_flag = False
-                    else:
-                        # If it's list, it will iterate all the error message
-                        for error in errors:
-                            if error in r.text:
-                                error_flag = False
-                                break
-                    if error_flag:
-                        query_status = QueryStatus.CLAIMED
-                    else:
-                        query_status = QueryStatus.AVAILABLE
+                # If it's a list, iterate over all the error messages
+                for error in errors:
+                    if error in r.text:
+                        error_flag = False
+                        break
+            if error_flag:
+                query_status = QueryStatus.CLAIMED
+            else:
+                query_status = QueryStatus.AVAILABLE
+        elif error_type == "status_code":
+            error_codes = net_info.get("errorCode")
+            query_status = QueryStatus.CLAIMED

-                if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
-                    error_codes = net_info.get("errorCode")
-                    query_status = QueryStatus.CLAIMED
+            # Type consistency, allowing for both singlets and lists in manifest
+            if isinstance(error_codes, int):
+                error_codes = [error_codes]

-                    # Type consistency, allowing for both singlets and lists in manifest
-                    if isinstance(error_codes, int):
-                        error_codes = [error_codes]
-
-                    if error_codes is not None and r.status_code in error_codes:
-                        query_status = QueryStatus.AVAILABLE
-                    elif r.status_code >= 300 or r.status_code < 200:
-                        query_status = QueryStatus.AVAILABLE
-
-                if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
-                    # For this detection method, we have turned off the redirect.
-                    # So, there is no need to check the response URL: it will always
-                    # match the request.  Instead, we will ensure that the response
-                    # code indicates that the request was successful (i.e. no 404, or
-                    # forward to some odd redirect).
-                    if 200 <= r.status_code < 300:
-                        query_status = QueryStatus.CLAIMED
-                    else:
-                        query_status = QueryStatus.AVAILABLE
+            if error_codes is not None and r.status_code in error_codes:
+                query_status = QueryStatus.AVAILABLE
+            elif r.status_code >= 300 or r.status_code < 200:
+                query_status = QueryStatus.AVAILABLE
+        elif error_type == "response_url":
+            # For this detection method, we have turned off the redirect.
+            # So, there is no need to check the response URL: it will always
+            # match the request.  Instead, we will ensure that the response
+            # code indicates that the request was successful (i.e. no 404, or
+            # forward to some odd redirect).
+            if 200 <= r.status_code < 300:
+                query_status = QueryStatus.CLAIMED
+            else:
+                query_status = QueryStatus.AVAILABLE
+        else:
+            # It should be impossible to ever get here...
+            raise ValueError(
+                f"Unknown Error Type '{error_type}' for " f"site '{social_network}'"
+            )

        if dump_response:
            print("+++++++++++++++++++++")
@@ -571,6 +596,22 @@ def main():
        dest="output",
        help="If using single username, the output of the result will be saved to this file.",
    )
+    parser.add_argument(
+        "--tor",
+        "-t",
+        action="store_true",
+        dest="tor",
+        default=False,
+        help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.",
+    )
+    parser.add_argument(
+        "--unique-tor",
+        "-u",
+        action="store_true",
+        dest="unique_tor",
+        default=False,
+        help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.",
+    )
    parser.add_argument(
        "--csv",
        action="store_true",
@@ -679,11 +720,11 @@ def main():
    )

    parser.add_argument(
-        "--txt",
+        "--no-txt",
        action="store_true",
-        dest="output_txt",
+        dest="no_txt",
        default=False,
-        help="Enable creation of a txt file",
+        help="Disable creation of a txt file",
    )

    parser.add_argument(
@@ -701,7 +742,7 @@ def main():

    # Check for newer version of Sherlock. If it exists, let the user know about it
    try:
-        latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
+        latest_release_raw = requests.get(forge_api_latest_release).text
        latest_release_json = json_loads(latest_release_raw)
        latest_remote_tag = latest_release_json["tag_name"]

@@ -714,10 +755,22 @@ def main():
    except Exception as error:
        print(f"A problem occurred while checking for an update: {error}")

+    # Argument check
+    # TODO regex check on args.proxy
+    if args.tor and (args.proxy is not None):
+        raise Exception("Tor and Proxy cannot be set at the same time.")
+
    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

+    if args.tor or args.unique_tor:
+        print("Using Tor to make requests")
+
+        print(
+            "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
+        )
+
    if args.no_color:
        # Disable color output.
        init(strip=True, convert=False)
@@ -749,7 +802,7 @@ def main():
                if args.json_file.isnumeric():
                    pull_number = args.json_file
                    pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
-                    pull_request_raw = requests.get(pull_url, timeout=10).text
+                    pull_request_raw = requests.get(pull_url).text
                    pull_request_json = json_loads(pull_request_raw)

                    # Check if it's a valid pull request
@@ -818,6 +871,8 @@ def main():
            username,
            site_data,
            query_notify,
+            tor=args.tor,
+            unique_tor=args.unique_tor,
            dump_response=args.dump_response,
            proxy=args.proxy,
            timeout=args.timeout,
@@ -833,7 +888,7 @@ def main():
        else:
            result_file = f"{username}.txt"

-        if args.output_txt:
+        if not args.no_txt:
            with open(result_file, "w", encoding="utf-8") as file:
                exists_counter = 0
                for website_name in results:
@@ -918,8 +973,8 @@ def main():
                {
                    "username": usernames,
                    "name": names,
-                    "url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main],
-                    "url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user],
+                    "url_main": url_main,
+                    "url_user": url_user,
                    "exists": exists,
                    "http_status": http_status,
                    "response_time_s": response_time_s,
@@ -8,7 +8,7 @@ import requests
 import secrets


-MANIFEST_URL = "https://data.sherlockproject.xyz"
+MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
 EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"

 class SiteInformation:
@@ -121,10 +121,15 @@ class SitesInformation:
            # users from creating issue about false positives which has already been fixed or having outdated data
            data_file_path = MANIFEST_URL

+        # Ensure that specified data file has correct extension.
+        if not data_file_path.lower().endswith(".json"):
+            raise FileNotFoundError(f"Incorrect JSON file extension for data file '{data_file_path}'.")
+
+        # if "http://"  == data_file_path[:7].lower() or "https://" == data_file_path[:8].lower():
        if data_file_path.lower().startswith("http"):
            # Reference is to a URL.
            try:
-                response = requests.get(url=data_file_path, timeout=30)
+                response = requests.get(url=data_file_path)
            except Exception as error:
                raise FileNotFoundError(
                    f"Problem while attempting to access data file URL '{data_file_path}':  {error}"
@@ -161,7 +166,7 @@ class SitesInformation:

        if honor_exclusions:
            try:
-                response = requests.get(url=EXCLUSIONS_URL, timeout=10)
+                response = requests.get(url=EXCLUSIONS_URL)
                if response.status_code == 200:
                    exclusions = response.text.splitlines()
                    exclusions = [exclusion.strip() for exclusion in exclusions]
@@ -1,47 +0,0 @@
-"""Tests for handling usernames with special/unicode characters."""
-
-from concurrent.futures import Future
-
-from sherlock_project.sherlock import get_response
-
-
-def _make_future_with_exception(exc):
-    """Create a Future that raises the given exception."""
-    future = Future()
-    future.set_exception(exc)
-    return future
-
-
-def test_get_response_handles_unicode_decode_error():
-    """Regression test for issue #2730.
-
-    Usernames with special characters (e.g. 'Émile') can trigger a
-    UnicodeDecodeError inside the requests library during redirect
-    handling. This must not crash the program.
-    """
-    future = _make_future_with_exception(
-        UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte")
-    )
-    response, error_context, exception_text = get_response(
-        request_future=future,
-        error_type=["status_code"],
-        social_network="TestSite",
-    )
-    assert response is None
-    assert error_context == "Encoding Error"
-    assert "utf-8" in exception_text
-
-
-def test_get_response_handles_unicode_encode_error():
-    """UnicodeEncodeError should also be caught (subclass of UnicodeError)."""
-    future = _make_future_with_exception(
-        UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)")
-    )
-    response, error_context, exception_text = get_response(
-        request_future=future,
-        error_type=["status_code"],
-        social_network="TestSite",
-    )
-    assert response is None
-    assert error_context == "Encoding Error"
-    assert "ascii" in exception_text
@@ -4,7 +4,7 @@ from sherlock_interactives import Interactives
 from sherlock_interactives import InteractivesSubprocessError

 def test_remove_nsfw(sites_obj):
-    nsfw_target: str = 'Xvideos'
+    nsfw_target: str = 'Pornhub'
    assert nsfw_target in {site.name: site.information for site in sites_obj}
    sites_obj.remove_nsfw_sites()
    assert nsfw_target not in {site.name: site.information for site in sites_obj}
@@ -12,8 +12,8 @@ def test_remove_nsfw(sites_obj):

 # Parametrized sites should *not* include Motherless, which is acting as the control
@pytest.mark.parametrize('nsfwsites', [
-    ['Xvideos'],
-    ['Xvideos', 'Erome'],
+    ['Pornhub'],
+    ['Pornhub', 'Xvideos'],
 ])
 def test_nsfw_explicit_selection(sites_obj, nsfwsites):
    for site in nsfwsites:
@@ -16,7 +16,6 @@ def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUAN
    """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`."""
    def replace_upper_bound(match: re.Match) -> str: # type: ignore
        lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore
-        nonlocal upper_bound
        upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore  # noqa: F823
        return f'{{{lower_bound},{upper_bound}}}'