chore: remove deprecated --no-txt

chore: make default --no-txt
Workflows where a txt file is still required should use --txt
2025-10-04 20:37:19 -04:00 · 2025-10-04 20:32:16 -04:00 · 2025-10-04 20:22:34 -04:00 · 2025-10-04 18:42:13 -04:00 · 2025-10-04 17:21:26 -04:00 · 2025-10-04 16:53:30 -04:00
11 changed files with 510 additions and 180 deletions
@@ -1,5 +1,5 @@
 ### REPOSITORY
-/.github/CODEOWNERS @sdushantha
+/.github/CODEOWNERS @sdushantha @ppfeister
 /.github/FUNDING.yml @sdushantha
 /LICENSE @sdushantha

@@ -45,9 +45,9 @@ jobs:
        run: |
          git fetch origin exclusions || true

-          if git show origin/exclusions:exclusions.txt >/dev/null 2>&1; then
+          if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
            # If the exclusions branch and file exist, compare
-            if git diff --quiet origin/exclusions -- exclusions.txt; then
+            if git diff --quiet origin/exclusions -- false_positive_exclusions.txt; then
              echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
            else
              echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
@@ -71,10 +71,19 @@ jobs:
          git config user.name "Paul Pfeister (automation)"
          git config user.email "code@pfeister.dev"

+          mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp
+
+          git add -f false_positive_exclusions.txt.tmp # -f required to override .gitignore
+          git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp
+
          git fetch origin exclusions || true # Allows creation of branch if deleted
          git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)

-          git add false_positive_exclusions.txt
+          git stash pop || true

+          mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt
+
+          git rm -f false_positive_exclusions.txt.tmp || true
+          git add false_positive_exclusions.txt
          git commit -m "auto: update exclusions list" || echo "No changes to commit"
          git push origin exclusions
@@ -0,0 +1,100 @@
+# Validates the targets that a pull request adds or modifies in the site
+# manifest (data.json) and reports the outcome on the pull request.
+name: Modified Target Validation
+
+on:
+  # Only pull requests against master that touch the manifest start this workflow.
+  pull_request_target:
+    branches:
+      - master
+    paths:
+      - "sherlock_project/resources/data.json"
+
+jobs:
+  validate-modified-targets:
+    runs-on: ubuntu-latest
+    # A later step of this job creates a comment on the pull request.
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      # Check out the base branch (github.base_ref), not the pull request's tree.
+      - name: Checkout repository
+        uses: actions/checkout@v5
+        with:
+          ref: ${{ github.base_ref }}
+          fetch-depth: 1
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.13'
+
+      - name: Install Poetry
+        uses: abatilo/actions-poetry@v4
+        with:
+          poetry-version: 'latest'
+
+      # Installs the project together with the "dev" dependency group.
+      - name: Install dependencies
+        run: |
+          poetry install --no-interaction --with dev
+
+      # Keeps a copy of the checked-out (base) manifest as data.json.base, then
+      # overwrites the working manifest with the version from the pull request
+      # head and keeps a copy of that as data.json.head. Only data.json is taken
+      # from the pull request; the rest of the checkout stays on the base ref.
+      - name: Drop in place updated manifest from base
+        run: |
+          cp sherlock_project/resources/data.json data.json.base
+          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1
+          git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json
+          cp sherlock_project/resources/data.json data.json.head
+
+      # Compares data.json.base with data.json.head and exposes the names of all
+      # targets that are new or whose definition differs as the step output
+      # "changed_targets" (comma separated, sorted). Removed targets are ignored.
+      - name: Discover modified targets
+        id: discover-modified
+        run: |
+          CHANGED=$(
+            python - <<'EOF'
+          import json
+          with open("data.json.base") as f: base = json.load(f)
+          with open("data.json.head") as f: head = json.load(f)
+
+          changed = []
+          for k, v in head.items():
+              if k not in base or base[k] != v:
+                  changed.append(k)
+
+          print(",".join(sorted(changed)))
+          EOF
+          )
+
+          # Log the changelist one target per line. Target names come from the pull
+          # request, so the variable is always quoted and printed with printf: this
+          # avoids word splitting, glob expansion and backslash-escape interpretation.
+          printf '>>> Changed targets:\n%s\n' "$(printf '%s' "$CHANGED" | tr ',' '\n')"
+
+          # Preserve changelist
+          printf 'changed_targets=%s\n' "$CHANGED" >> "$GITHUB_OUTPUT"
+
+      # Runs the target validation tests for the changed targets only and writes a
+      # JUnit report. Test failures do not fail the job (continue-on-error).
+      - name: Validate modified targets
+        if: steps.discover-modified.outputs.changed_targets != ''
+        continue-on-error: true
+        # Target names originate from the pull request's data.json. They are handed
+        # to the script through an environment variable instead of being expanded
+        # into the script text, so a crafted name cannot inject shell commands.
+        env:
+          CHANGED_TARGETS: ${{ steps.discover-modified.outputs.changed_targets }}
+        run: |
+          poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
+            --chunked-sites "$CHANGED_TARGETS" \
+            --junitxml=validation_results.xml
+
+      # Turns the JUnit report into Markdown and stores it in validation_summary.md.
+      # If the summarizer exits with an error, the fallback message is appended to
+      # whatever it printed, so the file always exists for the next step.
+      - name: Prepare validation summary
+        if: steps.discover-modified.outputs.changed_targets != ''
+        id: prepare-summary
+        run: |
+          summary=$(
+            poetry run python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
+          )
+          echo "$summary" > validation_summary.md
+
+      # Posts the contents of validation_summary.md as a new comment on the pull
+      # request that triggered this run.
+      - name: Announce validation results
+        if: steps.discover-modified.outputs.changed_targets != ''
+        uses: actions/github-script@v8
+        with:
+          script: |
+            const fs = require('fs');
+            const body = fs.readFileSync('validation_summary.md', 'utf8');
+            await github.rest.issues.createComment({
+              issue_number: context.payload.pull_request.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: body,
+            });
+
+      # Marker step: it only runs when the changelist is empty, so the run log
+      # shows explicitly that there was nothing to validate.
+      - name: This step shows as ran when no modifications are found
+        if: steps.discover-modified.outputs.changed_targets == ''
+        run: |
+          echo "No modified targets found"
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# This module summarizes the results of site validation tests queued by
+# workflow validate_modified_targets for presentation in Issue comments.
+
+from defusedxml import ElementTree as ET
+import sys
+from pathlib import Path
+
+def summarize_junit_xml(xml_path: Path) -> str:
+    """Render a JUnit XML report of site validation tests as a Markdown summary.
+
+    Test cases are expected to be named ``<test>[<site>]``. The outcomes of
+    ``test_false_pos`` and ``test_false_neg`` are grouped per site into a table;
+    a check without a recorded outcome is shown as ``Error!``. Test cases whose
+    name carries no ``[<site>]`` part are left out of the table, but an error
+    on them is still reported in the closing notice.
+
+    Args:
+        xml_path: Path of the JUnit XML file to summarize.
+
+    Returns:
+        Markdown text with the results table, followed by notices when
+        failures or errors were recorded.
+
+    Raises:
+        ValueError: If the document contains no ``testsuite`` element.
+    """
+    tree = ET.parse(xml_path)
+    root = tree.getroot()
+    # Accept both layouts: <testsuites> wrapping the suite, or <testsuite> as root.
+    suite = root if root.tag == 'testsuite' else root.find('testsuite')
+
+    pass_message: str = ":heavy_check_mark: &nbsp; Pass"
+    fail_message: str = ":x: &nbsp; Fail"
+
+    if suite is None:
+        raise ValueError("Invalid JUnit XML: No testsuite found")
+
+    summary_lines: list[str] = []
+    summary_lines.append("#### Automatic validation of changes\n")
+    summary_lines.append("| Target | F+ Check | F- Check |")
+    summary_lines.append("|---|---|---|")
+
+    failures = int(suite.get('failures', 0))
+    errors_detected: bool = False
+
+    results: dict[str, dict[str, str]] = {}
+
+    for testcase in suite.findall('testcase'):
+        failure = testcase.find('failure')
+        error = testcase.find('error')
+
+        # Record errors first so they are reported even for cases skipped below.
+        if error is not None:
+            errors_detected = True
+
+        # Split at the first "[" only, so brackets inside a site name survive.
+        test_name, bracket, site_name = (testcase.get('name') or '').partition('[')
+        if not bracket:
+            # Not a per-site test case (e.g. a collection error): nothing to tabulate.
+            continue
+        if site_name.endswith(']'):
+            # Drop only the closing bracket of the parametrization id.
+            site_name = site_name[:-1]
+
+        site_results = results.setdefault(site_name, {})
+        outcome = pass_message if failure is None and error is None else fail_message
+
+        if test_name == "test_false_neg":
+            site_results['F- Check'] = outcome
+        elif test_name == "test_false_pos":
+            site_results['F+ Check'] = outcome
+
+    for site_name, checks in results.items():
+        # Escape pipes so a site name cannot break the Markdown table layout.
+        safe_name = site_name.replace('|', '\\|')
+        summary_lines.append(f"| {safe_name} | {checks.get('F+ Check', 'Error!')} | {checks.get('F- Check', 'Error!')} |")
+
+    if failures > 0:
+        summary_lines.append("\n___\n" +
+            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
+            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")
+
+    if errors_detected:
+        summary_lines.append("\n___\n" +
+            "\n**Errors were detected during validation. Please review the workflow logs.**")
+
+    return "\n".join(summary_lines)
+
+if __name__ == "__main__":
+    # Command-line entry point: expects exactly one argument, the JUnit XML
+    # report, and prints its Markdown summary to standard output.
+    cli_args = sys.argv[1:]
+    if len(cli_args) != 1:
+        print("Usage: summarize_site_validation.py <junit-xml-file>")
+        sys.exit(1)
+
+    xml_path: Path = Path(cli_args[0])
+    if not xml_path.is_file():
+        print(f"Error: File '{xml_path}' does not exist.")
+        sys.exit(1)
+
+    print(summarize_junit_xml(xml_path))
@@ -8,8 +8,7 @@ source = "init"

 [tool.poetry]
 name = "sherlock-project"
-# single source of truth for version is __init__.py
-version = "0"
+version = "0.16.0"
 description = "Hunt down social media accounts by username across social networks"
 license = "MIT"
 authors = [
@@ -50,6 +49,7 @@ stem = "^1.8.0"
 torrequest = "^0.1.0"
 pandas = "^2.2.1"
 openpyxl = "^3.0.10"
+tomli = "^2.2.1"

 [tool.poetry.extras]
 tor = ["torrequest"]
@@ -60,5 +60,9 @@ rstr = "^3.2.2"
 pytest = "^8.4.2"
 pytest-xdist = "^3.8.0"

+
+[tool.poetry.group.ci.dependencies]
+defusedxml = "^0.7.1"
+
 [tool.poetry.scripts]
 sherlock = 'sherlock_project.sherlock:main'
@@ -5,11 +5,26 @@ networks.

 """

+from importlib.metadata import version as pkg_version, PackageNotFoundError
+import pathlib
+import tomli
+
+
+def get_version() -> str:
+    """Return the version of Sherlock.
+
+    The metadata of the installed "sherlock_project" distribution is consulted
+    first. When no such distribution is installed, the version is read from the
+    ``[tool.poetry]`` table of the pyproject.toml located one directory above
+    this package.
+    """
+    try:
+        return pkg_version("sherlock_project")
+    except PackageNotFoundError:
+        # Not installed as a package; fall through to the pyproject.toml lookup.
+        pass
+
+    project_root: pathlib.Path = pathlib.Path(__file__).resolve().parent.parent
+    with (project_root / "pyproject.toml").open("rb") as manifest:
+        return tomli.load(manifest)["tool"]["poetry"]["version"]
+
 # This variable is only used to check for ImportErrors induced by users running as script rather than as module or package
 import_error_test_var = None

 __shortname__   = "Sherlock"
 __longname__    = "Sherlock: Find Usernames Across Social Networks"
-__version__     = "0.16.0"
+__version__     = get_version()

 forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest"
@@ -79,13 +79,13 @@
    "username_claimed": "pink"
  },
  "AllMyLinks": {
-    "errorMsg": "Not Found",
-    "errorType": "message",
-    "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
-    "url": "https://allmylinks.com/{}",
-    "urlMain": "https://allmylinks.com/",
-    "username_claimed": "blue"
-  },
+  "errorMsg": "Page not found",
+  "errorType": "message",
+  "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
+  "url": "https://allmylinks.com/{}",
+  "urlMain": "https://allmylinks.com/",
+  "username_claimed": "blue"
+},
  "AniWorld": {
    "errorMsg": "Dieses Profil ist nicht verf\u00fcgbar",
    "errorType": "message",
@@ -115,12 +115,20 @@
    "username_claimed": "lio24d"
  },
  "Apple Discussions": {
-    "errorMsg": "The page you tried was not found. You may have used an outdated link or may have typed the address (URL) incorrectly.",
+    "errorMsg": "Looking for something in Apple Support Communities?",
    "errorType": "message",
    "url": "https://discussions.apple.com/profile/{}",
    "urlMain": "https://discussions.apple.com",
    "username_claimed": "jason"
  },
+  "Aparat": {
+    "errorType": "status_code",
+    "request_method": "GET",
+    "url": "https://www.aparat.com/{}/",
+    "urlMain": "https://www.aparat.com/",
+    "urlProbe": "https://www.aparat.com/api/fa/v1/user/user/information/username/{}",
+    "username_claimed": "jadi"
+  },
  "Archive of Our Own": {
    "errorType": "status_code",
    "regexCheck": "^[^.]*?$",
@@ -564,8 +572,7 @@
    "username_claimed": "brown"
  },
  "CyberDefenders": {
-    "errorMsg": "<title>Blue Team Training for SOC analysts and DFIR - CyberDefenders</title>",
-    "errorType": "message",
+    "errorType": "status_code",
    "regexCheck": "^[^\\/:*?\"<>|@]{3,50}$",
    "request_method": "GET",
    "url": "https://cyberdefenders.org/p/{}",
@@ -592,6 +599,12 @@
    "urlMain": "https://www.dailymotion.com/",
    "username_claimed": "blue"
  },
+  "dcinside": {
+    "errorType": "status_code",
+    "url": "https://gallog.dcinside.com/{}",
+    "urlMain": "https://www.dcinside.com/",
+    "username_claimed": "anrbrb"
+  },
  "Dealabs": {
    "errorMsg": "La page que vous essayez",
    "errorType": "message",
@@ -600,13 +613,14 @@
    "urlMain": "https://www.dealabs.com/",
    "username_claimed": "blue"
  },
-  "DeviantART": {
-    "errorType": "status_code",
-    "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
-    "url": "https://{}.deviantart.com",
-    "urlMain": "https://deviantart.com",
-    "username_claimed": "blue"
-  },
+ "DeviantArt": {
+  "errorType": "message",
+  "errorMsg": "Llama Not Found",
+  "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
+  "url": "https://www.deviantart.com/{}",
+  "urlMain": "https://www.deviantart.com/",
+  "username_claimed": "blue"
+},
  "DigitalSpy": {
      "errorMsg": "The page you were looking for could not be found.",
      "errorType": "message",
@@ -807,14 +821,6 @@
    "urlMain": "https://framapiaf.org",
    "username_claimed": "pylapp"
  },
-  "Freelance.habr": {
-    "errorMsg": "<div class=\"icon_user_locked\"></div>",
-    "errorType": "message",
-    "regexCheck": "^((?!\\.).)*$",
-    "url": "https://freelance.habr.com/freelancers/{}",
-    "urlMain": "https://freelance.habr.com/",
-    "username_claimed": "adam"
-  },
  "Freelancer": {
    "errorMsg": "\"users\":{}",
    "errorType": "message",
@@ -1142,6 +1148,13 @@
    "urlProbe": "https://imginn.com/{}",
    "username_claimed": "instagram"
  },
+  "Instapaper": {
+    "errorType": "status_code",
+    "request_method": "GET",
+    "url": "https://www.instapaper.com/p/{}",
+    "urlMain": "https://www.instapaper.com/",
+    "username_claimed": "john"
+  },
  "Instructables": {
    "errorType": "status_code",
    "url": "https://www.instructables.com/member/{}",
@@ -1254,6 +1267,13 @@
    "urlMain": "https://linux.org.ru/",
    "username_claimed": "red"
  },
+  "Laracast": {
+    "errorType":"status_code",
+    "url": "https://laracasts.com/@{}",
+    "urlMain": "https://laracasts.com/",
+    "regexCheck": "^[a-zA-Z0-9_-]{3,}$",
+    "username_claimed": "user1"
+  },
  "Launchpad": {
    "errorType": "status_code",
    "url": "https://launchpad.net/~{}",
@@ -1426,12 +1446,12 @@
    "username_claimed": "blue"
  },
  "Mydramalist": {
-    "errorMsg": "Sign in - MyDramaList",
-    "errorType": "message",
-    "url": "https://www.mydramalist.com/profile/{}",
-    "urlMain": "https://mydramalist.com",
-    "username_claimed": "elhadidy12398"
-  },
+  "errorMsg": "The requested page was not found",
+  "errorType": "message",
+  "url": "https://www.mydramalist.com/profile/{}",
+  "urlMain": "https://mydramalist.com",
+  "username_claimed": "elhadidy12398"
+},
  "Myspace": {
    "errorType": "status_code",
    "url": "https://myspace.com/{}",
@@ -1445,6 +1465,13 @@
    "urlMain": "https://www.native-instruments.com/forum/",
    "username_claimed": "jambert"
  },
+  "namuwiki": {
+    "__comment__": "This is a Korean site and it's expected to return false negatives in certain other regions.",
+    "errorType": "status_code",
+    "url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}",
+    "urlMain": "https://namu.wiki/",
+    "username_claimed": "namu"
+  },
  "NationStates Nation": {
    "errorMsg": "Was this your nation? It may have ceased to exist due to inactivity, but can rise again!",
    "errorType": "message",
@@ -1558,7 +1585,7 @@
    "url": "https://outgress.com/agents/{}",
    "urlMain": "https://outgress.com/",
    "username_claimed": "pylapp"
-  },  
+  },
  "PCGamer": {
    "errorMsg": "The specified member cannot be found. Please enter a member's entire name.",
    "errorType": "message",
@@ -1795,8 +1822,7 @@
    "username_claimed": "blue"
  },
  "Roblox": {
-    "errorMsg": "Page cannot be found or no longer exists",
-    "errorType": "message",
+    "errorType": "status_code",
    "url": "https://www.roblox.com/user.aspx?username={}",
    "urlMain": "https://www.roblox.com/",
    "username_claimed": "bluewolfekiller"
@@ -1904,7 +1930,7 @@
  },
  "SlideShare": {
    "errorType": "message",
-    "errorMsg": "<title>Username available</title>",
+    "errorMsg": "<title>Page no longer exists</title>",
    "url": "https://slideshare.net/{}",
    "urlMain": "https://slideshare.net/",
    "username_claimed": "blue"
@@ -1938,6 +1964,13 @@
    "urlMain": "https://www.snapchat.com",
    "username_claimed": "teamsnapchat"
  },
+  "SOOP": {
+    "errorType": "status_code",
+    "url": "https://www.sooplive.co.kr/station/{}",
+    "urlMain": "https://www.sooplive.co.kr/",
+    "urlProbe": "https://api-channel.sooplive.co.kr/v1.1/channel/{}/station",
+    "username_claimed": "udkn"
+  },
  "SoundCloud": {
    "errorType": "status_code",
    "url": "https://soundcloud.com/{}",
@@ -2105,6 +2138,12 @@
    "urlMain": "https://themeforest.net/",
    "username_claimed": "user"
  },
+  "tistory": {
+    "errorType": "status_code",
+    "url": "https://{}.tistory.com/",
+    "urlMain": "https://www.tistory.com/",
+    "username_claimed": "notice"
+  },
  "TnAFlix": {
    "errorType": "status_code",
    "isNSFW": true,
@@ -2779,7 +2818,7 @@
    "username_claimed": "green"
  },
  "threads": {
-    "errorMsg": "<title>Threads</title>",
+    "errorMsg": "<title>Threads • Log in</title>",
    "errorType": "message",
    "headers": {
      "Sec-Fetch-Mode": "navigate"
@@ -1,80 +1,149 @@
 {
-    "$schema": "https://json-schema.org/draft/2020-12/schema",
-    "title": "Sherlock Target Manifest",
-    "description": "Social media targets to probe for the existence of known usernames",
-    "type": "object",
-    "properties": {
-        "$schema": { "type": "string" }
-    },
-    "patternProperties": {
-        "^(?!\\$).*?$": {
-            "type": "object",
-            "description": "Target name and associated information (key should be human readable name)",
-            "required": [ "url", "urlMain", "errorType", "username_claimed" ],
-            "properties": {
-                "url": { "type": "string" },
-                "urlMain": { "type": "string" },
-                "urlProbe": { "type": "string" },
-                "username_claimed": { "type": "string" },
-                "regexCheck": { "type": "string" },
-                "isNSFW": { "type": "boolean" },
-                "headers": { "type": "object" },
-                "request_payload": { "type": "object" },
-                "__comment__": {
-                    "type": "string",
-                    "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
-                },
-                "tags": {
-                    "oneOf": [
-                        { "$ref": "#/$defs/tag" },
-                        { "type": "array", "items": { "$ref": "#/$defs/tag" } }
-                    ]
-                },
-                "request_method": {
-                    "type": "string",
-                    "enum": [ "GET", "POST", "HEAD", "PUT" ]
-                },
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "Sherlock Target Manifest",
+  "description": "Social media targets to probe for the existence of known usernames",
+  "type": "object",
+  "properties": {
+    "$schema": { "type": "string" }
+  },
+  "patternProperties": {
+    "^(?!\\$).*?$": {
+      "type": "object",
+      "description": "Target name and associated information (key should be human readable name)",
+      "required": ["url", "urlMain", "errorType", "username_claimed"],
+      "properties": {
+        "url": { "type": "string" },
+        "urlMain": { "type": "string" },
+        "urlProbe": { "type": "string" },
+        "username_claimed": { "type": "string" },
+        "regexCheck": { "type": "string" },
+        "isNSFW": { "type": "boolean" },
+        "headers": { "type": "object" },
+        "request_payload": { "type": "object" },
+        "__comment__": {
+          "type": "string",
+          "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
+        },
+        "tags": {
+          "oneOf": [
+            { "$ref": "#/$defs/tag" },
+            { "type": "array", "items": { "$ref": "#/$defs/tag" } }
+          ]
+        },
+        "request_method": {
+          "type": "string",
+          "enum": ["GET", "POST", "HEAD", "PUT"]
+        },
+        "errorType": {
+          "oneOf": [
+            {
+              "type": "string",
+              "enum": ["message", "response_url", "status_code"]
+            },
+            {
+              "type": "array",
+              "items": {
+                "type": "string",
+                "enum": ["message", "response_url", "status_code"]
+              }
+            }
+          ]
+        },
+        "errorMsg": {
+          "oneOf": [
+            { "type": "string" },
+            { "type": "array", "items": { "type": "string" } }
+          ]
+        },
+        "errorCode": {
+          "oneOf": [
+            { "type": "integer" },
+            { "type": "array", "items": { "type": "integer" } }
+          ]
+        },
+        "errorUrl": { "type": "string" },
+        "response_url": { "type": "string" }
+      },
+      "dependencies": {
+        "errorMsg": {
+          "oneOf": [
+            { "properties": { "errorType": { "const": "message" } } },
+            {
+              "properties": {
                "errorType": {
-                    "type": "string",
-                    "enum": [ "message", "response_url", "status_code" ]
-                },
-                "errorMsg": {
-                    "oneOf": [
-                        { "type": "string" },
-                        { "type": "array", "items": { "type": "string" } }
-                    ]
-                },
-                "errorCode": {
-                    "oneOf": [
-                        { "type": "integer" },
-                        { "type": "array", "items": { "type": "integer" } }
-                    ]
-                },
-                "errorUrl": { "type": "string" },
-                "response_url": { "type": "string" }
-            },
-            "dependencies": {
-                "errorMsg": {
-                    "properties" : { "errorType": { "const": "message" } }
-                },
-                "errorUrl": {
-                    "properties": { "errorType": { "const": "response_url" } }
-                },
-                "errorCode": {
-                    "properties": { "errorType": { "const": "status_code" } }
+                  "type": "array",
+                  "contains": { "const": "message" }
                }
-            },
-            "if": { "properties": { "errorType": { "const": "message" } } },
-            "then": { "required": [ "errorMsg" ] },
-            "else": {
-                "if": { "properties": { "errorType": { "const": "response_url" } } },
-                "then": { "required": [ "errorUrl" ] }
-            },
-            "additionalProperties": false
+              }
+            }
+          ]
+        },
+        "errorUrl": {
+          "oneOf": [
+            { "properties": { "errorType": { "const": "response_url" } } },
+            {
+              "properties": {
+                "errorType": {
+                  "type": "array",
+                  "contains": { "const": "response_url" }
+                }
+              }
+            }
+          ]
+        },
+        "errorCode": {
+          "oneOf": [
+            { "properties": { "errorType": { "const": "status_code" } } },
+            {
+              "properties": {
+                "errorType": {
+                  "type": "array",
+                  "contains": { "const": "status_code" }
+                }
+              }
+            }
+          ]
        }
-    },
-    "additionalProperties": false,
-    "$defs": {
-        "tag": { "type": "string", "enum": [ "adult", "gaming" ] }
+      },
+      "allOf": [
+        {
+          "if": {
+            "anyOf": [
+              { "properties": { "errorType": { "const": "message" } } },
+              {
+                "properties": {
+                  "errorType": {
+                    "type": "array",
+                    "contains": { "const": "message" }
+                  }
+                }
+              }
+            ]
+          },
+          "then": { "required": ["errorMsg"] }
+        },
+        {
+          "if": {
+            "anyOf": [
+              { "properties": { "errorType": { "const": "response_url" } } },
+              {
+                "properties": {
+                  "errorType": {
+                    "type": "array",
+                    "contains": { "const": "response_url" }
+                  }
+                }
+              }
+            ]
+          },
+          "then": { "required": ["errorUrl"] }
+        }
+      ],
+      "additionalProperties": false
    }
+  },
+  "additionalProperties": false,
+  "$defs": {
+    "tag": { "type": "string", "enum": ["adult", "gaming"] }
+  }
 }
@@ -381,6 +381,8 @@ def sherlock(

        # Get the expected error type
        error_type = net_info["errorType"]
+        if isinstance(error_type, str):
+            error_type: list[str] = [error_type]

        # Retrieve future and ensure it has finished
        future = net_info["request_future"]
@@ -425,58 +427,60 @@ def sherlock(
        elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
            query_status = QueryStatus.WAF

-        elif error_type == "message":
-            # error_flag True denotes no error found in the HTML
-            # error_flag False denotes error found in the HTML
-            error_flag = True
-            errors = net_info.get("errorMsg")
-            # errors will hold the error message
-            # it can be string or list
-            # by isinstance method we can detect that
-            # and handle the case for strings as normal procedure
-            # and if its list we can iterate the errors
-            if isinstance(errors, str):
-                # Checks if the error message is in the HTML
-                # if error is present we will set flag to False
-                if errors in r.text:
-                    error_flag = False
-            else:
-                # If it's list, it will iterate all the error message
-                for error in errors:
-                    if error in r.text:
-                        error_flag = False
-                        break
-            if error_flag:
-                query_status = QueryStatus.CLAIMED
-            else:
-                query_status = QueryStatus.AVAILABLE
-        elif error_type == "status_code":
-            error_codes = net_info.get("errorCode")
-            query_status = QueryStatus.CLAIMED
-
-            # Type consistency, allowing for both singlets and lists in manifest
-            if isinstance(error_codes, int):
-                error_codes = [error_codes]
-
-            if error_codes is not None and r.status_code in error_codes:
-                query_status = QueryStatus.AVAILABLE
-            elif r.status_code >= 300 or r.status_code < 200:
-                query_status = QueryStatus.AVAILABLE
-        elif error_type == "response_url":
-            # For this detection method, we have turned off the redirect.
-            # So, there is no need to check the response URL: it will always
-            # match the request.  Instead, we will ensure that the response
-            # code indicates that the request was successful (i.e. no 404, or
-            # forward to some odd redirect).
-            if 200 <= r.status_code < 300:
-                query_status = QueryStatus.CLAIMED
-            else:
-                query_status = QueryStatus.AVAILABLE
        else:
-            # It should be impossible to ever get here...
-            raise ValueError(
-                f"Unknown Error Type '{error_type}' for " f"site '{social_network}'"
-            )
+            if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
+                error_context = f"Unknown error type '{error_type}' for {social_network}"
+                query_status = QueryStatus.UNKNOWN
+            else:
+                if "message" in error_type:
+                    # error_flag True denotes no error found in the HTML
+                    # error_flag False denotes error found in the HTML
+                    error_flag = True
+                    errors = net_info.get("errorMsg")
+                    # errors will hold the error message
+                    # it can be string or list
+                    # by isinstance method we can detect that
+                    # and handle the case for strings as normal procedure
+                    # and if its list we can iterate the errors
+                    if isinstance(errors, str):
+                        # Checks if the error message is in the HTML
+                        # if error is present we will set flag to False
+                        if errors in r.text:
+                            error_flag = False
+                    else:
+                        # If it's list, it will iterate all the error message
+                        for error in errors:
+                            if error in r.text:
+                                error_flag = False
+                                break
+                    if error_flag:
+                        query_status = QueryStatus.CLAIMED
+                    else:
+                        query_status = QueryStatus.AVAILABLE
+
+                if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
+                    error_codes = net_info.get("errorCode")
+                    query_status = QueryStatus.CLAIMED
+
+                    # Type consistency, allowing for both singlets and lists in manifest
+                    if isinstance(error_codes, int):
+                        error_codes = [error_codes]
+
+                    if error_codes is not None and r.status_code in error_codes:
+                        query_status = QueryStatus.AVAILABLE
+                    elif r.status_code >= 300 or r.status_code < 200:
+                        query_status = QueryStatus.AVAILABLE
+
+                if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
+                    # For this detection method, we have turned off the redirect.
+                    # So, there is no need to check the response URL: it will always
+                    # match the request.  Instead, we will ensure that the response
+                    # code indicates that the request was successful (i.e. no 404, or
+                    # forward to some odd redirect).
+                    if 200 <= r.status_code < 300:
+                        query_status = QueryStatus.CLAIMED
+                    else:
+                        query_status = QueryStatus.AVAILABLE

        if dump_response:
            print("+++++++++++++++++++++")
@@ -720,11 +724,11 @@ def main():
    )

    parser.add_argument(
-        "--no-txt",
+        "--txt",
        action="store_true",
-        dest="no_txt",
+        dest="output_txt",
        default=False,
-        help="Disable creation of a txt file",
+        help="Enable creation of a txt file",
    )

    parser.add_argument(
@@ -742,7 +746,7 @@ def main():

    # Check for newer version of Sherlock. If it exists, let the user know about it
    try:
-        latest_release_raw = requests.get(forge_api_latest_release).text
+        latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
        latest_release_json = json_loads(latest_release_raw)
        latest_remote_tag = latest_release_json["tag_name"]

@@ -802,7 +806,7 @@ def main():
                if args.json_file.isnumeric():
                    pull_number = args.json_file
                    pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
-                    pull_request_raw = requests.get(pull_url).text
+                    pull_request_raw = requests.get(pull_url, timeout=10).text
                    pull_request_json = json_loads(pull_request_raw)

                    # Check if it's a valid pull request
@@ -888,7 +892,7 @@ def main():
        else:
            result_file = f"{username}.txt"

-        if not args.no_txt:
+        if args.output_txt:
            with open(result_file, "w", encoding="utf-8") as file:
                exists_counter = 0
                for website_name in results:
@@ -129,7 +129,7 @@ class SitesInformation:
        if data_file_path.lower().startswith("http"):
            # Reference is to a URL.
            try:
-                response = requests.get(url=data_file_path)
+                response = requests.get(url=data_file_path, timeout=30)
            except Exception as error:
                raise FileNotFoundError(
                    f"Problem while attempting to access data file URL '{data_file_path}':  {error}"
@@ -166,7 +166,7 @@ class SitesInformation:

        if honor_exclusions:
            try:
-                response = requests.get(url=EXCLUSIONS_URL)
+                response = requests.get(url=EXCLUSIONS_URL, timeout=10)
                if response.status_code == 200:
                    exclusions = response.text.splitlines()
                    exclusions = [exclusion.strip() for exclusion in exclusions]
@@ -4,9 +4,9 @@ import urllib
 import pytest
 from sherlock_project.sites import SitesInformation

-def fetch_local_manifest() -> dict[str, dict[str, str]]:
-    sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
-    sites_iterable = {site.name: site.information for site in sites_obj}
+def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]:
+    sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"), honor_exclusions=honor_exclusions)
+    sites_iterable: dict[str, dict[str, str]] = {site.name: site.information for site in sites_obj}
    return sites_iterable

@pytest.fixture()
@@ -25,9 +25,27 @@ def remote_schema():
        schemadat = json.load(remoteschema)
    yield schemadat

+def pytest_addoption(parser):
+    parser.addoption(
+        "--chunked-sites",
+        action="store",
+        default=None,
+        help="For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.",
+    )
+
 def pytest_generate_tests(metafunc):
    if "chunked_sites" in metafunc.fixturenames:
-        sites_info = fetch_local_manifest()
+        sites_info = fetch_local_manifest(honor_exclusions=False)
+
+        # Ingest and apply site selections
+        site_filter: str | None = metafunc.config.getoption("--chunked-sites")
+        if site_filter:
+            selected_sites: list[str] = [site.strip() for site in site_filter.split(",")]
+            sites_info = {
+                site: data for site, data in sites_info.items()
+                if site in selected_sites
+            }
+
        params = [{name: data} for name, data in sites_info.items()]
        ids = list(sites_info.keys())
        metafunc.parametrize("chunked_sites", params, ids=ids)
Author	SHA1	Message	Date
Paul Pfeister	3926e85cf7	chore: remove deprecated --no-txt	2025-10-04 20:37:19 -04:00
Paul Pfeister	4246a7b16f	chore: make default --no-txt Workflows where a txt file is still required should use --txt	2025-10-04 20:32:16 -04:00
Paul Pfeister	52cd5fdfc1	feat: gracefully skip sites with invalid errorType	2025-10-04 20:22:34 -04:00
Paul Pfeister	947f1ad2b6	Merge pull request #2574 from dollaransh17/fix/http-request-timeouts Security Fix: Add timeout parameters to HTTP requests	2025-10-04 18:42:13 -04:00
Paul Pfeister	cfcc82aaca	Merge pull request #2597 from sherlock-project/feat/multiple-types Support multiple errorType checks	2025-10-04 17:21:26 -04:00
Paul Pfeister	0794e02b52	feat: support multiple errorTypes	2025-10-04 16:53:30 -04:00
Paul Pfeister	975965abed	Merge pull request #2589 from dollaransh17/fix/threads-false-positive fix(sites): Fix Threads false positive detection	2025-10-04 15:44:04 -04:00
Paul Pfeister	a678bed154	Merge pull request #2587 from akh7177/remediate-cyberdefenders-fp fix(sites): Remediate False Positives for CyberDefenders	2025-10-04 15:43:48 -04:00
Paul Pfeister	4ec6f1eec0	Merge pull request #2585 from akh7177/remediate-slideshare-fp fix(sites): Remediate False Positive for SlideShare	2025-10-04 15:43:36 -04:00
Paul Pfeister	d1527376e7	Merge pull request #2584 from akh7177/remediate-roblox-fp fix(sites): Remediate False Positive for Roblox	2025-10-04 15:43:29 -04:00
dollaransh17	dc869852bc	fix(sites): Fix Threads false positive detection Threads was showing false positives for non-existent users because the error message detection was incorrect. Updated errorMsg: - Old: "<title>Threads</title>" (generic, matches valid pages too) - New: "<title>Threads • Log in</title>" (specific to non-existent users) When a user doesn't exist, Threads redirects to a login page with the title "Threads • Log in". Valid user profiles have titles like "Username (@username) • Threads, Say more". Tested with: - Invalid user (impossibleuser12345): Correctly not found - Valid user (zuck): Correctly found This fixes the false positive issue where non-existent Threads profiles were being reported as found.	2025-10-04 17:22:50 +05:30
Abhyuday K Hegde	5cd769c2f4	Remediate False Positives for CyberDefenders	2025-10-04 15:12:20 +05:30
Abhyuday K Hegde	977ad5c1a4	Remediate False Positive for SlideShare	2025-10-04 14:48:37 +05:30
Abhyuday K Hegde	57a0ccef38	Remediate False Positive for Roblox	2025-10-04 14:30:40 +05:30
dollaransh17	0e7219b191	Security Fix: Add timeout parameters to HTTP requests This fix addresses a critical security vulnerability where HTTP requests could hang indefinitely, potentially causing denial of service. Changes: - Added 10-second timeout to version check API call - Added 10-second timeout to GitHub pull request API call - Added 30-second timeout to data file downloads (larger timeout for data) - Added 10-second timeout to exclusions list download Impact: - Prevents infinite hangs that could freeze the application - Improves user experience with predictable response times - Fixes security issue flagged by Bandit static analysis (B113) - Makes the application more robust in poor network conditions The timeouts are conservative enough to work with slow connections while preventing indefinite blocking that could be exploited.	2025-10-03 13:41:43 +05:30
Paul Pfeister	1d2c4b134f	Merge pull request #2570 from shreyasNaik0101/fix/remediate-applediscussions fix(sites): Remediate false positive for Apple Discussions	2025-10-02 20:30:57 -04:00
shreyasNaik0101	b245c462c9	fix(sites): Remediate false positive for Apple Discussions	2025-10-03 05:56:52 +05:30
Paul Pfeister	66d9733da7	Merge pull request #2565 from shreyasNaik0101/fix/remediate-mydramalist fix(sites): Remediate false positive for Mydramalist	2025-10-02 19:40:47 -04:00
Paul Pfeister	c55deab3a2	Merge pull request #2561 from shreyasNaik0101/fix/remediate-deviantart fix(sites): Remediate false positive for DeviantArt	2025-10-02 19:37:00 -04:00
Paul Pfeister	edcb697793	Merge pull request #2564 from shreyasNaik0101/fix/remediate-allmylinks fix(sites): Remediate false positive for AllMyLinks	2025-10-02 19:36:43 -04:00
shreyasNaik0101	d314d75db1	fix(sites): Remediate false positive for Mydramalist	2025-10-03 04:43:05 +05:30
shreyasNaik0101	c89a52caf7	fix(sites): Remediate false positive for AllMyLinks	2025-10-03 04:25:46 +05:30
Paul Pfeister	9c18cfe273	Merge pull request #2563 from sherlock-project/chore/update-co chore: update code owners	2025-10-02 18:25:59 -04:00
shreyasNaik0101	779d4c33f4	fix: Remove username_unclaimed as requested	2025-10-03 03:55:03 +05:30
Paul Pfeister	072c24687b	Merge pull request #2558 from hanjm-github/master feat: Add some popular website in Korea	2025-10-02 18:22:42 -04:00
Paul Pfeister	b811b2bd47	chore: update code owners	2025-10-02 18:21:20 -04:00
shreyasNaik0101	355bfbd328	fix(sites): Remediate false positive for DeviantArt	2025-10-03 00:42:07 +05:30
JongMyeong HAN	7b3632bdad	Add comment to site 'namuwiki' Co-authored-by: Paul Pfeister <code@pfeister.dev>	2025-10-03 04:00:41 +09:00
JongMyeong HAN	cd7c52e4fa	Feat: Add tistory	2025-10-01 00:44:55 +09:00
JongMyeong HAN	86140af50e	feat: Add SOOP	2025-10-01 00:44:02 +09:00
JongMyeong HAN	e5cd5e5bfe	feat: Add namuwiki	2025-10-01 00:43:21 +09:00
JongMyeong HAN	dc89f1cd27	feat: Add dcinside	2025-10-01 00:41:23 +09:00
Paul Pfeister	388a1e06d4	Merge pull request #2459 from kareemeldahshoury/Issue#2442 Fix Issue #2442: Added support for Aparat	2025-09-20 20:47:37 -04:00
Paul Pfeister	61eeeb7876	Merge branch 'master' into Issue#2442	2025-09-20 20:45:09 -04:00
Paul Pfeister	df7da4288c	fix(ci): scoping	2025-09-20 20:44:38 -04:00
Paul Pfeister	70896f1da4	Merge branch 'master' into Issue#2442	2025-09-20 20:26:14 -04:00
Paul Pfeister	0a38cad926	fix(ci): issue write permission	2025-09-20 20:24:41 -04:00
Paul Pfeister	1e38fb6f7b	Merge branch 'master' into Issue#2442	2025-09-20 20:21:48 -04:00
Paul Pfeister	9b3dc3e581	fix(ci): issue write permission	2025-09-20 20:21:28 -04:00
Paul Pfeister	37b30602fd	Merge branch 'master' into Issue#2442	2025-09-20 20:12:21 -04:00
Paul Pfeister	7afdee4c58	fix: incorrect method	2025-09-20 20:09:44 -04:00
Paul Pfeister	d4d8e01e31	chore: remove dead site Fixes: #2433	2025-09-20 19:45:34 -04:00
Paul Pfeister	e5e0da00fe	Merge pull request #2549 from sherlock-project/add/instapaper feat: add instapaper	2025-09-20 18:13:30 -04:00
Paul Pfeister	dc61cdc7a4	chore: set request method	2025-09-20 18:10:33 -04:00
Paul Pfeister	0fa2e1afc7	chore: cleanup everything	2025-09-20 18:09:44 -04:00
Paul Pfeister	7ca90ba728	ci: test result summarization	2025-09-20 18:06:25 -04:00
Paul Pfeister	cd6fa5bb30	ci: fix the thing	2025-09-20 18:04:42 -04:00
Paul Pfeister	fa05641661	ci: improve validation	2025-09-20 17:43:00 -04:00
Paul Pfeister	97ba4e8616	fix(ci): validation issue	2025-09-20 15:39:01 -04:00
Paul Pfeister	9882478fb5	feat: add instapaper	2025-09-20 15:05:44 -04:00
Paul Pfeister	9f5b7e1846	fix(validation ci): parsing and presentation	2025-09-20 15:02:43 -04:00
Paul Pfeister	05afac7082	Merge pull request #2548 from sherlock-project/feature/automatic-testing Automatically test modified targets upon PR	2025-09-20 14:47:38 -04:00
Paul Pfeister	ae362b0f02	ci: automatically validate modified targets on pr	2025-09-20 14:44:19 -04:00
Paul Pfeister	435540606e	chore: add typedef	2025-09-20 13:49:29 -04:00
Paul Pfeister	96aa12c140	Merge pull request #2546 from rezocrypt/add-laracast-support Added Laracast support	2025-09-20 13:38:21 -04:00
My Name	9560355a7c	Added Laracast support	2025-09-18 10:23:09 +04:00
Paul Pfeister	b44ac231c1	chore: move SSOT to pyproject.toml Co-authored-by: ByteXenon <125568681+ByteXenon@users.noreply.github.com>	2025-09-17 17:47:45 -04:00
Paul Pfeister	7ff3924f0b	ci(exclusions): ensure unstaging and removal of tmp	2025-09-17 17:17:49 -04:00
Paul Pfeister	39c3729524	ci(exclusions): fix loss of untracked list	2025-09-17 14:09:15 -04:00
Paul Pfeister	faddcbd15f	ci(exclusions): fix loss of untracked list	2025-09-17 14:03:51 -04:00
Paul Pfeister	78a2d309d1	ci(exclusions): fix loss of untracked list	2025-09-17 13:55:42 -04:00
Paul Pfeister	35940e7584	fix: ignore exclusions list on parameterization for false positive tests	2025-09-17 13:44:02 -04:00
kareemeldahshoury	de81f38622	Fix Issue #2442 : Added support for Aparat	2025-04-29 15:25:31 -05:00