auto: update exclusions list

2026-05-09 05:59:06 +00:00 · 2026-05-08 05:48:42 +00:00 · 2026-05-07 06:08:05 +00:00 · 2026-05-06 06:06:07 +00:00 · 2026-05-05 05:59:06 +00:00 · 2026-05-04 06:07:50 +00:00
54 changed files with 40 additions and 8932 deletions
@@ -1,19 +0,0 @@
 # Actor image: extends the upstream Sherlock image with Node.js, the Apify CLI
 # and the command-line tools used by the .actor/actor.sh entry point.
 FROM sherlock/sherlock as sherlock
 # Install Node.js from the NodeSource apt repository.
 # `&&` (rather than `;`) makes a failed `apt-get update` abort the build
 # instead of silently installing from a stale package index.
 RUN apt-get update && apt-get install -y curl gpg
 RUN mkdir -p /etc/apt/keyrings
 RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
 RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
 # Node.js plus every tool the Actor shell script depends on (bash, jq, jo, xz-utils).
 # A single install covers both, so no second apt-get layer is needed.
 RUN apt-get update && apt-get install -y curl bash git jq jo xz-utils nodejs
 # Install Apify CLI (node.js) for the Actor Runtime
 RUN npm -g install apify-cli
 # Copy Actor dir with the actorization shell script
 COPY .actor/ .actor
 ENTRYPOINT [".actor/actor.sh"]
@@ -1,93 +0,0 @@
 # Sherlock Actor on Apify
 [![Sherlock Actor](https://apify.com/actor-badge?actor=netmilk/sherlock)](https://apify.com/netmilk/sherlock?fpr=sherlock)
 This Actor wraps the [Sherlock Project](https://sherlockproject.xyz/) to provide serverless username reconnaissance across social networks in the cloud. It helps you find usernames across multiple social media platforms without installing and running the tool locally.
 ## What are Actors?
 [Actors](https://docs.apify.com/platform/actors?fpr=sherlock) are serverless microservices running on the [Apify Platform](https://apify.com/?fpr=sherlock). They are based on the [Actor SDK](https://docs.apify.com/sdk/js?fpr=sherlock) and can be found in the [Apify Store](https://apify.com/store?fpr=sherlock). Learn more about Actors in the [Apify Whitepaper](https://whitepaper.actor?fpr=sherlock).
 ## Usage
 ### Apify Console
 1. Go to the Apify Actor page
 2. Click "Run"
 3. In the input form, fill in **Username(s)** to search for
 4. The Actor will run and produce its outputs in the default datastore
 ### Apify CLI
 ```bash
 apify call YOUR_USERNAME/sherlock --input='{
  "usernames": ["johndoe", "janedoe"]
 }'
 ```
 ### Using Apify API
 ```bash
 curl --request POST \
  --url "https://api.apify.com/v2/acts/YOUR_USERNAME~sherlock/runs" \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer YOUR_API_TOKEN' \
  --data '{
  "usernames": ["johndoe", "janedoe"]
 }'
 ```
 ## Input Parameters
 The Actor accepts a JSON schema with the following structure:
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `usernames` | array | Yes | - | List of usernames to search for |
 | `usernames[]` | string | Yes | - | Username to search for |
 ### Example Input
 ```json
 {
  "usernames": ["techuser", "designuser"]
 }
 ```
 ## Output
 The Actor provides three types of outputs:
 ### Dataset Record
 | Field | Type | Required | Description |
 |-------|------|----------|-------------|
 | `username` | string | Yes | Username the search was conducted for |
 | `links` | array | Yes | Array with found links to the social media |
 | `links[]` | string | No | URL to the account |
 ### Example Dataset Item (JSON)
 ```json
 {
  "username": "johndoe",
  "links": [
    "https://github.com/johndoe" 
  ]
 }
 ```
 ## Performance & Resources
 - **Memory Requirements**:
  - Minimum: 512 MB RAM
  - Recommended: 1 GB RAM for multiple usernames
 - **Processing Time**:
  - Single username: ~1-2 minutes
  - Multiple usernames: 2-5 minutes
  - Varies based on number of sites checked and response times
 For more help, check the [Sherlock Project documentation](https://github.com/sherlock-project/sherlock) or raise an issue in the Actor's repository.
@@ -1,13 +0,0 @@
 {
  "actorSpecification": 1,
  "name": "sherlock",
  "version": "0.0",
  "buildTag": "latest",
  "environmentVariables": {},
  "dockerFile": "./Dockerfile", 
  "dockerContext": "../",
  "input": "./input_schema.json",
  "storages": {
    "dataset": "./dataset_schema.json"
  }
 }
@@ -1,14 +0,0 @@
 #!/bin/bash
 # Actor entry point: reads the `usernames` array from the Actor input, runs
 # Sherlock once for all of them, then pushes one dataset record per username.
 #
 # jq prints one username per line; mapfile collects them into an array so each
 # name is passed on as exactly one argument (no word splitting or globbing).
 mapfile -t USERNAMES < <(apify actor:get-input | jq -r '.usernames[]')
 echo "INPUT: ${USERNAMES[*]}"
 sherlock "${USERNAMES[@]}"
 for username in "${USERNAMES[@]}"; do
   # jo gives a *leading* @, : or % a special meaning (read value from a file),
   # so escape only that first character with a backslash.
   # https://github.com/jpmens/jo/blob/master/jo.md#description
   safe_username=$(printf '%s\n' "$username" | sed -e 's/^@/\\@/' -e 's/^:/\\:/' -e 's/^%/\\%/')
   echo "pushing results for username: $username, content:"
   cat "$username.txt"
   # Drop the last line of the result file (presumably Sherlock's summary line;
   # confirm against Sherlock's output format), turn the remaining lines into a
   # JSON array, and wrap it in the {username, links} record that is pushed.
   sed '$d' "$username.txt" | jo -a | jo username="$safe_username" links:=- | apify actor:push-data
 done
@@ -1,45 +0,0 @@
 {
    "actorSpecification": 1,
    "fields":{
      "title": "Sherlock actor input",
      "description": "This is actor input schema",
      "type": "object",
      "schemaVersion": 1,
      "properties": {
        "links": {
          "title": "Links to accounts",
          "type": "array",
          "description": "A list of social media accounts found for the username"
        },
        "username": {
          "title": "Lookup username",
          "type": "string",
          "description": "Username the lookup was performed for"
        }
      },
      "required": [
        "username", 
        "links"
      ]
    },
    "views": {
        "overview": {
            "title": "Overview",
            "transformation": {
              "fields": [
                "username",
                "links"
              ]
            },
            "display": {
               "component": "table",
               "links": {
                 "label": "Links"
               },
               "username":{
                 "label": "Username"
               }
            }
        }
    }
 }
@@ -1,18 +0,0 @@
 {
  "title": "Sherlock actor input",
  "description": "This is actor input schema",
  "type": "object",
  "schemaVersion": 1,
  "properties": {
    "usernames": {
      "title": "Usernames to hunt down",
      "type": "array",
      "description": "A list of usernames to be checked for existence across social media",
      "editor": "stringList",
      "prefill": ["johndoe"]
    }
  },
  "required": [
    "usernames"
  ]
 }
@@ -1,8 +0,0 @@
 .git/
 .vscode/
 screenshot/
 tests/
 *.txt
 !/requirements.txt
 venv/
 devel/
@@ -1,18 +0,0 @@
 root = true
 [*]
 indent_style = space
 indent_size = 2
 end_of_line = lf
 charset = utf-8
 trim_trailing_whitespace = true
 insert_final_newline = true
 curly_bracket_next_line = false
 spaces_around_operators = true
 [*.{markdown,md}]
 trim_trailing_whitespace = false
 [*.py]
 indent_size = 4
 quote_type = double
@@ -1,15 +0,0 @@
 ### REPOSITORY
 /.github/CODEOWNERS @sdushantha @ppfeister
 /.github/FUNDING.yml @sdushantha
 /LICENSE @sdushantha
 ### PACKAGING
 # Changes made to these items without code owner approval may negatively
 # impact packaging pipelines.
 /pyproject.toml @ppfeister @sdushantha
 ### REGRESSION
 /.github/workflows/regression.yml @ppfeister
 /tox.ini @ppfeister
 /pytest.ini @ppfeister
 /tests/ @ppfeister
@@ -1 +0,0 @@
 github: [ sdushantha, ppfeister, matheusfelipeog ]
@@ -1,71 +0,0 @@
 name: Bug report
 description: File a bug report
 labels: ["bug"]
 body:
  - type: dropdown
    id: package
    attributes:
      label: Installation method
      description: |
        Some packages are maintained by the community, rather than by the Sherlock Project.
        Knowing which packages are affected helps us diagnose package-specific bugs.
      options:
        - Select one
        - PyPI (via pip)
        - Homebrew
        - Docker
        - Kali repository (via apt)
        - Built from source
        - Other (indicate below)
    validations:
      required: true
  - type: input
    id: package-version
    attributes:
      label: Package version
      description: |
        Knowing the version of the package you are using can help us diagnose your issue more quickly.
        You can find the version by running `sherlock --version`.
    validations:
      required: true
  - type: textarea
    id: description
    attributes:
      label: Description
      description: |
        Detailed descriptions that help contributors understand and reproduce your bug are much more likely to lead to a fix.
        Please include the following information:
        - What you were trying to do
        - What you expected to happen
        - What actually happened
      placeholder: |
        When doing {action}, the expected result should be {expected result}.
        When doing {action}, however, the actual result was {actual result}.
        This is undesirable because {reason}.
    validations:
      required: true
  - type: textarea
    id: steps-to-reproduce
    attributes:
      label: Steps to reproduce
      description: Write a step by step list that will allow us to reproduce this bug.
      placeholder: |
        1. Do something
        2. Then do something else
    validations:
      required: true
  - type: textarea
    id: additional-info
    attributes:
      label: Additional information
      description: If you have some additional information, please write it here.
    validations:
      required: false
  - type: checkboxes
    id: terms
    attributes:
      label: Code of Conduct
      description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md). 
      options:
        - label: I agree to follow this project's Code of Conduct
          required: true
@@ -1 +0,0 @@
 blank_issues_enabled: false
@@ -1,27 +0,0 @@
 name: False negative
 description: Report a site that is returning false negative results
 title: "False negative for: "
 labels: ["false negative"]
 body:
  - type: markdown
    attributes:
      value: |
        Please include the site name in the title of your issue.
        Submit **one site per report** for faster resolution. If you have multiple sites in the same report, it often takes longer to fix.
  - type: textarea
    id: additional-info
    attributes:
      label: Additional info
      description: If you know why the site is returning false negatives, or noticed any patterns, please explain.
      placeholder: |
        Reddit is returning false negatives because...
    validations:
      required: false
  - type: checkboxes
    id: terms
    attributes:
      label: Code of Conduct
      description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md). 
      options:
        - label: I agree to follow this project's Code of Conduct
          required: true
@@ -1,28 +0,0 @@
 name: False positive
 description: Report a site that is returning false positive results
 title: "False positive for: "
 labels: ["false positive"]
 body:
  - type: markdown
    attributes:
      value: |
        Please include the site name in the title of your issue.
        Submit **one site per report** for faster resolution. If you have multiple sites in the same report, it often takes longer to fix.
  - type: textarea
    id: additional-info
    attributes:
      label: Additional info
      description: If you know why the site is returning false positives, or noticed any patterns, please explain.
      placeholder: |
        Reddit is returning false positives because...
        False positives only occur after x searches...
    validations:
      required: false
  - type: checkboxes
    id: terms
    attributes:
      label: Code of Conduct
      description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md). 
      options:
        - label: I agree to follow this project's Code of Conduct
          required: true
@@ -1,24 +0,0 @@
 name: Feature request
 description: Request a feature or enhancement
 labels: ["enhancement"]
 body:
  - type: markdown
    attributes:
      value: |
        Concise and thoughtful titles help other contributors find and add your requested feature.
  - type: textarea
    id: description
    attributes:
      label: Description
      description: Describe the feature you are requesting
      placeholder: I'd like Sherlock to be able to do xyz
    validations:
      required: true
  - type: checkboxes
    id: terms
    attributes:
      label: Code of Conduct
      description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md). 
      options:
        - label: I agree to follow this project's Code of Conduct
          required: true
@@ -1,35 +0,0 @@
 name: Request a new website
 description: Request that Sherlock add support for a new website
 title: "Requesting support for: "
 labels: ["site support request"]
 body:
  - type: markdown
    attributes:
      value: |
        Ensure that the site name is in the title of your request. Requests without this information will be **closed**.
  - type: input
    id: site-url
    attributes:
      label: Site URL
      description: |
        What is the URL of the website indicated in your title?
        Websites sometimes have similar names. This helps contributors find the correct site.
      placeholder: https://reddit.com
    validations:
      required: true
  - type: textarea
    id: additional-info
    attributes:
      label: Additional info
      description: If you have suggestions on how Sherlock should detect for usernames, please explain below
      placeholder: Sherlock can detect if a username exists on Reddit by checking for...
    validations:
      required: false
  - type: checkboxes
    id: terms
    attributes:
      label: Code of Conduct
      description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md). 
      options:
        - label: I agree to follow this project's Code of Conduct
          required: true
@@ -1,11 +0,0 @@
 ## Security Policy
 ### Supported Versions
 Sherlock is a forward looking project. Only the latest and most current version is supported.
 ### Reporting a Vulnerability
 Security concerns can be submitted [__here__][report-url] without risk of exposing sensitive information. For issues that are low severity or unlikely to see exploitation, public issues are often acceptable.
 [report-url]: https://github.com/sherlock-project/sherlock/security/advisories/new
@@ -1,89 +0,0 @@
 name: Exclusions Updater
 on:
  schedule:
    #- cron: '0 5 * * 0'  # Runs at 05:00 every Sunday
    - cron: '0 5 * * *' # Runs at 05:00 every day
  workflow_dispatch:
 jobs:
  update-exclusions:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'
      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
          poetry-version: 'latest'
      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev
      - name: Run false positive tests
        run: |
          $(poetry env activate)
          pytest -q --tb no -m validate_targets_fp -n 20 | tee fp_test_results.txt
          deactivate
      - name: Parse false positive detections by desired categories
        run: |
          grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \
            | sort -u > false_positive_exclusions.txt
          grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \
            | sort -u > waf_hits.txt
      - name: Detect if exclusions list changed
        id: detect_changes
        run: |
          git fetch origin exclusions || true
          if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
            # If the exclusions branch and file exist, compare
            if git diff --quiet origin/exclusions -- false_positive_exclusions.txt; then
              echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
            else
              echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
            fi
          else
            # If the exclusions branch or file do not exist, treat as changed
            echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Quantify and display results
        run: |
          FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs)
          WAF_COUNT=$(wc -l < waf_hits.txt | xargs)
          echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits."
          echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt
          echo ">>> WAF hits:" && cat waf_hits.txt
      - name: Commit and push exclusions list
        if: steps.detect_changes.outputs.exclusions_changed == 'true'
        run: |
          git config user.name "Paul Pfeister (automation)"
          git config user.email "code@pfeister.dev"
          mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp
          git add -f false_positive_exclusions.txt.tmp # -f required to override .gitignore
          git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp
          git fetch origin exclusions || true # Allows creation of branch if deleted
          git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)
          git stash pop || true
          mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt
          git rm -f false_positive_exclusions.txt.tmp || true
          git add false_positive_exclusions.txt
          git commit -m "auto: update exclusions list" || echo "No changes to commit"
          git push origin exclusions
@@ -1,94 +0,0 @@
 name: Regression Testing
 on:
  pull_request:
    branches:
      - master
      - release/**
    paths:
      - '.github/workflows/regression.yml'
      - '**/*.json'
      - '**/*.py'
      - '**/*.ini'
      - '**/*.toml'
      - 'Dockerfile'
  push:
    branches:
      - master
      - release/**
    paths:
      - '.github/workflows/regression.yml'
      - '**/*.json'
      - '**/*.py'
      - '**/*.ini'
      - '**/*.toml'
      - 'Dockerfile'
 jobs:
  tox-lint:
    runs-on: ubuntu-latest
    # Linting is run through tox to ensure that the same linter
    # is used by local runners
    steps:
      - uses: actions/checkout@v6
      - name: Set up linting environment
        uses: actions/setup-python@v6
        with:
          python-version: '3.x'
      - name: Install tox and related dependencies
        run: |
          python -m pip install --upgrade pip
          pip install tox
      - name: Run tox linting environment
        run: tox -e lint
  tox-matrix:
    runs-on: ${{ matrix.os }}
    strategy:
      # We want to know what specific versions it fails on
      fail-fast: false
      matrix:
        os: [
          ubuntu-latest,
          windows-latest,
          macos-latest,
        ]
        python-version: [
          '3.10',
          '3.11',
          '3.12',
          '3.13',
          '3.14',
          '3.14t',
        ]
    steps:
      - uses: actions/checkout@v6
      - name: Set up environment ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install tox and related dependencies
        run: |
          python -m pip install --upgrade pip
          pip install tox
          pip install tox-gh-actions
      - name: Run tox
        run: tox
  docker-build-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Get version from pyproject.toml
        id: get-version
        run: |
          VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
          echo "version=$VERSION" >> $GITHUB_OUTPUT
      - name: Build Docker image
        run: |
          docker build \
            --build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
            -t sherlock-test:latest .
      - name: Test Docker image runs
        run: docker run --rm sherlock-test:latest --version
@@ -1,46 +0,0 @@
 name: Update Site List
 # Trigger the workflow when changes are pushed to the master branch
 # and the changes include the sherlock_project/resources/data.json file
 on:
  push:
    branches:
      - master
    paths:
      - sherlock_project/resources/data.json
 jobs:
  sync-json-data:
    # Use the latest version of Ubuntu as the runner environment
    runs-on: ubuntu-latest
    steps:
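      # Check out the code with full history. This workflow only runs on push events,
      # where github.event.pull_request is unset, so `ref` is empty and the action
      # falls back to the commit that triggered the workflow.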
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          fetch-depth: 0
      # Install Python 3
      - name: Install Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      # Execute the site_list.py Python script
      - name: Execute site-list.py
        run: python devel/site-list.py
      - name: Pushes to another repository
        uses: sdushantha/github-action-push-to-another-repository@main
        env:
          SSH_DEPLOY_KEY: ${{ secrets.SSH_DEPLOY_KEY }}
          API_TOKEN_GITHUB: ${{ secrets.API_TOKEN_GITHUB }}
        with:
          source-directory: 'output'
          destination-github-username: 'sherlock-project'
          commit-message: 'Updated site list'
          destination-repository-name: 'sherlockproject.xyz'
          user-email: siddharth.dushantha@gmail.com
          target-branch: master
@@ -1,127 +0,0 @@
 name: Modified Target Validation
 on:
  pull_request_target:
    branches:
      - master
    paths:
      - "sherlock_project/resources/data.json"
 jobs:
  validate-modified-targets:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
          # Checkout the base branch but fetch all history to avoid a second fetch call
          ref: ${{ github.base_ref }}
          fetch-depth: 0
          persist-credentials: false
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.13"
      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
          poetry-version: "latest"
      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev
      - name: Prepare JSON versions for comparison
        run: |
          # Fetch only the PR's branch head (single network call in this step)
          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
          # Find the merge-base commit between the target branch and the PR branch
          MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
          echo "Comparing PR head against merge-base commit: $MERGE_BASE"
          # Safely extract the file from the PR's head and the merge-base commit
          git show pr:sherlock_project/resources/data.json > data.json.head
          git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
          # CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
          # This ensures that pytest runs against the new, updated file.
          cp data.json.head sherlock_project/resources/data.json
      - name: Discover modified targets
        id: discover-modified
        run: |
          CHANGED=$(
            python - <<'EOF'
          import json
          import sys
          try:
              with open("data.json.base") as f: base = json.load(f)
              with open("data.json.head") as f: head = json.load(f)
          except FileNotFoundError as e:
              print(f"Error: Could not find {e.filename}", file=sys.stderr)
              sys.exit(1)
          except json.JSONDecodeError as e:
              print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
              sys.exit(1)
          changed = []
          for k, v in head.items():
              if k not in base or base[k] != v:
                  changed.append(k)
          print(",".join(sorted(changed)))
          EOF
          )
          # Preserve changelist
          echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
          echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
      - name: Validate remote manifest against local schema
        if: steps.discover-modified.outputs.changed_targets != ''
        run: |
          poetry run pytest tests/test_manifest.py::test_validate_manifest_against_local_schema
      # Only validate when at least one target was added or modified. A PR that
      # merely removes targets yields an empty list, leaving pytest nothing to
      # select; the other post-discovery steps apply the same guard.
      - name: Validate modified targets
        if: steps.discover-modified.outputs.changed_targets != ''
        env:
          CHANGED_TARGETS: ${{ steps.discover-modified.outputs.changed_targets }}
        run: |
          poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
            --chunked-sites "$CHANGED_TARGETS" \
            --junitxml=validation_results.xml
      - name: Prepare validation summary
        if: steps.discover-modified.outputs.changed_targets != ''
        id: prepare-summary
        run: |
          summary=$(
            poetry run python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
          )
          echo "$summary" > validation_summary.md
      - name: Announce validation results
        if: steps.discover-modified.outputs.changed_targets != ''
        uses: actions/github-script@v8
        with:
          script: |
            const fs = require('fs');
            const body = fs.readFileSync('validation_summary.md', 'utf8');
            await github.rest.issues.createComment({
              issue_number: context.payload.pull_request.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body,
            });
      - name: This step shows as ran when no modifications are found
        if: steps.discover-modified.outputs.changed_targets == ''
        run: |
          echo "No modified targets found"
@@ -1,47 +0,0 @@
 # Virtual Environments
 venv/
 bin/
 lib/
 pyvenv.cfg
 poetry.lock
 # Regression Testing
 .coverage
 .tox/
 # Editor Configurations
 .vscode/
 .idea/
 # Python
 __pycache__/
 # Pip
 src/
 # Devel, Build, and Installation
 *.egg-info/
 dist/**
 # Jupyter Notebook
 .ipynb_checkpoints
 *.ipynb
 # Output files, except requirements.txt
 *.txt
 !requirements.txt
 # Comma-Separated Values (CSV) Reports
 *.csv
 #XLSX Reports
 *.xlsx
 # Excluded sites list
 tests/.excluded_sites
 # MacOS Folder Metadata File
 .DS_Store
 # Vim swap files
 *.swp
@@ -1,31 +0,0 @@
 # Release instructions:
  # 1. Update the version tag in the Dockerfile to match the version in sherlock/__init__.py
  # 2. Update the VCS_REF tag to match the tagged version's FULL commit hash
  # 3. Build image with BOTH latest and version tags
    # i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
 # First stage: only upgrades pip; nothing is copied from it into the final image.
 FROM python:3.12-slim-bullseye AS build
 WORKDIR /sherlock
 RUN pip3 install --no-cache-dir --upgrade pip
 # Final image: installs the released sherlock-project package from PyPI.
 FROM python:3.12-slim-bullseye
 WORKDIR /sherlock
 # NOTE: Dockerfile comments must sit on their own line. A `#` that follows an
 # instruction on the same line is parsed as additional arguments to it.
 # CHANGE ME ON UPDATE
 ARG VCS_REF=
 ARG VCS_URL="https://github.com/sherlock-project/sherlock"
 # CHANGE ME ON UPDATE (can also be supplied with --build-arg VERSION_TAG=...)
 ARG VERSION_TAG=
 ENV SHERLOCK_ENV=docker
 LABEL org.label-schema.vcs-ref=$VCS_REF \
      org.label-schema.vcs-url=$VCS_URL \
      org.label-schema.name="Sherlock" \
      org.label-schema.version=$VERSION_TAG \
      website="https://sherlockproject.xyz"
 # Pin the installed package to the version named by VERSION_TAG
 RUN pip3 install --no-cache-dir sherlock-project==$VERSION_TAG
 WORKDIR /sherlock
 ENTRYPOINT ["sherlock"]
@@ -1,21 +0,0 @@
 MIT License
 Copyright (c) 2019 Sherlock Project
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
@@ -1,45 +0,0 @@
 #!/usr/bin/env python
 # This module generates the listing of supported sites which can be found in
 # sites.mdx. It also organizes all the sites in alphanumeric order
 import json
 import os
 # Path of the site manifest, relative to the directory the script is run from
 DATA_REL_URI: str = "sherlock_project/resources/data.json"
 DEFAULT_ENCODING = "utf-8"
 # Read the data.json file
 with open(DATA_REL_URI, "r", encoding=DEFAULT_ENCODING) as data_file:
    data: dict = json.load(data_file)
 # Remove the schema-specific keyword from a copy so that only site entries are
 # listed; `data` itself keeps the key and is written back in full below
 social_networks = data.copy()
 social_networks.pop('$schema', None)
 # Sort the social networks in alphanumeric order (list of (name, info) pairs)
 social_networks = sorted(social_networks.items())
 # Make output dir where the site list will be written. exist_ok=True lets the
 # script be re-run without failing on a directory left over from a previous run.
 os.makedirs("output", exist_ok=True)
 # Write the list of supported sites to sites.mdx
 with open("output/sites.mdx", "w", encoding=DEFAULT_ENCODING) as site_file:
    # Front matter block at the top of the generated page
    site_file.write("---\n")
    site_file.write("title: 'List of supported sites'\n")
    site_file.write("sidebarTitle: 'Supported sites'\n")
    site_file.write("icon: 'globe'\n")
    site_file.write("description: 'Sherlock currently supports **400+** sites'\n")
    site_file.write("---\n\n")
    # One numbered list entry per site, flagged when the manifest marks it NSFW
    for social_network, info in social_networks:
        url_main = info["urlMain"]
        is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
        site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")
 # Overwrite the data.json file with sorted data
 with open(DATA_REL_URI, "w", encoding=DEFAULT_ENCODING) as data_file:
    sorted_data = json.dumps(data, indent=2, sort_keys=True)
    data_file.write(sorted_data)
    data_file.write("\n")  # Keep the newline after writing data
 print("Finished updating supported site listing!")
@@ -1,72 +0,0 @@
 #!/usr/bin/env python
 # This module summarizes the results of site validation tests queued by
 # workflow validate_modified_targets for presentation in Issue comments.
 from defusedxml import ElementTree as ET
 import sys
 from pathlib import Path
 def summarize_junit_xml(xml_path: Path) -> str:
    """Summarize a JUnit XML report as a Markdown table.
    One table row is produced per site. The site is taken from the
    parametrization suffix of each test case name ("test_name[Site]");
    test cases without such a suffix do not produce a row.
    Keyword Arguments:
    xml_path               -- Path of the JUnit XML report to summarize.
    Return Value:
    Markdown text made of a heading, a "Target | F+ Check | F- Check" table
    and, when applicable, notes about detected failures and errors.
    Raises:
    ValueError             -- If the report contains no testsuite element.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()
    # The suite is either the document root itself or a direct child of a wrapper element
    suite = root if root.tag == 'testsuite' else root.find('testsuite')
    if suite is None:
        raise ValueError("Invalid JUnit XML: No testsuite found")
    pass_message: str = ":heavy_check_mark: &nbsp; Pass"
    fail_message: str = ":x: &nbsp; Fail"
    # Maps the test function name to the table column it reports into
    check_columns: dict[str, str] = {
        "test_false_neg": "F- Check",
        "test_false_pos": "F+ Check",
    }
    summary_lines: list[str] = [
        "#### Automatic validation of changes\n",
        "| Target | F+ Check | F- Check |",
        "|---|---|---|",
    ]
    failures = int(suite.get('failures', 0))
    errors_detected: bool = False
    results: dict[str, dict[str, str]] = {}
    for testcase in suite.findall('testcase'):
        failure = testcase.find('failure')
        error = testcase.find('error')
        if error is not None:
            errors_detected = True
        test_name, bracket, params = (testcase.get('name') or '').partition('[')
        if not bracket:
            # Not parametrized by site, so there is no table row to attribute it to
            continue
        # Drop only the closing bracket of the parametrization suffix so that
        # site names which themselves contain brackets are kept intact
        site_name = params[:-1] if params.endswith(']') else params
        site_results = results.setdefault(site_name, {})
        column = check_columns.get(test_name)
        if column is not None:
            site_results[column] = pass_message if failure is None and error is None else fail_message
    # A check that never ran for a site is reported as 'Error!'
    for site_name, checks in results.items():
        summary_lines.append(f"| {site_name} | {checks.get('F+ Check', 'Error!')} | {checks.get('F- Check', 'Error!')} |")
    if failures > 0:
        summary_lines.append("\n___\n" +
            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")
    if errors_detected:
        summary_lines.append("\n___\n" +
            "\n**Errors were detected during validation. Please review the workflow logs.**")
    return "\n".join(summary_lines)
 if __name__ == "__main__":
    # Exactly one command line argument is expected: the JUnit XML report
    if not len(sys.argv) == 2:
        print("Usage: summarize_site_validation.py <junit-xml-file>")
        sys.exit(1)
    xml_path: Path = Path(sys.argv[1])
    if xml_path.is_file():
        # Emit the Markdown summary on the standard output
        summary: str = summarize_junit_xml(xml_path)
        print(summary)
    else:
        print(f"Error: File '{xml_path}' does not exist.")
        sys.exit(1)
@@ -1,130 +0,0 @@
 # Contributor Covenant Code of Conduct
 ## Our Pledge
 We as members, contributors, and leaders pledge to make participation in our
 community a harassment-free experience for everyone, regardless of age, body
 size, visible or invisible disability, ethnicity, sex characteristics, gender
 identity and expression, level of experience, education, socio-economic status,
 nationality, personal appearance, race, caste, color, religion, or sexual
 identity and orientation.
 We pledge to act and interact in ways that contribute to an open, welcoming,
 diverse, inclusive, and healthy community.
 ## Our Standards
 Examples of behavior that contributes to a positive environment for our
 community include:
 * Demonstrating empathy and kindness toward other people
 * Being respectful of differing opinions, viewpoints, and experiences
 * Giving and gracefully accepting constructive feedback
 * Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
 * Focusing on what is best not just for us as individuals, but for the overall
  community
 Examples of unacceptable behavior include:
 * The use of sexualized language or imagery, and sexual attention or advances of
  any kind
 * Trolling, insulting or derogatory comments, and personal or political attacks
 * Public or private harassment
 * Publishing others' private information, such as a physical or email address,
  without their explicit permission
 * Other conduct which could reasonably be considered inappropriate in a
  professional setting
 ## Enforcement Responsibilities
 Community leaders are responsible for clarifying and enforcing our standards of
 acceptable behavior and will take appropriate and fair corrective action in
 response to any behavior that they deem inappropriate, threatening, offensive,
 or harmful.
 Community leaders have the right and responsibility to remove, edit, or reject
 comments, commits, code, wiki edits, issues, and other contributions that are
 not aligned to this Code of Conduct, and will communicate reasons for moderation
 decisions when appropriate.
 ## Scope
 This Code of Conduct applies within all community spaces, and also applies when
 an individual is officially representing the community in public spaces.
 Examples of representing our community include using an official e-mail address,
 posting via an official social media account, or acting as an appointed
 representative at an online or offline event.
 ## Enforcement
 Instances of abusive, harassing, or otherwise unacceptable behavior may be
 reported to the community leaders responsible for enforcement at yahya.arbabi@gmail.com.
 All complaints will be reviewed and investigated promptly and fairly.
 All community leaders are obligated to respect the privacy and security of the
 reporter of any incident.
 ## Enforcement Guidelines
 Community leaders will follow these Community Impact Guidelines in determining
 the consequences for any action they deem in violation of this Code of Conduct:
 ### 1. Correction
 **Community Impact**: Use of inappropriate language or other behavior deemed
 unprofessional or unwelcome in the community.
 **Consequence**: A private, written warning from community leaders, providing
 clarity around the nature of the violation and an explanation of why the
 behavior was inappropriate. A public apology may be requested.
 ### 2. Warning
 **Community Impact**: A violation through a single incident or series of
 actions.
 **Consequence**: A warning with consequences for continued behavior. No
 interaction with the people involved, including unsolicited interaction with
 those enforcing the Code of Conduct, for a specified period of time. This
 includes avoiding interactions in community spaces as well as external channels
 like social media. Violating these terms may lead to a temporary or permanent
 ban.
 ### 3. Temporary Ban
 **Community Impact**: A serious violation of community standards, including
 sustained inappropriate behavior.
 **Consequence**: A temporary ban from any sort of interaction or public
 communication with the community for a specified period of time. No public or
 private interaction with the people involved, including unsolicited interaction
 with those enforcing the Code of Conduct, is allowed during this period.
 Violating these terms may lead to a permanent ban.
 ### 4. Permanent Ban
 **Community Impact**: Demonstrating a pattern of violation of community
 standards, including sustained inappropriate behavior, harassment of an
 individual, or aggression toward or disparagement of classes of individuals.
 **Consequence**: A permanent ban from any sort of public interaction within the
 community.
 ## Attribution
 This Code of Conduct is adapted from the [Contributor Covenant][homepage],
 version 2.1, available at
 [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
 Community Impact Guidelines were inspired by
 [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
 For answers to common questions about this code of conduct, see the FAQ at
 [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
 [https://www.contributor-covenant.org/translations][translations].
 [homepage]: https://www.contributor-covenant.org
 [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
 [Mozilla CoC]: https://github.com/mozilla/diversity
 [FAQ]: https://www.contributor-covenant.org/faq
 [translations]: https://www.contributor-covenant.org/translations
@@ -1,115 +0,0 @@
 <p align="center">
  <br>
  <a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png" alt="sherlock"/></a>
  <br>
  <span>Hunt down social media accounts by username across <a href="https://sherlockproject.xyz/sites">400+ social networks</a></span>
  <br>
 </p>
 <p align="center">
  <a href="https://sherlockproject.xyz/installation">Installation</a>
  &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp;
  <a href="https://sherlockproject.xyz/usage">Usage</a>
  &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp;
  <a href="https://sherlockproject.xyz/contribute">Contributing</a>
 </p>
 <p align="center">
 <img width="70%" height="70%" src="images/demo.png" alt="demo"/>
 </p>
 ## Installation
 > [!WARNING]  
 > Packages for ParrotOS and Ubuntu 24.04, maintained by a third party, appear to be __broken__.  
 > Users of these systems should defer to [`uv`](https://docs.astral.sh/uv/)/`pipx`/`pip` or Docker.
 | Method | Notes |
 | - | - |
 | `pipx install sherlock-project` | `pip` or [`uv`](https://docs.astral.sh/uv/) may be used in place of `pipx` |
 | `docker run -it --rm sherlock/sherlock` | |
 | `dnf install sherlock-project` | |
 Community-maintained packages are available for Debian (>= 13), Ubuntu (>= 22.10), Homebrew, Kali, and BlackArch. These packages are not directly supported or maintained by the Sherlock Project.
 See all alternative installation methods [here](https://sherlockproject.xyz/installation).
 ## General usage
 To search for only one user:
 ```bash
 sherlock user123
 ```
 To search for more than one user:
 ```bash
 sherlock user1 user2 user3
 ```
 Accounts found will be stored in an individual text file named after the corresponding username (e.g. `user123.txt`).
 ```console
 $ sherlock --help
 usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT] [--output OUTPUT] [--csv] [--xlsx] [--site SITE_NAME] [--proxy PROXY_URL] [--dump-response]
                [--json JSON_FILE] [--timeout TIMEOUT] [--print-all] [--print-found] [--no-color] [--browse] [--local] [--nsfw] [--txt] [--ignore-exclusions]
                USERNAMES [USERNAMES ...]
 Sherlock: Find Usernames Across Social Networks (Version 0.16.0)
 positional arguments:
  USERNAMES             One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').
 options:
  -h, --help            show this help message and exit
  --version             Display version information and dependencies.
  --verbose, -v, -d, --debug
                        Display extra debugging information and metrics.
  --folderoutput FOLDEROUTPUT, -fo FOLDEROUTPUT
                        If using multiple usernames, the output of the results will be saved to this folder.
  --output OUTPUT, -o OUTPUT
                        If using single username, the output of the result will be saved to this file.
  --csv                 Create Comma-Separated Values (CSV) File.
  --xlsx                Create the standard file for the modern Microsoft Excel spreadsheet (xlsx).
  --site SITE_NAME      Limit analysis to just the listed sites. Add multiple options to specify more than one site.
  --proxy PROXY_URL, -p PROXY_URL
                        Make requests over a proxy. e.g. socks5://127.0.0.1:1080
  --dump-response       Dump the HTTP response to stdout for targeted debugging.
  --json JSON_FILE, -j JSON_FILE
                        Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.
  --timeout TIMEOUT     Time (in seconds) to wait for response to requests (Default: 60)
  --print-all           Output sites where the username was not found.
  --print-found         Output sites where the username was found (also if exported as file).
  --no-color            Don't color terminal output
  --browse, -b          Browse to all results on default browser.
  --local, -l           Force the use of the local data.json file.
  --nsfw                Include checking of NSFW sites from default list.
  --txt                 Enable creation of a txt file
  --ignore-exclusions   Ignore upstream exclusions (may return more false positives)
 ```
 ## Credits
 Thank you to everyone who has contributed to Sherlock! ❤️
 <a href="https://github.com/sherlock-project/sherlock/graphs/contributors">
  <img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
 </a>
 ## Star History
 <picture>
  <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
  <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" />
  <img alt="Sherlock Project Star History Chart" src="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" />
 </picture>
 ## License
 MIT © Sherlock Project<br/>
 Creator - [Siddharth Dushantha](https://github.com/sdushantha)
 <!-- Reference Links -->
 [ext_pypi]: https://pypi.org/project/sherlock-project/
 [ext_brew]: https://formulae.brew.sh/formula/sherlock
@@ -1,42 +0,0 @@
 <!-- This README should be a mini version at all times for use on pypi -->
 <p align=center>
  <br>
  <a href="https://sherlock-project.github.io/" target="_blank"><img src="https://www.kali.org/tools/sherlock/images/sherlock-logo.svg" width="25%"/></a>
  <br>
  <strong><span>Hunt down social media accounts by username across <a href="https://github.com/sherlock-project/sherlock/blob/master/sites.md">400+ social networks</a></span></strong>
  <br><br>
  <span>Additional documentation can be found at our <a href="https://github.com/sherlock-project/sherlock/">GitHub repository</a></span>
  <br>
 </p>
 ## Usage
 ```console
 $ sherlock --help
 usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT]
                [--output OUTPUT] [--tor] [--unique-tor] [--csv] [--xlsx]
                [--site SITE_NAME] [--proxy PROXY_URL] [--json JSON_FILE]
                [--timeout TIMEOUT] [--print-all] [--print-found] [--no-color]
                [--browse] [--local] [--nsfw]
                USERNAMES [USERNAMES ...]
 ```
 To search for only one user:
 ```bash
 $ sherlock user123
 ```
 To search for more than one user:
 ```bash
 $ sherlock user1 user2 user3
 ```
 <br>
 ___
 <br>
 <p align="center">
 <img width="70%" height="70%" src="https://user-images.githubusercontent.com/27065646/219638267-a5e11090-aa6e-4e77-87f7-0e95f6ad5978.png"/>
 </p>
@@ -0,0 +1,40 @@
 7Cups
 APClips
 Airliners
 Apple Discussions
 Archive.org
 Bandcamp
 BitBucket
 Codolio
 Discord.bio
 Envato Forum
 Giphy
 Hashnode
 Hubski
 LessWrong
 Motherless
 Patched
 Pornhub
 Rarible
 Realmeye
 Reddit
 RocketTube
 RuneScape
 Scribd
 Shelf
 SlideShare
 Smule
 Splice
 Spotify
 TryHackMe
 Velomania
 Weblate
 YandexMusic
 dailykos
 igromania
 interpals
 mercadolivre
 opennet
 phpRU
 svidbook
 xHamster
@@ -1,68 +0,0 @@
 [build-system]
 requires = [ "poetry-core>=1.2.0" ]
 build-backend = "poetry.core.masonry.api"
 # poetry-core 1.8 not available in .fc39. Can upgrade to 1.8.0 at .fc39 EOL
 [tool.poetry-version-plugin]
 source = "init"
 [tool.poetry]
 name = "sherlock-project"
 version = "0.16.1"
 description = "Hunt down social media accounts by username across social networks"
 license = "MIT"
 authors = [
    "Siddharth Dushantha <siddharth.dushantha@gmail.com>"
 ]
 maintainers = [
    "Paul Pfeister <code@pfeister.dev>",
    "Matheus Felipe <matheusfelipeog@protonmail.com>",
    "Sondre Karlsen Dyrnes <sondre@villdyr.no>"
 ]
 readme = "docs/pyproject/README.md"
 packages = [ { include = "sherlock_project"} ]
 keywords = [ "osint", "reconnaissance", "information gathering" ]
 classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Security"
 ]
 homepage = "https://sherlockproject.xyz/"
 repository = "https://github.com/sherlock-project/sherlock"
 [tool.poetry.urls]
 "Bug Tracker" = "https://github.com/sherlock-project/sherlock/issues"
 [tool.poetry.dependencies]
 python = "^3.9"
 certifi = ">=2019.6.16"
 colorama = "^0.4.1"
 PySocks = "^1.7.0"
 requests = "^2.22.0"
 requests-futures = "^1.0.0"
 stem = "^1.8.0"
 pandas = "^2.2.1"
 openpyxl = "^3.0.10"
 tomli = "^2.2.1"
 [tool.poetry.group.dev.dependencies]
 jsonschema = "^4.0.0"
 rstr = "^3.2.2"
 pytest = "^8.4.2"
 pytest-xdist = "^3.8.0"
 [tool.poetry.group.ci.dependencies]
 defusedxml = "^0.7.1"
 [tool.poetry.scripts]
 sherlock = 'sherlock_project.sherlock:main'
@@ -1,7 +0,0 @@
 [pytest]
 addopts = --strict-markers -m "not validate_targets"
 markers =
    online: mark tests as requiring internet access.
    validate_targets: mark tests for sweeping manifest validation (sends many requests).
    validate_targets_fp: validate_targets, false positive tests only.
    validate_targets_fn: validate_targets, false negative tests only.
@@ -1,30 +0,0 @@
 """ Sherlock Module
 This module contains the main logic to search for usernames at social
 networks.
 """
 from importlib.metadata import version as pkg_version, PackageNotFoundError
 import pathlib
 import tomli
 def get_version() -> str:
    """Return the version string of the package.
    The version of the installed "sherlock_project" distribution is used when
    one is found. Otherwise the version is read from the [tool.poetry] table
    of the pyproject.toml file located two directory levels above this file.
    """
    try:
        installed_version = pkg_version("sherlock_project")
    except PackageNotFoundError:
        # No installed distribution metadata: fall back to pyproject.toml
        package_dir = pathlib.Path(__file__).resolve().parent
        pyproject_path: pathlib.Path = package_dir.parent / "pyproject.toml"
        with pyproject_path.open("rb") as toml_file:
            pyproject_data = tomli.load(toml_file)
        poetry_table = pyproject_data["tool"]["poetry"]
        return poetry_table["version"]
    return installed_version
 # This variable is only used to check for ImportErrors induced by users running as script rather than as module or package
 import_error_test_var = None
 # Short and long display names of the application
 __shortname__   = "Sherlock"
 __longname__    = "Sherlock: Find Usernames Across Social Networks"
 # Resolved once, when this module is imported (see get_version)
 __version__     = get_version()
 # GitHub API URL for the latest release of the sherlock-project/sherlock repository
 forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest"
@@ -1,22 +0,0 @@
 #! /usr/bin/env python3
 """
 Sherlock: Find Usernames Across Social Networks Module
 This module contains the main logic to search for usernames at social
 networks.
 """
 import sys
 if __name__ == "__main__":
    # Refuse to start on interpreters older than the minimum supported version
    python_version = sys.version.split()[0]
    if not sys.version_info >= (3, 9):
        print(f"Sherlock requires Python 3.9+\nYou are using Python {python_version}, which is not supported by Sherlock.")
        sys.exit(1)
    # Imported only after the version check above has passed
    from sherlock_project import sherlock
    sherlock.main()
@@ -1,279 +0,0 @@
 """Sherlock Notify Module
 This module defines the objects for notifying the caller about the
 results of queries.
 """
 from sherlock_project.result import QueryStatus
 from colorama import Fore, Style
 import webbrowser
 # Global variable to count the number of results.
 globvar = 0
 class QueryNotify:
    """Base notifier for query results.
    Defines the hooks (start, update, finish) through which a caller is told
    about the progress and the results of a series of queries. Subclasses are
    expected to override these hooks with concrete behavior.
    """
    def __init__(self, result=None):
        """Initialize the notifier.
        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() to start with.
                                  Default is None.
        Return Value:
        Nothing.
        """
        self.result = result
    def start(self, message=None):
        """Hook called before any query is performed.
        The base implementation does nothing.
        Keyword Arguments:
        self                   -- This object.
        message                -- Object giving context to the start of the
                                  queries.
                                  Default is None.
        Return Value:
        Nothing.
        """
        return None
    def update(self, result):
        """Hook called with the result of a query.
        The base implementation only remembers the result.
        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.
        Return Value:
        Nothing.
        """
        self.result = result
    def finish(self, message=None):
        """Hook called after all queries have been performed.
        The base implementation does nothing.
        Keyword Arguments:
        self                   -- This object.
        message                -- Object giving context to the end of the
                                  queries.
                                  Default is None.
        Return Value:
        Nothing.
        """
        return None
    def __str__(self):
        """Convert Object To String.
        Keyword Arguments:
        self                   -- This object.
        Return Value:
        String form of the most recently stored result.
        """
        latest_result = self.result
        return str(latest_result)
 class QueryNotifyPrint(QueryNotify):
    """Query Notify Print Object.
    Query notify class that prints results to the standard output.
    """
    def __init__(self, result=None, verbose=False, print_all=False, browse=False):
        """Create Query Notify Print Object.
        Contains information about a specific method of notifying the results
        of a query.
        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.
        verbose                -- Boolean indicating whether to print the
                                  response time next to the result.
        print_all              -- Boolean indicating whether to also print
                                  sites where the username was not claimed
                                  (not found, error, illegal, blocked).
        browse                 -- Boolean indicating whether to open found sites in a web browser.
        Return Value:
        Nothing.
        """
        super().__init__(result)
        self.verbose = verbose
        self.print_all = print_all
        self.browse = browse
    def start(self, message):
        """Notify Start.
        Will print the title to the standard output.
        Keyword Arguments:
        self                   -- This object.
        message                -- String containing username that the series
                                  of queries are about.
        Return Value:
        Nothing.
        """
        title = "Checking username"
        print(Style.BRIGHT + Fore.GREEN + "[" +
              Fore.YELLOW + "*" +
              Fore.GREEN + f"] {title}" +
              Fore.WHITE + f" {message}" +
              Fore.GREEN + " on:")
        # An empty line between first line and the result(more clear output)
        print('\r')
    def countResults(self):
        """Increment the module-level result counter and return its new value.
        The counter is shared by every notifier in the module; it is not
        reset between usernames.
        Keyword Arguments:
        self                   -- This object.
        Return Value:
        The number of results counted so far, including this call.
        """
        global globvar
        globvar += 1
        return globvar
    def update(self, result):
        """Notify Update.
        Will print the query result to the standard output. Claimed usernames
        are always printed and counted; every other status is printed only
        when print_all is set.
        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.
        Return Value:
        Nothing.
        Raises ValueError if the status of the result is not a known one.
        """
        self.result = result
        # The response time is only shown in verbose mode
        response_time_text = ""
        if self.result.query_time is not None and self.verbose is True:
            response_time_text = f" [{round(self.result.query_time * 1000)}ms]"
        # Output to the terminal is desired.
        if result.status == QueryStatus.CLAIMED:
            self.countResults()
            print(Style.BRIGHT + Fore.WHITE + "[" +
                  Fore.GREEN + "+" +
                  Fore.WHITE + "]" +
                  response_time_text +
                  Fore.GREEN +
                  f" {self.result.site_name}: " +
                  Style.RESET_ALL +
                  f"{self.result.site_url_user}")
            if self.browse:
                # Second argument 2 asks for a new browser tab where possible
                webbrowser.open(self.result.site_url_user, 2)
        elif result.status == QueryStatus.AVAILABLE:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      response_time_text +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.YELLOW + " Not Found!")
        elif result.status == QueryStatus.UNKNOWN:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.RED + f" {self.result.context}" +
                      Fore.YELLOW + " ")
        elif result.status == QueryStatus.ILLEGAL:
            if self.print_all:
                msg = "Illegal Username Format For This Site!"
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.YELLOW + f" {msg}")
        elif result.status == QueryStatus.WAF:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.RED + " Blocked by bot detection" +
                      Fore.YELLOW + " (proxy may help)")
        else:
            # It should be impossible to ever get here...
            raise ValueError(
                f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'"
            )
    def finish(self, message="The processing has been finished."):
        """Notify Finish.
        Will print the number of results counted so far to the standard output.
        Keyword Arguments:
        self                   -- This object.
        message                -- Not used by this implementation; kept so
                                  that existing callers keep working.
        Return Value:
        Nothing.
        """
        # Read the counter directly: going through countResults() would
        # increment it, so every extra call to finish() would inflate the
        # reported number of results.
        NumberOfResults = globvar
        print(Style.BRIGHT + Fore.GREEN + "[" +
              Fore.YELLOW + "*" +
              Fore.GREEN + "] Search completed with" +
              Fore.WHITE + f" {NumberOfResults} " +
              Fore.GREEN + "results" + Style.RESET_ALL
              )
    def __str__(self):
        """Convert Object To String.
        Keyword Arguments:
        self                   -- This object.
        Return Value:
        Nicely formatted string to get information about this object.
        """
        return str(self.result)
@@ -1,149 +0,0 @@
 {
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "Sherlock Target Manifest",
  "description": "Social media targets to probe for the existence of known usernames",
  "type": "object",
  "properties": {
    "$schema": { "type": "string" }
  },
  "patternProperties": {
    "^(?!\\$).*?$": {
      "type": "object",
      "description": "Target name and associated information (key should be human readable name)",
      "required": ["url", "urlMain", "errorType", "username_claimed"],
      "properties": {
        "url": { "type": "string" },
        "urlMain": { "type": "string" },
        "urlProbe": { "type": "string" },
        "username_claimed": { "type": "string" },
        "regexCheck": { "type": "string" },
        "isNSFW": { "type": "boolean" },
        "headers": { "type": "object" },
        "request_payload": { "type": "object" },
        "__comment__": {
          "type": "string",
          "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
        },
        "tags": {
          "oneOf": [
            { "$ref": "#/$defs/tag" },
            { "type": "array", "items": { "$ref": "#/$defs/tag" } }
          ]
        },
        "request_method": {
          "type": "string",
          "enum": ["GET", "POST", "HEAD", "PUT"]
        },
        "errorType": {
          "oneOf": [
            {
              "type": "string",
              "enum": ["message", "response_url", "status_code"]
            },
            {
              "type": "array",
              "items": {
                "type": "string",
                "enum": ["message", "response_url", "status_code"]
              }
            }
          ]
        },
        "errorMsg": {
          "oneOf": [
            { "type": "string" },
            { "type": "array", "items": { "type": "string" } }
          ]
        },
        "errorCode": {
          "oneOf": [
            { "type": "integer" },
            { "type": "array", "items": { "type": "integer" } }
          ]
        },
        "errorUrl": { "type": "string" },
        "response_url": { "type": "string" }
      },
      "dependencies": {
        "errorMsg": {
          "oneOf": [
            { "properties": { "errorType": { "const": "message" } } },
            {
              "properties": {
                "errorType": {
                  "type": "array",
                  "contains": { "const": "message" }
                }
              }
            }
          ]
        },
        "errorUrl": {
          "oneOf": [
            { "properties": { "errorType": { "const": "response_url" } } },
            {
              "properties": {
                "errorType": {
                  "type": "array",
                  "contains": { "const": "response_url" }
                }
              }
            }
          ]
        },
        "errorCode": {
          "oneOf": [
            { "properties": { "errorType": { "const": "status_code" } } },
            {
              "properties": {
                "errorType": {
                  "type": "array",
                  "contains": { "const": "status_code" }
                }
              }
            }
          ]
        }
      },
      "allOf": [
        {
          "if": {
            "anyOf": [
              { "properties": { "errorType": { "const": "message" } } },
              {
                "properties": {
                  "errorType": {
                    "type": "array",
                    "contains": { "const": "message" }
                  }
                }
              }
            ]
          },
          "then": { "required": ["errorMsg"] }
        },
        {
          "if": {
            "anyOf": [
              { "properties": { "errorType": { "const": "response_url" } } },
              {
                "properties": {
                  "errorType": {
                    "type": "array",
                    "contains": { "const": "response_url" }
                  }
                }
              }
            ]
          },
          "then": { "required": ["errorUrl"] }
        }
      ],
      "additionalProperties": false
    }
  },
  "additionalProperties": false,
  "$defs": {
    "tag": { "type": "string", "enum": ["adult", "gaming"] }
  }
 }
@@ -1,89 +0,0 @@
 """Sherlock Result Module
 This module defines various objects for recording the results of queries.
 """
 from enum import Enum
 class QueryStatus(Enum):
    """Enumeration of the possible outcomes of a username query.
    Each member's value is the human-readable label for that outcome.
    """
    CLAIMED = "Claimed"      # Username was detected on the site
    AVAILABLE = "Available"  # Username was not detected on the site
    UNKNOWN = "Unknown"      # An error occurred while trying to detect the username
    ILLEGAL = "Illegal"      # Username is not allowable for this site
    WAF = "WAF"              # Request was blocked by a WAF (e.g. Cloudflare)
    def __str__(self):
        """Render this status as text.
        Keyword Arguments:
        self                   -- This object.
        Return Value:
        The label stored as this member's value.
        """
        label = self.value
        return label
 class QueryResult:
    """Outcome of checking one username against one site.
    Instances are plain records: every constructor argument is stored
    unchanged on an attribute of the same name.
    """
    def __init__(self, username, site_name, site_url_user, status,
                 query_time=None, context=None):
        """Store the details of a single query result.
        Keyword Arguments:
        self                   -- This object.
        username               -- String with the username the query was about.
        site_name              -- String which identifies the site.
        site_url_user          -- String containing the URL for the username
                                  on the site.  The page may or may not
                                  exist: this is only what the address
                                  would be if it did.
        status                 -- QueryStatus member describing the outcome
                                  of the query.
        query_time             -- Time (in seconds) required to perform the
                                  query.  Default of None.
        context                -- String with any additional context about
                                  the query, for example the kind of error
                                  that occurred.  Default of None.
        Return Value:
        Nothing.
        """
        self.username = username
        self.site_name = site_name
        self.site_url_user = site_url_user
        self.status = status
        self.query_time = query_time
        self.context = context
    def __str__(self):
        """Render this result as text.
        Keyword Arguments:
        self                   -- This object.
        Return Value:
        The status text, followed by the context in parentheses when a
        context was recorded.
        """
        rendered = str(self.status)
        if self.context is None:
            # No extra information to show: the bare status is the answer.
            return rendered
        return rendered + f" ({self.context})"
@@ -1,935 +0,0 @@
 #! /usr/bin/env python3
 """
 Sherlock: Find Usernames Across Social Networks Module
 This module contains the main logic to search for usernames at social
 networks.
 """
 import sys
 try:
    from sherlock_project.__init__ import import_error_test_var # noqa: F401
 except ImportError:
    print("Did you run Sherlock with `python3 sherlock/sherlock.py ...`?")
    print("This is an outdated method. Please see https://sherlockproject.xyz/installation for up to date instructions.")
    sys.exit(1)
 import csv
 import signal
 import pandas as pd
 import os
 import re
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
 from json import loads as json_loads
 from time import monotonic
 from typing import Optional
 import requests
 from requests_futures.sessions import FuturesSession
 from sherlock_project.__init__ import (
    __longname__,
    __shortname__,
    __version__,
    forge_api_latest_release,
 )
 from sherlock_project.result import QueryStatus
 from sherlock_project.result import QueryResult
 from sherlock_project.notify import QueryNotify
 from sherlock_project.notify import QueryNotifyPrint
 from sherlock_project.sites import SitesInformation
 from colorama import init
 from argparse import ArgumentTypeError
 class SherlockFuturesSession(FuturesSession):
    """FuturesSession variant that records how long each request took."""
    def request(self, method, url, hooks=None, *args, **kwargs):
        """Request URL.
        This extends the FuturesSession request method to attach a response
        time metric to each response.
        The approach follows the "working in the background" section of the
        requests-futures README:
        https://github.com/ross/requests-futures#working-in-the-background
        Keyword Arguments:
        self                   -- This object.
        method                 -- String containing method desired for request.
        url                    -- String containing URL for request.
        hooks                  -- Dictionary containing hooks to execute after
                                  request finishes.  It is not modified: the
                                  timing hook is added to a private copy.
        args                   -- Arguments.
        kwargs                 -- Keyword arguments.
        Return Value:
        Whatever the parent class's request() returns for this request.
        """
        # Record the start time for the request.
        start = monotonic()
        def response_time(resp, *args, **kwargs):
            """Response Time Hook.
            Keyword Arguments:
            resp                   -- Response object.
            args                   -- Arguments.
            kwargs                 -- Keyword arguments.
            Return Value:
            Nothing.
            """
            # Overwrite the response's elapsed attribute with the number of
            # seconds (a float) measured from the start of this call.
            resp.elapsed = monotonic() - start
        # Work on a shallow copy so the caller's dictionary (and any hook list
        # stored in it) is never mutated.  Otherwise a hooks object reused for
        # several requests would accumulate one stale timing hook per call.
        hooks = {} if hooks is None else dict(hooks)
        # Install hook to execute when response completes.
        # Make sure that the time measurement hook is first, so we will not
        # track any later hook's execution time.
        if "response" not in hooks:
            # No response hook was already defined, so install it ourselves.
            hooks["response"] = [response_time]
        else:
            existing = hooks["response"]
            if isinstance(existing, (list, tuple)):
                # Build a fresh list rather than inserting into the caller's.
                hooks["response"] = [response_time, *existing]
            else:
                # Must have previously contained a single hook function,
                # so convert to list.
                hooks["response"] = [response_time, existing]
        return super().request(method, url, hooks=hooks, *args, **kwargs)
 def get_response(request_future, error_type, social_network):
    """Wait for a request future and classify any failure.
    Keyword Arguments:
    request_future         -- Object whose result() method yields the response
                              or raises the exception the request hit.
    error_type             -- Not used by this function.
    social_network         -- Not used by this function.
    Return Value:
    Tuple of (response, error_context, exception_text).  response is None
    when result() raised one of the handled exceptions.  error_context is
    None only when a response with a truthy status_code was obtained;
    otherwise it is a short label for the failure.  exception_text is the
    text of the handled exception, or None when none was raised.
    """
    # Pessimistic defaults, replaced below as soon as more is known.
    reply = None
    failure_label = "General Unknown Error"
    failure_detail = None
    try:
        reply = request_future.result()
        if reply.status_code:
            # Status code exists in response object
            failure_label = None
    except requests.exceptions.HTTPError as exc:
        failure_label, failure_detail = "HTTP Error", str(exc)
    except requests.exceptions.ProxyError as exc:
        failure_label, failure_detail = "Proxy Error", str(exc)
    except requests.exceptions.ConnectionError as exc:
        failure_label, failure_detail = "Error Connecting", str(exc)
    except requests.exceptions.Timeout as exc:
        failure_label, failure_detail = "Timeout Error", str(exc)
    except requests.exceptions.RequestException as exc:
        failure_label, failure_detail = "Unknown Error", str(exc)
    except UnicodeError as exc:
        failure_label, failure_detail = "Encoding Error", str(exc)
    return reply, failure_label, failure_detail
 def interpolate_string(input_object, username):
    """Substitute the username for every "{}" placeholder in a structure.
    Keyword Arguments:
    input_object           -- String, dictionary, list, or any other value.
                              Dictionary values and list items are processed
                              recursively; dictionary keys are left as-is.
    username               -- String to put in place of each "{}".
    Return Value:
    A new string, dictionary or list with the placeholders replaced.  Any
    other kind of value is returned unchanged.
    """
    if isinstance(input_object, list):
        return [interpolate_string(item, username) for item in input_object]
    if isinstance(input_object, dict):
        filled = {}
        for key, value in input_object.items():
            filled[key] = interpolate_string(value, username)
        return filled
    if isinstance(input_object, str):
        return input_object.replace("{}", username)
    # Numbers, None, booleans, ... carry no placeholders.
    return input_object
 def check_for_parameter(username):
    """Report whether the username contains the "{?}" placeholder.
    A username with the placeholder asks for several similar usernames to
    be searched instead of a single one.
    """
    placeholder = "{?}"
    return placeholder in username
 checksymbols = ["_", "-", "."]
 def multiple_usernames(username):
    """Expand the "{?}" placeholder into one username per check symbol.
    Returns a list with one entry for each symbol in checksymbols, in that
    order, where every "{?}" in the username is replaced by the symbol.
    """
    return [username.replace("{?}", symbol) for symbol in checksymbols]
 def sherlock(
    username: str,
    site_data: dict[str, dict[str, str]],
    query_notify: QueryNotify,
    dump_response: bool = False,
    proxy: Optional[str] = None,
    timeout: int = 60,
 ) -> dict[str, dict[str, str | QueryResult]]:
    """Run Sherlock Analysis.
    Checks for existence of username on various social media sites.
    Keyword Arguments:
    username               -- String indicating username that report
                              should be created against.
    site_data              -- Dictionary containing all of the site data.
                              NOTE:  The request future of every queried
                                     site is stored back into its entry
                                     under the "request_future" key.
    query_notify           -- Object with base type of QueryNotify().
                              This will be used to notify the caller about
                              query results.
    dump_response          -- Boolean.  When True, the details of every
                              response are printed to stdout for debugging.
                              Default is False.
    proxy                  -- String indicating the proxy URL
    timeout                -- Time in seconds to wait before timing out request.
                              Default is 60 seconds.
    Return Value:
    Dictionary containing results from report. Key of dictionary is the name
    of the social network site, and the value is another dictionary with
    the following keys:
        url_main:      URL of main site.
        url_user:      URL of user on site (if account exists).
        status:        QueryResult() object indicating results of test for
                       account existence.
        http_status:   HTTP status code of query which checked for existence on
                       site.
        response_text: Text that came back from request.  Empty if there
                       was an error when checking for existence.
    Raises:
    RuntimeError when a site declares a request_method other than GET, HEAD,
    POST or PUT.
    """
    # Notify caller that we are starting the query.
    query_notify.start(username)
    # Normal requests
    underlying_session = requests.session()
    # Limit number of workers to 20 (probably vastly overkill), but never go
    # below one: a worker count of zero is rejected by the thread pool, which
    # would otherwise make an empty site_data crash instead of returning {}.
    max_workers = max(1, min(20, len(site_data)))
    # Create multi-threaded session for all requests.
    session = SherlockFuturesSession(
        max_workers=max_workers, session=underlying_session
    )
    # Results from analysis of all sites
    results_total = {}
    # First create futures for all requests. This allows for the requests to run in parallel
    for social_network, net_info in site_data.items():
        # Results from analysis of this specific site
        # Record URL of main site
        results_site = {"url_main": net_info.get("urlMain")}
        # A user agent is needed because some sites don't return the correct
        # information since they think that we are bots (Which we actually are...)
        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0",
        }
        if "headers" in net_info:
            # Override/append any extra headers required by a given site.
            headers.update(net_info["headers"])
        # URL of user on site (if it exists)
        url = interpolate_string(net_info["url"], username.replace(' ', '%20'))
        # Don't make request if username is invalid for the site
        regex_check = net_info.get("regexCheck")
        if regex_check and re.search(regex_check, username) is None:
            # No need to do the check at the site: this username is not allowed.
            results_site["status"] = QueryResult(
                username, social_network, url, QueryStatus.ILLEGAL
            )
            results_site["url_user"] = ""
            results_site["http_status"] = ""
            results_site["response_text"] = ""
            query_notify.update(results_site["status"])
        else:
            # URL of user on site (if it exists)
            results_site["url_user"] = url
            url_probe = net_info.get("urlProbe")
            request_method = net_info.get("request_method")
            request_payload = net_info.get("request_payload")
            request = None
            if request_method is not None:
                if request_method == "GET":
                    request = session.get
                elif request_method == "HEAD":
                    request = session.head
                elif request_method == "POST":
                    request = session.post
                elif request_method == "PUT":
                    request = session.put
                else:
                    raise RuntimeError(f"Unsupported request_method for {url}")
            if request_payload is not None:
                request_payload = interpolate_string(request_payload, username)
            if url_probe is None:
                # Probe URL is normal one seen by people out on the web.
                url_probe = url
            else:
                # There is a special URL for probing existence separate
                # from where the user profile normally can be found.
                url_probe = interpolate_string(url_probe, username)
            declared_error_type = net_info["errorType"]
            if request is None:
                if declared_error_type == "status_code":
                    # In most cases when we are detecting by status code,
                    # it is not necessary to get the entire body:  we can
                    # detect fine with just the HEAD response.
                    request = session.head
                else:
                    # Either this detect method needs the content associated
                    # with the GET response, or this specific website will
                    # not respond properly unless we request the whole page.
                    request = session.get
            # Site forwards request to a different URL if username not
            # found.  Disallow the redirect so we can capture the http
            # status from the original URL request.  errorType may be a
            # single string or a list of strings; the evaluation below
            # relies on redirects being off whenever "response_url" is one
            # of the methods, so both spellings must be recognised here.
            uses_response_url = declared_error_type == "response_url" or (
                isinstance(declared_error_type, (list, tuple))
                and "response_url" in declared_error_type
            )
            # Otherwise allow whatever redirect that the site wants to do.
            # The final result of the request will be what is available.
            allow_redirects = not uses_response_url
            request_kwargs = {
                "url": url_probe,
                "headers": headers,
                "allow_redirects": allow_redirects,
                "timeout": timeout,
                "json": request_payload,
            }
            if proxy is not None:
                request_kwargs["proxies"] = {"http": proxy, "https": proxy}
            # This future starts running the request in a new thread, doesn't block the main thread
            future = request(**request_kwargs)
            # Store future in data for access later
            net_info["request_future"] = future
        # Add this site's results into final dictionary with all the other results.
        results_total[social_network] = results_site
    # As WAFs advance and evolve, they will occasionally block Sherlock and
    # lead to false positives and negatives. Fingerprints should be added
    # here to filter results that fail to bypass WAFs. Fingerprints should
    # be highly targeted. Comment at the end of each fingerprint to
    # indicate target and date fingerprinted.
    WAFHitMsgs = [
        r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare
        r'<span id="challenge-error-text">', # 2024-11-11 Cloudflare error page
        r'AwsWafIntegration.forceRefreshToken', # 2024-11-11 Cloudfront (AWS)
        r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:' # 2024-04-09 PerimeterX / Human Security
    ]
    # Second pass: collect every outstanding response and evaluate it.
    for social_network, net_info in site_data.items():
        # Retrieve results again
        results_site = results_total.get(social_network)
        # Retrieve other site information again
        url = results_site.get("url_user")
        status = results_site.get("status")
        if status is not None:
            # We have already determined the user doesn't exist here
            continue
        # Get the expected error type
        error_type = net_info["errorType"]
        if isinstance(error_type, str):
            error_type = [error_type]
        # Retrieve future and ensure it has finished
        future = net_info["request_future"]
        r, error_text, exception_text = get_response(
            request_future=future, error_type=error_type, social_network=social_network
        )
        # Get response time for response of our request.
        try:
            response_time = r.elapsed
        except AttributeError:
            response_time = None
        # Attempt to get request information
        try:
            http_status = r.status_code
        except Exception:
            http_status = "?"
        try:
            response_text = r.text.encode(r.encoding or "UTF-8")
        except Exception:
            response_text = ""
        query_status = QueryStatus.UNKNOWN
        error_context = None
        if error_text is not None:
            error_context = error_text
        elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
            query_status = QueryStatus.WAF
        elif any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
            error_context = f"Unknown error type '{error_type}' for {social_network}"
            query_status = QueryStatus.UNKNOWN
        elif "message" in error_type and net_info.get("errorMsg") is None:
            # Message detection cannot work without the text to look for.
            # Report the broken site entry instead of crashing the whole run.
            error_context = f"Missing errorMsg for {social_network}"
            query_status = QueryStatus.UNKNOWN
        else:
            if "message" in error_type:
                # errorMsg can be a single string or a collection of
                # strings: treat the single string as a one-item list.
                errors = net_info.get("errorMsg")
                if isinstance(errors, str):
                    errors = [errors]
                # Any error message present in the HTML means that the
                # username was not found on the site.
                if any(error in r.text for error in errors):
                    query_status = QueryStatus.AVAILABLE
                else:
                    query_status = QueryStatus.CLAIMED
            if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
                error_codes = net_info.get("errorCode")
                query_status = QueryStatus.CLAIMED
                # Type consistency, allowing for both singlets and lists in manifest
                if isinstance(error_codes, int):
                    error_codes = [error_codes]
                if error_codes is not None and r.status_code in error_codes:
                    query_status = QueryStatus.AVAILABLE
                elif r.status_code >= 300 or r.status_code < 200:
                    query_status = QueryStatus.AVAILABLE
            if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
                # For this detection method, we have turned off the redirect.
                # So, there is no need to check the response URL: it will always
                # match the request.  Instead, we will ensure that the response
                # code indicates that the request was successful (i.e. no 404, or
                # forward to some odd redirect).
                if 200 <= r.status_code < 300:
                    query_status = QueryStatus.CLAIMED
                else:
                    query_status = QueryStatus.AVAILABLE
        if dump_response:
            print("+++++++++++++++++++++")
            print(f"TARGET NAME   : {social_network}")
            print(f"USERNAME      : {username}")
            print(f"TARGET URL    : {url}")
            print(f"TEST METHOD   : {error_type}")
            try:
                print(f"STATUS CODES  : {net_info['errorCode']}")
            except KeyError:
                pass
            print("Results...")
            try:
                print(f"RESPONSE CODE : {r.status_code}")
            except Exception:
                pass
            try:
                print(f"ERROR TEXT    : {net_info['errorMsg']}")
            except KeyError:
                pass
            print(">>>>> BEGIN RESPONSE TEXT")
            try:
                print(r.text)
            except Exception:
                pass
            print("<<<<< END RESPONSE TEXT")
            print("VERDICT       : " + str(query_status))
            print("+++++++++++++++++++++")
        # Notify caller about results of query.
        result: QueryResult = QueryResult(
            username=username,
            site_name=social_network,
            site_url_user=url,
            status=query_status,
            query_time=response_time,
            context=error_context,
        )
        query_notify.update(result)
        # Save status of request
        results_site["status"] = result
        # Save results from request
        results_site["http_status"] = http_status
        results_site["response_text"] = response_text
        # Add this site's results into final dictionary with all of the other results.
        results_total[social_network] = results_site
    return results_total
 def timeout_check(value):
    """Check Timeout Argument.
    Checks timeout for validity.
    Keyword Arguments:
    value                  -- Time in seconds to wait before timing out request.
    Return Value:
    Floating point number representing the time (in seconds) that should be
    used for the timeout.
    NOTE:  Raises ArgumentTypeError if the timeout is zero, negative, NaN
           or infinite, and ValueError if it cannot be converted to a
           floating point number at all.
    """
    import math
    float_value = float(value)
    # NaN compares False against everything, so "<= 0" alone would let it
    # through; infinity is rejected as well since it is not a usable duration.
    if not math.isfinite(float_value) or float_value <= 0:
        raise ArgumentTypeError(
            f"Invalid timeout value: {value}. Timeout must be a positive number."
        )
    return float_value
 def handler(signal_received, frame):
    """Signal handler that ends the program quietly with exit status 0.
    Both arguments are accepted only because the signal handler calling
    convention passes them; neither is used.
    Source: https://www.devdungeon.com/content/python-catch-sigint-ctrl-c
    """
    exit_status = 0
    sys.exit(exit_status)
 def main():
    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=f"{__longname__} (Version {__version__})",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"{__shortname__} v{__version__}",
        help="Display version information and dependencies.",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        "-d",
        "--debug",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra debugging information and metrics.",
    )
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        help="If using multiple usernames, the output of the results will be saved to this folder.",
    )
    parser.add_argument(
        "--output",
        "-o",
        dest="output",
        help="If using single username, the output of the result will be saved to this file.",
    )
    parser.add_argument(
        "--csv",
        action="store_true",
        dest="csv",
        default=False,
        help="Create Comma-Separated Values (CSV) File.",
    )
    parser.add_argument(
        "--xlsx",
        action="store_true",
        dest="xlsx",
        default=False,
        help="Create the standard file for the modern Microsoft Excel spreadsheet (xlsx).",
    )
    parser.add_argument(
        "--site",
        action="append",
        metavar="SITE_NAME",
        dest="site_list",
        default=[],
        help="Limit analysis to just the listed sites. Add multiple options to specify more than one site.",
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar="PROXY_URL",
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
    )
    parser.add_argument(
        "--dump-response",
        action="store_true",
        dest="dump_response",
        default=False,
        help="Dump the HTTP response to stdout for targeted debugging.",
    )
    parser.add_argument(
        "--json",
        "-j",
        metavar="JSON_FILE",
        dest="json_file",
        default=None,
        help="Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.",
    )
    parser.add_argument(
        "--timeout",
        action="store",
        metavar="TIMEOUT",
        dest="timeout",
        type=timeout_check,
        default=60,
        help="Time (in seconds) to wait for response to requests (Default: 60)",
    )
    parser.add_argument(
        "--print-all",
        action="store_true",
        dest="print_all",
        default=False,
        help="Output sites where the username was not found.",
    )
    parser.add_argument(
        "--print-found",
        action="store_true",
        dest="print_found",
        default=True,
        help="Output sites where the username was found (also if exported as file).",
    )
    parser.add_argument(
        "--no-color",
        action="store_true",
        dest="no_color",
        default=False,
        help="Don't color terminal output",
    )
    parser.add_argument(
        "username",
        nargs="+",
        metavar="USERNAMES",
        action="store",
        help="One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').",
    )
    parser.add_argument(
        "--browse",
        "-b",
        action="store_true",
        dest="browse",
        default=False,
        help="Browse to all results on default browser.",
    )
    parser.add_argument(
        "--local",
        "-l",
        action="store_true",
        default=False,
        help="Force the use of the local data.json file.",
    )
    parser.add_argument(
        "--nsfw",
        action="store_true",
        default=False,
        help="Include checking of NSFW sites from default list.",
    )
    parser.add_argument(
        "--txt",
        action="store_true",
        dest="output_txt",
        default=False,
        help="Enable creation of a txt file",
    )
    parser.add_argument(
        "--ignore-exclusions",
        action="store_true",
        dest="ignore_exclusions",
        default=False,
        help="Ignore upstream exclusions (may return more false positives)",
    )
    args = parser.parse_args()
    # If the user presses CTRL-C, exit gracefully without throwing errors
    signal.signal(signal.SIGINT, handler)
    # Check for newer version of Sherlock. If it exists, let the user know about it
    try:
        latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
        latest_release_json = json_loads(latest_release_raw)
        latest_remote_tag = latest_release_json["tag_name"]
        if latest_remote_tag[1:] != __version__:
            print(
                f"Update available! {__version__} --> {latest_remote_tag[1:]}"
                f"\n{latest_release_json['html_url']}"
            )
    except Exception as error:
        print(f"A problem occurred while checking for an update: {error}")
    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)
    if args.no_color:
        # Disable color output.
        init(strip=True, convert=False)
    else:
        # Enable color output.
        init(autoreset=True)
    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)
    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)
    # Create object with all information about sites we are aware of.
    try:
        if args.local:
            sites = SitesInformation(
                os.path.join(os.path.dirname(__file__), "resources/data.json"),
                honor_exclusions=False,
            )
        else:
            json_file_location = args.json_file
            if args.json_file:
                # If --json parameter is a number, interpret it as a pull request number
                if args.json_file.isnumeric():
                    pull_number = args.json_file
                    pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
                    pull_request_raw = requests.get(pull_url, timeout=10).text
                    pull_request_json = json_loads(pull_request_raw)
                    # Check if it's a valid pull request
                    if "message" in pull_request_json:
                        print(f"ERROR: Pull request #{pull_number} not found.")
                        sys.exit(1)
                    head_commit_sha = pull_request_json["head"]["sha"]
                    json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"
            sites = SitesInformation(
                data_file_path=json_file_location,
                honor_exclusions=not args.ignore_exclusions,
                do_not_exclude=args.site_list,
            )
    except Exception as error:
        print(f"ERROR:  {error}")
        sys.exit(1)
    if not args.nsfw:
        sites.remove_nsfw_sites(do_not_remove=args.site_list)
    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
    site_data_all = {site.name: site.information for site in sites}
    if args.site_list == []:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.
        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            counter = 0
            for existing_site in site_data_all:
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
                    counter += 1
            if counter == 0:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")
        if site_missing:
            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")
        if not site_data:
            sys.exit(1)
    # Create notify object for query results.
    query_notify = QueryNotifyPrint(
        result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse
    )
    # Run report on all specified users.
    all_usernames = []
    for username in args.username:
        if check_for_parameter(username):
            for name in multiple_usernames(username):
                all_usernames.append(name)
        else:
            all_usernames.append(username)
    for username in all_usernames:
        results = sherlock(
            username,
            site_data,
            query_notify,
            dump_response=args.dump_response,
            proxy=args.proxy,
            timeout=args.timeout,
        )
        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The usernames results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"
        if args.output_txt:
            with open(result_file, "w", encoding="utf-8") as file:
                exists_counter = 0
                for website_name in results:
                    dictionary = results[website_name]
                    if dictionary.get("status").status == QueryStatus.CLAIMED:
                        exists_counter += 1
                        file.write(dictionary["url_user"] + "\n")
                file.write(f"Total Websites Username Detected On : {exists_counter}\n")
        if args.csv:
            result_file = f"{username}.csv"
            if args.folderoutput:
                # The usernames results should be stored in a targeted folder.
                # If the folder doesn't exist, create it first
                os.makedirs(args.folderoutput, exist_ok=True)
                result_file = os.path.join(args.folderoutput, result_file)
            with open(result_file, "w", newline="", encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow(
                    [
                        "username",
                        "name",
                        "url_main",
                        "url_user",
                        "exists",
                        "http_status",
                        "response_time_s",
                    ]
                )
                for site in results:
                    if (
                        args.print_found
                        and not args.print_all
                        and results[site]["status"].status != QueryStatus.CLAIMED
                    ):
                        continue
                    response_time_s = results[site]["status"].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow(
                        [
                            username,
                            site,
                            results[site]["url_main"],
                            results[site]["url_user"],
                            str(results[site]["status"].status),
                            results[site]["http_status"],
                            response_time_s,
                        ]
                    )
        if args.xlsx:
            usernames = []
            names = []
            url_main = []
            url_user = []
            exists = []
            http_status = []
            response_time_s = []
            for site in results:
                if (
                    args.print_found
                    and not args.print_all
                    and results[site]["status"].status != QueryStatus.CLAIMED
                ):
                    continue
                if response_time_s is None:
                    response_time_s.append("")
                else:
                    response_time_s.append(results[site]["status"].query_time)
                usernames.append(username)
                names.append(site)
                url_main.append(results[site]["url_main"])
                url_user.append(results[site]["url_user"])
                exists.append(str(results[site]["status"].status))
                http_status.append(results[site]["http_status"])
            DataFrame = pd.DataFrame(
                {
                    "username": usernames,
                    "name": names,
                    "url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main],
                    "url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user],
                    "exists": exists,
                    "http_status": http_status,
                    "response_time_s": response_time_s,
                }
            )
            DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False)
        print()
    query_notify.finish()
 # Script entry point: run main() only when this module is executed directly,
 # not when it is imported by another module.
 if __name__ == "__main__":
    main()
@@ -1,260 +0,0 @@
 """Sherlock Sites Information Module
 This module supports storing information about websites.
 This is the raw data that will be used to search for usernames.
 """
 import json
 import requests
 import secrets
 # Location SitesInformation loads site data from when no data file path is supplied.
 MANIFEST_URL = "https://data.sherlockproject.xyz"
 # Text file (read line by line) naming sites that SitesInformation drops when exclusions are honored.
 EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"
 class SiteInformation:
    """Information about one website that can be searched for a username."""
    def __init__(self, name, url_home, url_username_format, username_claimed,
                information, is_nsfw, username_unclaimed=None):
        """Create Site Information Object.
        Contains information about a specific website.
        Keyword Arguments:
        self                   -- This object.
        name                   -- String which identifies site.
        url_home               -- String containing URL for home of site.
        url_username_format    -- String containing URL for Username format
                                  on site.
                                  NOTE:  The string should contain the
                                         token "{}" where the username should
                                         be substituted.  For example, a string
                                         of "https://somesite.com/users/{}"
                                         indicates that the individual
                                         usernames would show up under the
                                         "https://somesite.com/users/" area of
                                         the website.
        username_claimed       -- String containing username which is known
                                  to be claimed on website.
        username_unclaimed     -- String containing username which is known
                                  to be unclaimed on website.  When omitted
                                  (None), a fresh random token is generated
                                  for this object.
        information            -- Dictionary containing all known information
                                  about website.
                                  NOTE:  Custom information about how to
                                         actually detect the existence of the
                                         username will be included in this
                                         dictionary.  This information will
                                         be needed by the detection method,
                                         but it is only recorded in this
                                         object for future use.
        is_nsfw                -- Boolean indicating if site is Not Safe For Work.
        Return Value:
        Nothing.
        """
        self.name = name
        self.url_home = url_home
        self.url_username_format = url_username_format
        self.username_claimed = username_claimed
        # A default of secrets.token_urlsafe(...) in the signature would be
        # evaluated only once, at definition time, so the random fallback is
        # generated here instead.  An explicitly supplied value is honored.
        if username_unclaimed is None:
            username_unclaimed = secrets.token_urlsafe(32)
        self.username_unclaimed = username_unclaimed
        self.information = information
        self.is_nsfw = is_nsfw
    def __str__(self):
        """Convert Object To String.
        Keyword Arguments:
        self                   -- This object.
        Return Value:
        Nicely formatted string to get information about this object.
        """
        return f"{self.name} ({self.url_home})"
 class SitesInformation:
    def __init__(
            self,
            data_file_path: str|None = None,
            honor_exclusions: bool = True,
            do_not_exclude: list[str] = [],
        ):
        """Create Sites Information Object.
        Contains information about all supported websites.
        Keyword Arguments:
        self                   -- This object.
        data_file_path         -- String which indicates path to data file.
                                  The file name must end in ".json".
                                  There are 3 possible formats:
                                   * Absolute File Format
                                     For example, "c:/stuff/data.json".
                                   * Relative File Format
                                     The current working directory is used
                                     as the context.
                                     For example, "data.json".
                                   * URL Format
                                     For example,
                                     "https://example.com/data.json", or
                                     "http://example.com/data.json".
                                  An exception will be thrown if the path
                                  to the data file is not in the expected
                                  format, or if there was any problem loading
                                  the file.
                                  If this option is not specified, then a
                                  default site list will be used.
        Return Value:
        Nothing.
        """
        if not data_file_path:
            # The default data file is the live data.json which is in the GitHub repo. The reason why we are using
            # this instead of the local one is so that the user has the most up-to-date data. This prevents
            # users from creating issue about false positives which has already been fixed or having outdated data
            data_file_path = MANIFEST_URL
        if data_file_path.lower().startswith("http"):
            # Reference is to a URL.
            try:
                response = requests.get(url=data_file_path, timeout=30)
            except Exception as error:
                raise FileNotFoundError(
                    f"Problem while attempting to access data file URL '{data_file_path}':  {error}"
                )
            if response.status_code != 200:
                raise FileNotFoundError(f"Bad response while accessing "
                                        f"data file URL '{data_file_path}'."
                                        )
            try:
                site_data = response.json()
            except Exception as error:
                raise ValueError(
                    f"Problem parsing json contents at '{data_file_path}':  {error}."
                )
        else:
            # Reference is to a file.
            try:
                with open(data_file_path, "r", encoding="utf-8") as file:
                    try:
                        site_data = json.load(file)
                    except Exception as error:
                        raise ValueError(
                            f"Problem parsing json contents at '{data_file_path}':  {error}."
                        )
            except FileNotFoundError:
                raise FileNotFoundError(f"Problem while attempting to access "
                                        f"data file '{data_file_path}'."
                                        )
        site_data.pop('$schema', None)
        if honor_exclusions:
            try:
                response = requests.get(url=EXCLUSIONS_URL, timeout=10)
                if response.status_code == 200:
                    exclusions = response.text.splitlines()
                    exclusions = [exclusion.strip() for exclusion in exclusions]
                    for site in do_not_exclude:
                        if site in exclusions:
                            exclusions.remove(site)
                    for exclusion in exclusions:
                        try:
                            site_data.pop(exclusion, None)
                        except KeyError:
                            pass
            except Exception:
                # If there was any problem loading the exclusions, just continue without them
                print("Warning: Could not load exclusions, continuing without them.")
                honor_exclusions = False
        self.sites = {}
        # Add all site information from the json file to internal site list.
        for site_name in site_data:
            try:
                self.sites[site_name] = \
                    SiteInformation(site_name,
                                    site_data[site_name]["urlMain"],
                                    site_data[site_name]["url"],
                                    site_data[site_name]["username_claimed"],
                                    site_data[site_name],
                                    site_data[site_name].get("isNSFW",False)
                                    )
            except KeyError as error:
                raise ValueError(
                    f"Problem parsing json contents at '{data_file_path}':  Missing attribute {error}."
                )
            except TypeError:
                print(f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n")
        return
    def remove_nsfw_sites(self, do_not_remove: list = []):
        """
        Remove NSFW sites from the sites, if isNSFW flag is true for site
        Keyword Arguments:
        self                   -- This object.
        Return Value:
        None
        """
        sites = {}
        do_not_remove = [site.casefold() for site in do_not_remove]
        for site in self.sites:
            if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
                continue
            sites[site] = self.sites[site]
        self.sites =  sites
    def site_name_list(self):
        """Get Site Name List.
        Keyword Arguments:
        self                   -- This object.
        Return Value:
        List of strings containing names of sites.
        """
        return sorted([site.name for site in self], key=str.lower)
    def __iter__(self):
        """Iterator For Object.
        Keyword Arguments:
        self                   -- This object.
        Return Value:
        Iterator for sites object.
        """
        for site_name in self.sites:
            yield self.sites[site_name]
    def __len__(self):
        """Length For Object.
        Keyword Arguments:
        self                   -- This object.
        Return Value:
        Length of sites object.
        """
        return len(self.sites)
@@ -1,51 +0,0 @@
 import os
 import json
 import urllib
 import pytest
 from sherlock_project.sites import SitesInformation
 def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]:
    """Load the data.json shipped in the repository and map each site name to its information dict.

    honor_exclusions is forwarded unchanged to SitesInformation.
    """
    manifest_path = os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")
    loaded_sites = SitesInformation(data_file_path=manifest_path, honor_exclusions=honor_exclusions)
    manifest: dict[str, dict[str, str]] = {}
    for entry in loaded_sites:
        manifest[entry.name] = entry.information
    return manifest
@pytest.fixture()
 def sites_obj():
    sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
    yield sites_obj
@pytest.fixture(scope="session")
 def sites_info():
    yield fetch_local_manifest()
@pytest.fixture(scope="session")
 def remote_schema():
    schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.schema.json'
    with urllib.request.urlopen(schema_url) as remoteschema:
        schemadat = json.load(remoteschema)
    yield schemadat
 def pytest_addoption(parser):
    """Register the --chunked-sites command line option on the given parser."""
    option_settings = {
        "action": "store",
        "default": None,
        "help": "For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.",
    }
    parser.addoption("--chunked-sites", **option_settings)
 def pytest_generate_tests(metafunc):
    """Parametrize tests requesting `chunked_sites` with one single-site dict per manifest entry.

    When --chunked-sites is given, only the listed (comma-separated) sites are used.
    """
    if "chunked_sites" not in metafunc.fixturenames:
        return
    manifest = fetch_local_manifest(honor_exclusions=False)
    # Ingest and apply site selections
    requested: str | None = metafunc.config.getoption("--chunked-sites")
    if requested:
        wanted: list[str] = [name.strip() for name in requested.split(",")]
        manifest = {name: info for name, info in manifest.items() if name in wanted}
    chunk_params = []
    chunk_ids = []
    for name, info in manifest.items():
        chunk_params.append({name: info})
        chunk_ids.append(name)
    metafunc.parametrize("chunked_sites", chunk_params, ids=chunk_ids)
@@ -1,7 +0,0 @@
 import sherlock_project
 #from sherlock.sites import SitesInformation
 #local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")
 def test_username_via_message():
    # Calls the `__main__` attribute of the `sherlock_project` package with "--version".
    # NOTE(review): confirm that this attribute is actually callable (it may be a
    # module rather than a function) before relying on this test.
    sherlock_project.__main__("--version")
@@ -1,38 +0,0 @@
 import os
 import platform
 import re
 import subprocess
 class Interactives:
    """Helpers that exercise Sherlock the way a user would, from outside the package."""
    @staticmethod
    def run_cli(args:str = "") -> str:
        """Pass arguments to Sherlock as a normal user on the command line

        Returns the decoded combined stdout/stderr of the command.
        Raises InteractivesSubprocessError (carrying the decoded output) when
        the command exits with a non-zero status.
        """
        # Adapt for platform differences (Windows likes to be special)
        if platform.system() == "Windows":
            command:str = f"py -m sherlock_project {args}"
        else:
            command:str = f"sherlock {args}"
        try:
            proc_out:bytes = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            raise InteractivesSubprocessError(e.output.decode()) from e
        return proc_out.decode()
    @staticmethod
    def walk_sherlock_for_files_with(pattern: str) -> list[str]:
        """Check all files within the Sherlock package for matching patterns

        Walks the "sherlock_project" directory relative to the current working
        directory and returns the paths of files whose text matches the regex
        `pattern`. Paths containing "__pycache__" are skipped.
        """
        compiled:re.Pattern = re.compile(pattern)
        matching_files:list[str] = []
        for root, _dirs, files in os.walk("sherlock_project"):
            for file in files:
                file_path = os.path.join(root,file)
                if "__pycache__" in file_path:
                    continue
                # Explicit encoding keeps results independent of the platform locale;
                # undecodable bytes are dropped rather than raising.
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    if compiled.search(f.read()):
                        matching_files.append(file_path)
        return matching_files
 class InteractivesSubprocessError(Exception):
    """Error carrying the output of a command line invocation that failed."""
@@ -1,39 +0,0 @@
 import os
 import json
 import pytest
 from jsonschema import validate
 def test_validate_manifest_against_local_schema():
    """Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
    json_relative: str = '../sherlock_project/resources/data.json'
    schema_relative: str = '../sherlock_project/resources/data.schema.json'
    json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
    schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # locale encoding; the package itself loads data.json the same way.
    with open(json_path, 'r', encoding='utf-8') as f:
        jsondat = json.load(f)
    with open(schema_path, 'r', encoding='utf-8') as f:
        schemadat = json.load(f)
    validate(instance=jsondat, schema=schemadat)
@pytest.mark.online
 def test_validate_manifest_against_remote_schema(remote_schema):
    """Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients."""
    json_relative: str = '../sherlock_project/resources/data.json'
    json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
    with open(json_path, 'r') as f:
        jsondat = json.load(f)
    validate(instance=jsondat, schema=remote_schema)
 # Ensure that the expected values are being returned by the site list
@pytest.mark.parametrize("target_name,target_expected_err_type", [
    ('GitHub', 'status_code'),
    ('GitLab', 'message'),
 ])
 def test_site_list_iterability (sites_info, target_name, target_expected_err_type):
    assert sites_info[target_name]['errorType'] == target_expected_err_type
@@ -1,105 +0,0 @@
 import pytest
 import random
 import string
 import re
 from sherlock_project.sherlock import sherlock
 from sherlock_project.notify import QueryNotify
 from sherlock_project.result import QueryStatus
 #from sherlock_interactives import Interactives
 def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus:
    """Run sherlock for `username` against the single `site` and return the resulting status."""
    notifier = QueryNotify()
    # Restrict the lookup to the one requested site.
    single_site: dict = {site: sites_info[site]}
    outcome = sherlock(
        username=username,
        site_data=single_site,
        query_notify=notifier,
    )
    site_result = outcome[site]
    return site_result['status'].status
@pytest.mark.online
 class TestLiveTargets:
    """Actively test probes against live and trusted targets"""
    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username',[
        ('GitLab', 'ppfeister'),
        ('AllMyLinks', 'blue'),
    ])
    def test_known_positives_via_message(self, sites_info, site, username):
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED
    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username',[
        ('GitHub', 'ppfeister'),
        ('GitHub', 'sherlock-project'),
        ('Docker Hub', 'ppfeister'),
        ('Docker Hub', 'sherlock'),
    ])
    def test_known_positives_via_status_code(self, sites_info, site, username):
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED
    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username',[
        ('Keybase', 'blue'),
        ('devRant', 'blue'),
    ])
    def test_known_positives_via_response_url(self, sites_info, site, username):
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED
    # Randomly generate usernames of high length and test for positive availability
    # Randomly generated usernames should be simple alnum for simplicity and high
    # compatibility. Several attempts may be made ~just in case~ a real username is
    # generated.
    @pytest.mark.parametrize('site,random_len',[
        ('GitLab', 255),
        ('Codecademy', 30)
    ])
    def test_likely_negatives_via_message(self, sites_info, site, random_len):
        num_attempts: int = 3
        attempted_usernames: list[str] = []
        status: QueryStatus = QueryStatus.CLAIMED
        for i in range(num_attempts):
            acceptable_types = string.ascii_letters + string.digits
            random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len))
            attempted_usernames.append(random_handle)
            status = simple_query(sites_info=sites_info, site=site, username=random_handle)
            if status is QueryStatus.AVAILABLE:
                break
        assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}."
    # Randomly generate usernames of high length and test for positive availability
    # Randomly generated usernames should be simple alnum for simplicity and high
    # compatibility. Several attempts may be made ~just in case~ a real username is
    # generated.
    @pytest.mark.parametrize('site,random_len',[
        ('GitHub', 39),
        ('Docker Hub', 30)
    ])
    def test_likely_negatives_via_status_code(self, sites_info, site, random_len):
        num_attempts: int = 3
        attempted_usernames: list[str] = []
        status: QueryStatus = QueryStatus.CLAIMED
        for i in range(num_attempts):
            acceptable_types = string.ascii_letters + string.digits
            random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len))
            attempted_usernames.append(random_handle)
            status = simple_query(sites_info=sites_info, site=site, username=random_handle)
            if status is QueryStatus.AVAILABLE:
                break
        assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}."
 def test_username_illegal_regex(sites_info):
    """A username that fails the site's regexCheck must be reported as ILLEGAL."""
    site: str = 'BitBucket'
    invalid_handle: str = '*#$Y&*JRE'
    site_regex = sites_info[site]['regexCheck']
    # Ensure that the username actually fails regex before testing sherlock
    assert re.match(site_regex, invalid_handle) is None
    observed = simple_query(sites_info=sites_info, site=site, username=invalid_handle)
    assert observed is QueryStatus.ILLEGAL
@@ -1,47 +0,0 @@
 """Tests for handling usernames with special/unicode characters."""
 from concurrent.futures import Future
 from sherlock_project.sherlock import get_response
 def _make_future_with_exception(exc):
    """Create a Future that raises the given exception."""
    future = Future()
    future.set_exception(exc)
    return future
 def test_get_response_handles_unicode_decode_error():
    """Regression test for issue #2730.
    Usernames with special characters (e.g. 'Émile') can trigger a
    UnicodeDecodeError inside the requests library during redirect
    handling. This must not crash the program.
    """
    decode_error = UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte")
    failed_future = _make_future_with_exception(decode_error)
    outcome = get_response(
        request_future=failed_future,
        error_type=["status_code"],
        social_network="TestSite",
    )
    response, error_context, exception_text = outcome
    assert response is None
    assert error_context == "Encoding Error"
    assert "utf-8" in exception_text
 def test_get_response_handles_unicode_encode_error():
    """UnicodeEncodeError should also be caught (subclass of UnicodeError)."""
    encode_error = UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)")
    failed_future = _make_future_with_exception(encode_error)
    outcome = get_response(
        request_future=failed_future,
        error_type=["status_code"],
        social_network="TestSite",
    )
    response, error_context, exception_text = outcome
    assert response is None
    assert error_context == "Encoding Error"
    assert "ascii" in exception_text
@@ -1,43 +0,0 @@
 import pytest
 from sherlock_project import sherlock
 from sherlock_interactives import Interactives
 from sherlock_interactives import InteractivesSubprocessError
 def test_remove_nsfw(sites_obj):
    nsfw_target: str = 'Xvideos'
    assert nsfw_target in {site.name: site.information for site in sites_obj}
    sites_obj.remove_nsfw_sites()
    assert nsfw_target not in {site.name: site.information for site in sites_obj}
 # Parametrized sites should *not* include Motherless, which is acting as the control
@pytest.mark.parametrize('nsfwsites', [
    ['Xvideos'],
    ['Xvideos', 'Erome'],
 ])
 def test_nsfw_explicit_selection(sites_obj, nsfwsites):
    for site in nsfwsites:
        assert site in {site.name: site.information for site in sites_obj}
    sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites)
    for site in nsfwsites:
        assert site in {site.name: site.information for site in sites_obj}
        assert 'Motherless' not in {site.name: site.information for site in sites_obj}
 def test_wildcard_username_expansion():
    """check_for_parameter accepts only the exact '{?}' token; multiple_usernames expands it to '_', '-' and '.'."""
    expectations = [
        ('test{?}test', True),
        ('test{.}test', False),
        ('test{}test', False),
        ('testtest', False),
        ('test{?test', False),
        ('test?}test', False),
    ]
    for candidate, expected in expectations:
        assert sherlock.check_for_parameter(candidate) is expected
    expanded = sherlock.multiple_usernames('test{?}test')
    assert expanded == ["test_test" , "test-test" , "test.test"]
@pytest.mark.parametrize('cliargs', [
    '',
    '--site urghrtuight --egiotr',
    '--',
 ])
 def test_no_usernames_provided(cliargs):
    with pytest.raises(InteractivesSubprocessError, match=r"error: the following arguments are required: USERNAMES"):
        Interactives.run_cli(cliargs)
@@ -1,100 +0,0 @@
 import pytest
 import re
 import rstr
 from sherlock_project.sherlock import sherlock
 from sherlock_project.notify import QueryNotify
 from sherlock_project.result import QueryResult, QueryStatus
 FALSE_POSITIVE_ATTEMPTS: int = 2    # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit
 FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15  # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable)
 FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$'  # Used in absence of a regexCheck entry
 def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str:
    """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`.

    Unescaped `{n,}` becomes `{n,max(n, upper_bound)}`, `+` becomes
    `{1,upper_bound}` and `*` becomes `{0,upper_bound}`. Returns the rewritten
    pattern string.
    """
    def replace_upper_bound(match: re.Match) -> str:
        lower_bound: int = int(match.group(1))
        # Never emit an upper bound below the lower bound. This is computed per
        # match, so one large `{n,}` does not raise the bound that is applied
        # to the other quantifiers in the same pattern.
        return f'{{{lower_bound},{max(lower_bound, upper_bound)}}}'
    pattern = re.sub(r'(?<!\\)\{(\d+),\}', replace_upper_bound, pattern) # {n,}
    pattern = re.sub(r'(?<!\\)\+', f'{{1,{upper_bound}}}', pattern) # +
    pattern = re.sub(r'(?<!\\)\*', f'{{0,{upper_bound}}}', pattern) # *
    return pattern
 def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus:
    """Check if a site is likely to produce false positives.

    Queries `site` with usernames generated from `pattern`, up to
    FALSE_POSITIVE_ATTEMPTS times, stopping at the first AVAILABLE or WAF
    status. Returns the last status observed.
    """
    status: QueryStatus = QueryStatus.UNKNOWN
    attempts_left: int = FALSE_POSITIVE_ATTEMPTS
    while attempts_left > 0:
        attempts_left -= 1
        notifier: QueryNotify = QueryNotify()
        generated_name: str = rstr.xeger(pattern)
        all_results = sherlock(
            username=generated_name,
            site_data=sites_info,
            query_notify=notifier,
        )
        result: QueryResult | str = all_results[site]['status']
        if not hasattr(result, 'status'):
            raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
        if type(result.status) is not QueryStatus: # type: ignore
            raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore
        status = result.status # type: ignore
        if status is QueryStatus.AVAILABLE or status is QueryStatus.WAF:
            break
    return status
 def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus:
    """Check if a site is likely to produce false negatives.

    Queries `site` with its own 'username_claimed' value and returns the
    resulting status.
    """
    notifier: QueryNotify = QueryNotify()
    claimed_name = sites_info[site]['username_claimed']
    all_results = sherlock(
        username=claimed_name,
        site_data=sites_info,
        query_notify=notifier,
    )
    result: QueryResult | str = all_results[site]['status']
    if not hasattr(result, 'status'):
        raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
    if type(result.status) is not QueryStatus: # type: ignore
        raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore
    return result.status # type: ignore
@pytest.mark.validate_targets
@pytest.mark.online
class Test_All_Targets:
    """Checks that run the false-positive and false-negative probes over every site in a chunk."""

    @pytest.mark.validate_targets_fp
    def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]):
        """Assert that each site reports AVAILABLE for generated usernames.

        Usernames come from the site's ``regexCheck`` pattern when it has one,
        otherwise from ``FALSE_POSITIVE_DEFAULT_PATTERN``.
        """
        # A positive upper bound means quantifiers in the pattern get capped.
        cap_quantifiers = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0

        for site in chunked_sites:
            pattern: str = chunked_sites[site].get('regexCheck', FALSE_POSITIVE_DEFAULT_PATTERN)
            if cap_quantifiers:
                pattern = set_pattern_upper_bound(pattern)

            result: QueryStatus = false_positive_check(chunked_sites, site, pattern)
            assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}"

    @pytest.mark.validate_targets_fn
    def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]):
        """Assert that each site reports CLAIMED for its ``username_claimed`` entry."""
        for site in chunked_sites:
            result: QueryStatus = false_negative_check(chunked_sites, site)
            assert result is QueryStatus.CLAIMED, f"{site} produced false negative, result was {result}"
@@ -1,16 +0,0 @@
 import os
 from sherlock_interactives import Interactives
 import sherlock_project
 def test_versioning() -> None:
    """Check that the package version is reported by the CLI and defined in one place only."""
    # The version string must appear in the output of `--version`.
    cli_output = Interactives.run_cli("--version")
    assert sherlock_project.__version__ in cli_output

    # Files reported for the `__version__ = ` assignment pattern; the package
    # __init__ is expected to be the only one.
    found: list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *')

    # normpath keeps the expected path comparable on Windows ( / vs \ ).
    init_path = os.path.normpath("sherlock_project/__init__.py")
    expected: list = [init_path]

    # Compare order-independently (the original notes sorting is required on Mac).
    assert sorted(found) == sorted(expected)
@@ -1,42 +0,0 @@
 [tox]
 requires =
    tox >= 3
 envlist =
    lint
    py313
    py312
    py311
    py310
 [testenv]
 description = Attempt to build and install the package
 deps =
    coverage
    jsonschema
    pytest
    rstr
 allowlist_externals = coverage
 commands =
    coverage run --source=sherlock_project --module pytest -v
    coverage report --show-missing
 [testenv:offline]
 deps =
    jsonschema
    pytest
 commands =
    pytest -v -m "not online"
 [testenv:lint]
 description = Lint with Ruff
 deps =
    ruff
 commands =
    ruff check
 [gh-actions]
 python =
    3.13: py313
    3.12: py312
    3.11: py311
    3.10: py310
		`@@ -1 +0,0 @@`
			`github: [ sdushantha, ppfeister, matheusfelipeog ]`