Compare commits
247 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4e2a4f6b66 | |||
| 2b985b57ad | |||
| ed0865363f | |||
| 43a354b235 | |||
| aa5c3b0010 | |||
| 2df7c61be8 | |||
| 61aae782ee | |||
| 6eaec5cccd | |||
| dca64e35d3 | |||
| 2e2248a8a6 | |||
| a9960ff9a4 | |||
| d731f715bf | |||
| 271608fb22 | |||
| eb79980c33 | |||
| e2a225697f | |||
| 173ae5b824 | |||
| dcb935337c | |||
| ed883ad7c8 | |||
| a68ea46fb4 | |||
| ed73b175d7 | |||
| a192cb4bfe | |||
| b5e891550c | |||
| 190c2af514 | |||
| 8175af39ae | |||
| 32fde9bfc6 | |||
| 4656d95702 | |||
| 574aeb4ac5 | |||
| 382bc3210a | |||
| 17c443af19 | |||
| 9d6c47fdb4 | |||
| 10bed20e70 | |||
| fd3833b744 | |||
| 8f8ebf3c15 | |||
| 4253014085 | |||
| 725c68907a | |||
| c66d10bfed | |||
| e0002779b4 | |||
| 8f1308b90d | |||
| e856b05c2c | |||
| fe9e750dab | |||
| 842ae1f754 | |||
| 339634f7bc | |||
| c1632693bb | |||
| e19cb32009 | |||
| b69c8ef940 | |||
| 2724711060 | |||
| 0a68ab7f4c | |||
| 8675178be1 | |||
| 9bafb8a280 | |||
| 8e5549862a | |||
| 8797fcd517 | |||
| 0995d4d669 | |||
| 6c0c273a0b | |||
| 3eeba790fd | |||
| 61a29ec373 | |||
| 9fbbbf7c73 | |||
| 331b68d909 | |||
| 8c3e093561 | |||
| e35e5e3af1 | |||
| 906287b305 | |||
| 0dbb6abcc5 | |||
| 03e097cc82 | |||
| 91c1964918 | |||
| 373f3d389a | |||
| 828c47109d | |||
| 94245b25df | |||
| 734542f0af | |||
| 1f8166ba9f | |||
| 6f1ddaa615 | |||
| 7ee2891517 | |||
| b893e4aa20 | |||
| eff869906a | |||
| 2a0107e189 | |||
| 5d8c4de212 | |||
| 1f9d7e8373 | |||
| 184470f871 | |||
| 342dbc85cc | |||
| 457e16e84f | |||
| 43b3736b75 | |||
| 64a49ffe17 | |||
| 0afd2006c6 | |||
| 3c270173a7 | |||
| 8d73f9ef4c | |||
| 472c086805 | |||
| 400c277f24 | |||
| e759564550 | |||
| deebe7137c | |||
| cb14ccbaaf | |||
| eb892795e9 | |||
| 09de90066b | |||
| cd1f27c12b | |||
| b837de8358 | |||
| 7a70f35883 | |||
| 4b17dae385 | |||
| efefe3f54a | |||
| 4b70a1fc25 | |||
| a7893f399e | |||
| 1cb6c12851 | |||
| c4f7485ecf | |||
| 228f50413e | |||
| d1867b1b51 | |||
| 6d2497582e | |||
| 885c43b8af | |||
| 8ad47b0b23 | |||
| e93af99424 | |||
| 5862ab4f92 | |||
| 4110cac45c | |||
| d66b18e8ae | |||
| b532fc6a38 | |||
| 99cf073835 | |||
| ec7e1b8b81 | |||
| a4aab38901 | |||
| 5202900618 | |||
| 26444a98ad | |||
| bced3242f3 | |||
| 08aabdad76 | |||
| 170ee0b928 | |||
| 2c9a54438a | |||
| 84f4886809 | |||
| e26fd6b643 | |||
| ce5de20f80 | |||
| 3ff2d135b5 | |||
| 1e65b4a209 | |||
| db3545b7b0 | |||
| 1898a0c4a9 | |||
| 0d32357b10 | |||
| 1be2abb056 | |||
| fb392534ef | |||
| bd49aac9d1 | |||
| 94838863fd | |||
| 79973a58ea | |||
| b9a72b55ca | |||
| ef55f7ddd3 | |||
| 28b78e7ddd | |||
| d2072e2cac | |||
| 3edb73cb23 | |||
| 6d1280ee9d | |||
| 0c457e590a | |||
| dc307fc0fd | |||
| d6256e9fc6 | |||
| 1645828527 | |||
| e774b08dc5 | |||
| 99067b2e59 | |||
| f039b50c4e | |||
| 7d5bd97142 | |||
| 70b5055631 | |||
| 1be25e70df | |||
| 9000575f7c | |||
| 220ebf935c | |||
| 959c4a2b26 | |||
| 443d43df21 | |||
| 80080cd57c | |||
| 80922a93fa | |||
| 45494fc74b | |||
| d92e2339a1 | |||
| 659bf92d99 | |||
| 3e4d9bcd85 | |||
| d3076cdfe0 | |||
| 51436cefe8 | |||
| 08a8177286 | |||
| e6d5fd64e0 | |||
| ac9f3a7fd5 | |||
| 289ab28b98 | |||
| 46ad6c9a5e | |||
| d20dcbe8db | |||
| 70c3c84196 | |||
| 53840c6a98 | |||
| 068fff8711 | |||
| 5735d01804 | |||
| f60de0d8f8 | |||
| cb3ab91492 | |||
| 4eea79ed6a | |||
| 03c051a525 | |||
| eccdf80b95 | |||
| eb51bf9b1a | |||
| 5d7b438fd6 | |||
| ef0b97fb57 | |||
| c6c3522159 | |||
| 2908c8eaa8 | |||
| f05b8e0ed6 | |||
| 01bca6b39f | |||
| d2835e56a4 | |||
| 0cf110e69e | |||
| a88adb0488 | |||
| 4010a58dde | |||
| b9e28b9b23 | |||
| d0e005da23 | |||
| 7a4f19e6b3 | |||
| f958e7b96f | |||
| 4c99bf3b75 | |||
| e3066a1d7a | |||
| f0510a169a | |||
| 738df6c362 | |||
| 83a38db110 | |||
| 9e3448d992 | |||
| 70e3c0ddd8 | |||
| 017c08a45d | |||
| f32f4ffaee | |||
| 7379ba7b19 | |||
| 3aeb6d6356 | |||
| 4246a7b16f | |||
| e44fe49c8f | |||
| 52cd5fdfc1 | |||
| 947f1ad2b6 | |||
| 4d00884d8c | |||
| cfcc82aaca | |||
| 0794e02b52 | |||
| 975965abed | |||
| a678bed154 | |||
| 4ec6f1eec0 | |||
| d1527376e7 | |||
| b99719ce60 | |||
| dc869852bc | |||
| 3079e7a218 | |||
| 5cd769c2f4 | |||
| 977ad5c1a4 | |||
| 57a0ccef38 | |||
| 94c013886a | |||
| c5e209d78e | |||
| 3e653c46b0 | |||
| 91f3b16993 | |||
| 0f3df0f4da | |||
| 0e7219b191 | |||
| 1d2c4b134f | |||
| b245c462c9 | |||
| 876e58b159 | |||
| 66d9733da7 | |||
| c55deab3a2 | |||
| edcb697793 | |||
| d314d75db1 | |||
| c89a52caf7 | |||
| 9c18cfe273 | |||
| 779d4c33f4 | |||
| 072c24687b | |||
| 355bfbd328 | |||
| 7b3632bdad | |||
| 4fe41f09ff | |||
| cd7c52e4fa | |||
| 86140af50e | |||
| e5cd5e5bfe | |||
| dc89f1cd27 | |||
| 4706323976 | |||
| 4721c7f553 | |||
| 193de54b6d | |||
| 2016892e64 | |||
| 44ad8f506a | |||
| cfa4097df9 |
+1
-1
@@ -65,7 +65,7 @@ The Actor provides three types of outputs:
|
|||||||
| Field | Type | Required | Description |
|
| Field | Type | Required | Description |
|
||||||
|-------|------|----------|-------------|
|
|-------|------|----------|-------------|
|
||||||
| `username` | string | Yes | Username the search was conducted for |
|
| `username` | string | Yes | Username the search was conducted for |
|
||||||
| `links` | arrray | Yes | Array with found links to the social media |
|
| `links` | array | Yes | Array with found links to the social media |
|
||||||
| `links[]`| string | No | URL to the account
|
| `links[]`| string | No | URL to the account
|
||||||
|
|
||||||
### Example Dataset Item (JSON)
|
### Example Dataset Item (JSON)
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ on:
|
|||||||
- '**/*.py'
|
- '**/*.py'
|
||||||
- '**/*.ini'
|
- '**/*.ini'
|
||||||
- '**/*.toml'
|
- '**/*.toml'
|
||||||
|
- 'Dockerfile'
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
@@ -21,15 +22,17 @@ on:
|
|||||||
- '**/*.py'
|
- '**/*.py'
|
||||||
- '**/*.ini'
|
- '**/*.ini'
|
||||||
- '**/*.toml'
|
- '**/*.toml'
|
||||||
|
- 'Dockerfile'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
tox-lint:
|
tox-lint:
|
||||||
# Linting is ran through tox to ensure that the same linter is used by local runners
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
# Linting is run through tox to ensure that the same linter
|
||||||
|
# is used by local runners
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v6
|
||||||
- name: Set up linting environment
|
- name: Set up linting environment
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v6
|
||||||
with:
|
with:
|
||||||
python-version: '3.x'
|
python-version: '3.x'
|
||||||
- name: Install tox and related dependencies
|
- name: Install tox and related dependencies
|
||||||
@@ -41,7 +44,8 @@ jobs:
|
|||||||
tox-matrix:
|
tox-matrix:
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false # We want to know what specicic versions it fails on
|
# We want to know what specific versions it fails on
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
os: [
|
os: [
|
||||||
ubuntu-latest,
|
ubuntu-latest,
|
||||||
@@ -53,11 +57,13 @@ jobs:
|
|||||||
'3.11',
|
'3.11',
|
||||||
'3.12',
|
'3.12',
|
||||||
'3.13',
|
'3.13',
|
||||||
|
'3.14',
|
||||||
|
'3.14t',
|
||||||
]
|
]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v6
|
||||||
- name: Set up environment ${{ matrix.python-version }}
|
- name: Set up environment ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v6
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- name: Install tox and related dependencies
|
- name: Install tox and related dependencies
|
||||||
@@ -67,3 +73,22 @@ jobs:
|
|||||||
pip install tox-gh-actions
|
pip install tox-gh-actions
|
||||||
- name: Run tox
|
- name: Run tox
|
||||||
run: tox
|
run: tox
|
||||||
|
docker-build-test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
- name: Get version from pyproject.toml
|
||||||
|
id: get-version
|
||||||
|
run: |
|
||||||
|
VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
|
||||||
|
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||||
|
- name: Build Docker image
|
||||||
|
run: |
|
||||||
|
docker build \
|
||||||
|
--build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
|
||||||
|
-t sherlock-test:latest .
|
||||||
|
- name: Test Docker image runs
|
||||||
|
run: docker run --rm sherlock-test:latest --version
|
||||||
|
|||||||
@@ -17,29 +17,41 @@ jobs:
|
|||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v5
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
|
# Checkout the base branch but fetch all history to avoid a second fetch call
|
||||||
ref: ${{ github.base_ref }}
|
ref: ${{ github.base_ref }}
|
||||||
fetch-depth: 1
|
fetch-depth: 0
|
||||||
|
persist-credentials: false
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v6
|
||||||
with:
|
with:
|
||||||
python-version: '3.13'
|
python-version: "3.13"
|
||||||
|
|
||||||
- name: Install Poetry
|
- name: Install Poetry
|
||||||
uses: abatilo/actions-poetry@v4
|
uses: abatilo/actions-poetry@v4
|
||||||
with:
|
with:
|
||||||
poetry-version: 'latest'
|
poetry-version: "latest"
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
poetry install --no-interaction --with dev
|
poetry install --no-interaction --with dev
|
||||||
|
|
||||||
- name: Drop in place updated manifest from base
|
- name: Prepare JSON versions for comparison
|
||||||
run: |
|
run: |
|
||||||
cp sherlock_project/resources/data.json data.json.base
|
# Fetch only the PR's branch head (single network call in this step)
|
||||||
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1
|
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
|
||||||
git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json
|
|
||||||
cp sherlock_project/resources/data.json data.json.head
|
# Find the merge-base commit between the target branch and the PR branch
|
||||||
|
MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
|
||||||
|
echo "Comparing PR head against merge-base commit: $MERGE_BASE"
|
||||||
|
|
||||||
|
# Safely extract the file from the PR's head and the merge-base commit
|
||||||
|
git show pr:sherlock_project/resources/data.json > data.json.head
|
||||||
|
git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
|
||||||
|
|
||||||
|
# CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
|
||||||
|
# This ensures that pytest runs against the new, updated file.
|
||||||
|
cp data.json.head sherlock_project/resources/data.json
|
||||||
|
|
||||||
- name: Discover modified targets
|
- name: Discover modified targets
|
||||||
id: discover-modified
|
id: discover-modified
|
||||||
@@ -47,8 +59,16 @@ jobs:
|
|||||||
CHANGED=$(
|
CHANGED=$(
|
||||||
python - <<'EOF'
|
python - <<'EOF'
|
||||||
import json
|
import json
|
||||||
with open("data.json.base") as f: base = json.load(f)
|
import sys
|
||||||
with open("data.json.head") as f: head = json.load(f)
|
try:
|
||||||
|
with open("data.json.base") as f: base = json.load(f)
|
||||||
|
with open("data.json.head") as f: head = json.load(f)
|
||||||
|
except FileNotFoundError as e:
|
||||||
|
print(f"Error: Could not find {e.filename}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
changed = []
|
changed = []
|
||||||
for k, v in head.items():
|
for k, v in head.items():
|
||||||
@@ -63,12 +83,19 @@ jobs:
|
|||||||
echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
|
echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
|
||||||
echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
|
echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
- name: Validate modified targets
|
- name: Validate remote manifest against local schema
|
||||||
if: steps.discover-modified.outputs.changed_targets != ''
|
if: steps.discover-modified.outputs.changed_targets != ''
|
||||||
continue-on-error: true
|
run: |
|
||||||
|
poetry run pytest tests/test_manifest.py::test_validate_manifest_against_local_schema
|
||||||
|
|
||||||
|
# --- The rest of the steps below are unchanged ---
|
||||||
|
|
||||||
|
- name: Validate modified targets
|
||||||
|
env:
|
||||||
|
CHANGED_TARGETS: ${{ steps.discover-modified.outputs.changed_targets }}
|
||||||
run: |
|
run: |
|
||||||
poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
|
poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
|
||||||
--chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" \
|
--chunked-sites "$CHANGED_TARGETS" \
|
||||||
--junitxml=validation_results.xml
|
--junitxml=validation_results.xml
|
||||||
|
|
||||||
- name: Prepare validation summary
|
- name: Prepare validation summary
|
||||||
|
|||||||
+1
-1
@@ -4,7 +4,7 @@
|
|||||||
# 3. Build image with BOTH latest and version tags
|
# 3. Build image with BOTH latest and version tags
|
||||||
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
|
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
|
||||||
|
|
||||||
FROM python:3.12-slim-bullseye as build
|
FROM python:3.12-slim-bullseye AS build
|
||||||
WORKDIR /sherlock
|
WORKDIR /sherlock
|
||||||
|
|
||||||
RUN pip3 install --no-cache-dir --upgrade pip
|
RUN pip3 install --no-cache-dir --upgrade pip
|
||||||
|
|||||||
+17
-11
@@ -1,39 +1,45 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# This module generates the listing of supported sites which can be found in
|
# This module generates the listing of supported sites which can be found in
|
||||||
# sites.md. It also organizes all the sites in alphanumeric order
|
# sites.mdx. It also organizes all the sites in alphanumeric order
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
DATA_REL_URI: str = "sherlock_project/resources/data.json"
|
DATA_REL_URI: str = "sherlock_project/resources/data.json"
|
||||||
|
|
||||||
|
DEFAULT_ENCODING = "utf-8"
|
||||||
|
|
||||||
# Read the data.json file
|
# Read the data.json file
|
||||||
with open(DATA_REL_URI, "r", encoding="utf-8") as data_file:
|
with open(DATA_REL_URI, "r", encoding=DEFAULT_ENCODING) as data_file:
|
||||||
data: dict = json.load(data_file)
|
data: dict = json.load(data_file)
|
||||||
|
|
||||||
# Removes schema-specific keywords for proper processing
|
# Removes schema-specific keywords for proper processing
|
||||||
social_networks: dict = dict(data)
|
social_networks = data.copy()
|
||||||
social_networks.pop('$schema', None)
|
social_networks.pop('$schema', None)
|
||||||
|
|
||||||
# Sort the social networks in alphanumeric order
|
# Sort the social networks in alphanumeric order
|
||||||
social_networks: list = sorted(social_networks.items())
|
social_networks = sorted(social_networks.items())
|
||||||
|
|
||||||
# Make output dir where the site list will be written
|
# Make output dir where the site list will be written
|
||||||
os.mkdir("output")
|
os.mkdir("output")
|
||||||
|
|
||||||
# Write the list of supported sites to sites.md
|
# Write the list of supported sites to sites.mdx
|
||||||
with open("output/sites.mdx", "w") as site_file:
|
with open("output/sites.mdx", "w", encoding=DEFAULT_ENCODING) as site_file:
|
||||||
site_file.write("---\ntitle: 'List of supported sites'\nsidebarTitle: 'Supported sites'\nicon: 'globe'\ndescription: 'Sherlock currently supports **400+** sites'\n---\n\n")
|
site_file.write("---\n")
|
||||||
|
site_file.write("title: 'List of supported sites'\n")
|
||||||
|
site_file.write("sidebarTitle: 'Supported sites'\n")
|
||||||
|
site_file.write("icon: 'globe'\n")
|
||||||
|
site_file.write("description: 'Sherlock currently supports **400+** sites'\n")
|
||||||
|
site_file.write("---\n\n")
|
||||||
|
|
||||||
for social_network, info in social_networks:
|
for social_network, info in social_networks:
|
||||||
url_main = info["urlMain"]
|
url_main = info["urlMain"]
|
||||||
is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
|
is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
|
||||||
site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")
|
site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")
|
||||||
|
|
||||||
# Overwrite the data.json file with sorted data
|
# Overwrite the data.json file with sorted data
|
||||||
with open(DATA_REL_URI, "w") as data_file:
|
with open(DATA_REL_URI, "w", encoding=DEFAULT_ENCODING) as data_file:
|
||||||
sorted_data = json.dumps(data, indent=2, sort_keys=True)
|
sorted_data = json.dumps(data, indent=2, sort_keys=True)
|
||||||
data_file.write(sorted_data)
|
data_file.write(sorted_data)
|
||||||
data_file.write("\n")
|
data_file.write("\n") # Keep the newline after writing data
|
||||||
|
|
||||||
print("Finished updating supported site listing!")
|
print("Finished updating supported site listing!")
|
||||||
|
|
||||||
|
|||||||
+19
-47
@@ -23,17 +23,17 @@
|
|||||||
|
|
||||||
> [!WARNING]
|
> [!WARNING]
|
||||||
> Packages for ParrotOS and Ubuntu 24.04, maintained by a third party, appear to be __broken__.
|
> Packages for ParrotOS and Ubuntu 24.04, maintained by a third party, appear to be __broken__.
|
||||||
> Users of these systems should defer to pipx/pip or Docker.
|
> Users of these systems should defer to [`uv`](https://docs.astral.sh/uv/)/`pipx`/`pip` or Docker.
|
||||||
|
|
||||||
| Method | Notes |
|
| Method | Notes |
|
||||||
| - | - |
|
| - | - |
|
||||||
| `pipx install sherlock-project` | `pip` may be used in place of `pipx` |
|
| `pipx install sherlock-project` | `pip` or [`uv`](https://docs.astral.sh/uv/) may be used in place of `pipx` |
|
||||||
| `docker run -it --rm sherlock/sherlock` |
|
| `docker run -it --rm sherlock/sherlock` |
|
||||||
| `dnf install sherlock-project` | |
|
| `dnf install sherlock-project` | |
|
||||||
|
|
||||||
Community-maintained packages are available for Debian (>= 13), Ubuntu (>= 22.10), Homebrew, Kali, and BlackArch. These packages are not directly supported or maintained by the Sherlock Project.
|
Community-maintained packages are available for Debian (>= 13), Ubuntu (>= 22.10), Homebrew, Kali, and BlackArch. These packages are not directly supported or maintained by the Sherlock Project.
|
||||||
|
|
||||||
See all alternative installation methods [here](https://sherlockproject.xyz/installation)
|
See all alternative installation methods [here](https://sherlockproject.xyz/installation).
|
||||||
|
|
||||||
## General usage
|
## General usage
|
||||||
|
|
||||||
@@ -51,70 +51,42 @@ Accounts found will be stored in an individual text file with the corresponding
|
|||||||
|
|
||||||
```console
|
```console
|
||||||
$ sherlock --help
|
$ sherlock --help
|
||||||
usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT]
|
usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT] [--output OUTPUT] [--csv] [--xlsx] [--site SITE_NAME] [--proxy PROXY_URL] [--dump-response]
|
||||||
[--output OUTPUT] [--tor] [--unique-tor] [--csv] [--xlsx]
|
[--json JSON_FILE] [--timeout TIMEOUT] [--print-all] [--print-found] [--no-color] [--browse] [--local] [--nsfw] [--txt] [--ignore-exclusions]
|
||||||
[--site SITE_NAME] [--proxy PROXY_URL] [--json JSON_FILE]
|
|
||||||
[--timeout TIMEOUT] [--print-all] [--print-found] [--no-color]
|
|
||||||
[--browse] [--local] [--nsfw]
|
|
||||||
USERNAMES [USERNAMES ...]
|
USERNAMES [USERNAMES ...]
|
||||||
|
|
||||||
Sherlock: Find Usernames Across Social Networks (Version 0.14.3)
|
Sherlock: Find Usernames Across Social Networks (Version 0.16.0)
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
USERNAMES One or more usernames to check with social networks.
|
USERNAMES One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').
|
||||||
Check similar usernames using {?} (replace to '_', '-', '.').
|
|
||||||
|
|
||||||
optional arguments:
|
options:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
--version Display version information and dependencies.
|
--version Display version information and dependencies.
|
||||||
--verbose, -v, -d, --debug
|
--verbose, -v, -d, --debug
|
||||||
Display extra debugging information and metrics.
|
Display extra debugging information and metrics.
|
||||||
--folderoutput FOLDEROUTPUT, -fo FOLDEROUTPUT
|
--folderoutput FOLDEROUTPUT, -fo FOLDEROUTPUT
|
||||||
If using multiple usernames, the output of the results will be
|
If using multiple usernames, the output of the results will be saved to this folder.
|
||||||
saved to this folder.
|
|
||||||
--output OUTPUT, -o OUTPUT
|
--output OUTPUT, -o OUTPUT
|
||||||
If using single username, the output of the result will be saved
|
If using single username, the output of the result will be saved to this file.
|
||||||
to this file.
|
|
||||||
--tor, -t Make requests over Tor; increases runtime; requires Tor to be
|
|
||||||
installed and in system path.
|
|
||||||
--unique-tor, -u Make requests over Tor with new Tor circuit after each request;
|
|
||||||
increases runtime; requires Tor to be installed and in system
|
|
||||||
path.
|
|
||||||
--csv Create Comma-Separated Values (CSV) File.
|
--csv Create Comma-Separated Values (CSV) File.
|
||||||
--xlsx Create the standard file for the modern Microsoft Excel
|
--xlsx Create the standard file for the modern Microsoft Excel spreadsheet (xlsx).
|
||||||
spreadsheet (xlsx).
|
--site SITE_NAME Limit analysis to just the listed sites. Add multiple options to specify more than one site.
|
||||||
--site SITE_NAME Limit analysis to just the listed sites. Add multiple options to
|
|
||||||
specify more than one site.
|
|
||||||
--proxy PROXY_URL, -p PROXY_URL
|
--proxy PROXY_URL, -p PROXY_URL
|
||||||
Make requests over a proxy. e.g. socks5://127.0.0.1:1080
|
Make requests over a proxy. e.g. socks5://127.0.0.1:1080
|
||||||
|
--dump-response Dump the HTTP response to stdout for targeted debugging.
|
||||||
--json JSON_FILE, -j JSON_FILE
|
--json JSON_FILE, -j JSON_FILE
|
||||||
Load data from a JSON file or an online, valid, JSON file.
|
Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.
|
||||||
--timeout TIMEOUT Time (in seconds) to wait for response to requests (Default: 60)
|
--timeout TIMEOUT Time (in seconds) to wait for response to requests (Default: 60)
|
||||||
--print-all Output sites where the username was not found.
|
--print-all Output sites where the username was not found.
|
||||||
--print-found Output sites where the username was found.
|
--print-found Output sites where the username was found (also if exported as file).
|
||||||
--no-color Don't color terminal output
|
--no-color Don't color terminal output
|
||||||
--browse, -b Browse to all results on default browser.
|
--browse, -b Browse to all results on default browser.
|
||||||
--local, -l Force the use of the local data.json file.
|
--local, -l Force the use of the local data.json file.
|
||||||
--nsfw Include checking of NSFW sites from default list.
|
--nsfw Include checking of NSFW sites from default list.
|
||||||
|
--txt Enable creation of a txt file
|
||||||
|
--ignore-exclusions Ignore upstream exclusions (may return more false positives)
|
||||||
```
|
```
|
||||||
## Apify Actor Usage [](https://apify.com/netmilk/sherlock?fpr=sherlock)
|
|
||||||
|
|
||||||
<a href="https://apify.com/netmilk/sherlock?fpr=sherlock"><img src="https://apify.com/ext/run-on-apify.png" alt="Run Sherlock Actor on Apify" width="176" height="39" /></a>
|
|
||||||
|
|
||||||
You can run Sherlock in the cloud without installation using the [Sherlock Actor](https://apify.com/netmilk/sherlock?fpr=sherlock) on [Apify](https://apify.com?fpr=sherlock) free of charge.
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
$ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock
|
|
||||||
[{
|
|
||||||
"username": "user123",
|
|
||||||
"links": [
|
|
||||||
"https://www.1337x.to/user/user123/",
|
|
||||||
...
|
|
||||||
]
|
|
||||||
}]
|
|
||||||
```
|
|
||||||
|
|
||||||
Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).
|
|
||||||
|
|
||||||
## Credits
|
## Credits
|
||||||
|
|
||||||
@@ -124,7 +96,7 @@ Thank you to everyone who has contributed to Sherlock! ❤️
|
|||||||
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
|
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
## Star history
|
## Star History
|
||||||
|
|
||||||
<picture>
|
<picture>
|
||||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
|
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
|
||||||
@@ -135,7 +107,7 @@ Thank you to everyone who has contributed to Sherlock! ❤️
|
|||||||
## License
|
## License
|
||||||
|
|
||||||
MIT © Sherlock Project<br/>
|
MIT © Sherlock Project<br/>
|
||||||
Original Creator - [Siddharth Dushantha](https://github.com/sdushantha)
|
Creator - [Siddharth Dushantha](https://github.com/sdushantha)
|
||||||
|
|
||||||
<!-- Reference Links -->
|
<!-- Reference Links -->
|
||||||
|
|
||||||
|
|||||||
+5
-5
@@ -8,7 +8,7 @@ source = "init"
|
|||||||
|
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "sherlock-project"
|
name = "sherlock-project"
|
||||||
version = "0.16.0"
|
version = "0.16.1"
|
||||||
description = "Hunt down social media accounts by username across social networks"
|
description = "Hunt down social media accounts by username across social networks"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
authors = [
|
authors = [
|
||||||
@@ -29,6 +29,10 @@ classifiers = [
|
|||||||
"Natural Language :: English",
|
"Natural Language :: English",
|
||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
"Programming Language :: Python :: 3.13",
|
||||||
"Topic :: Security"
|
"Topic :: Security"
|
||||||
]
|
]
|
||||||
homepage = "https://sherlockproject.xyz/"
|
homepage = "https://sherlockproject.xyz/"
|
||||||
@@ -46,14 +50,10 @@ PySocks = "^1.7.0"
|
|||||||
requests = "^2.22.0"
|
requests = "^2.22.0"
|
||||||
requests-futures = "^1.0.0"
|
requests-futures = "^1.0.0"
|
||||||
stem = "^1.8.0"
|
stem = "^1.8.0"
|
||||||
torrequest = "^0.1.0"
|
|
||||||
pandas = "^2.2.1"
|
pandas = "^2.2.1"
|
||||||
openpyxl = "^3.0.10"
|
openpyxl = "^3.0.10"
|
||||||
tomli = "^2.2.1"
|
tomli = "^2.2.1"
|
||||||
|
|
||||||
[tool.poetry.extras]
|
|
||||||
tor = ["torrequest"]
|
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
jsonschema = "^4.0.0"
|
jsonschema = "^4.0.0"
|
||||||
rstr = "^3.2.2"
|
rstr = "^3.2.2"
|
||||||
|
|||||||
@@ -37,7 +37,6 @@ class QueryNotify:
|
|||||||
|
|
||||||
self.result = result
|
self.result = result
|
||||||
|
|
||||||
# return
|
|
||||||
|
|
||||||
def start(self, message=None):
|
def start(self, message=None):
|
||||||
"""Notify Start.
|
"""Notify Start.
|
||||||
@@ -56,7 +55,6 @@ class QueryNotify:
|
|||||||
Nothing.
|
Nothing.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# return
|
|
||||||
|
|
||||||
def update(self, result):
|
def update(self, result):
|
||||||
"""Notify Update.
|
"""Notify Update.
|
||||||
@@ -75,7 +73,6 @@ class QueryNotify:
|
|||||||
|
|
||||||
self.result = result
|
self.result = result
|
||||||
|
|
||||||
# return
|
|
||||||
|
|
||||||
def finish(self, message=None):
|
def finish(self, message=None):
|
||||||
"""Notify Finish.
|
"""Notify Finish.
|
||||||
@@ -94,7 +91,6 @@ class QueryNotify:
|
|||||||
Nothing.
|
Nothing.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# return
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
"""Convert Object To String.
|
"""Convert Object To String.
|
||||||
@@ -137,7 +133,6 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
self.print_all = print_all
|
self.print_all = print_all
|
||||||
self.browse = browse
|
self.browse = browse
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def start(self, message):
|
def start(self, message):
|
||||||
"""Notify Start.
|
"""Notify Start.
|
||||||
@@ -163,7 +158,6 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
# An empty line between first line and the result(more clear output)
|
# An empty line between first line and the result(more clear output)
|
||||||
print('\r')
|
print('\r')
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def countResults(self):
|
def countResults(self):
|
||||||
"""This function counts the number of results. Every time the function is called,
|
"""This function counts the number of results. Every time the function is called,
|
||||||
@@ -238,7 +232,7 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
Fore.WHITE + "]" +
|
Fore.WHITE + "]" +
|
||||||
Fore.GREEN + f" {self.result.site_name}:" +
|
Fore.GREEN + f" {self.result.site_name}:" +
|
||||||
Fore.YELLOW + f" {msg}")
|
Fore.YELLOW + f" {msg}")
|
||||||
|
|
||||||
elif result.status == QueryStatus.WAF:
|
elif result.status == QueryStatus.WAF:
|
||||||
if self.print_all:
|
if self.print_all:
|
||||||
print(Style.BRIGHT + Fore.WHITE + "[" +
|
print(Style.BRIGHT + Fore.WHITE + "[" +
|
||||||
@@ -254,10 +248,9 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'"
|
f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def finish(self, message="The processing has been finished."):
|
def finish(self, message="The processing has been finished."):
|
||||||
"""Notify Start.
|
"""Notify Finish.
|
||||||
Will print the last line to the standard output.
|
Will print the last line to the standard output.
|
||||||
Keyword Arguments:
|
Keyword Arguments:
|
||||||
self -- This object.
|
self -- This object.
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,80 +1,149 @@
|
|||||||
{
|
{
|
||||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||||
"title": "Sherlock Target Manifest",
|
"title": "Sherlock Target Manifest",
|
||||||
"description": "Social media targets to probe for the existence of known usernames",
|
"description": "Social media targets to probe for the existence of known usernames",
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"$schema": { "type": "string" }
|
"$schema": { "type": "string" }
|
||||||
},
|
},
|
||||||
"patternProperties": {
|
"patternProperties": {
|
||||||
"^(?!\\$).*?$": {
|
"^(?!\\$).*?$": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"description": "Target name and associated information (key should be human readable name)",
|
"description": "Target name and associated information (key should be human readable name)",
|
||||||
"required": [ "url", "urlMain", "errorType", "username_claimed" ],
|
"required": ["url", "urlMain", "errorType", "username_claimed"],
|
||||||
"properties": {
|
"properties": {
|
||||||
"url": { "type": "string" },
|
"url": { "type": "string" },
|
||||||
"urlMain": { "type": "string" },
|
"urlMain": { "type": "string" },
|
||||||
"urlProbe": { "type": "string" },
|
"urlProbe": { "type": "string" },
|
||||||
"username_claimed": { "type": "string" },
|
"username_claimed": { "type": "string" },
|
||||||
"regexCheck": { "type": "string" },
|
"regexCheck": { "type": "string" },
|
||||||
"isNSFW": { "type": "boolean" },
|
"isNSFW": { "type": "boolean" },
|
||||||
"headers": { "type": "object" },
|
"headers": { "type": "object" },
|
||||||
"request_payload": { "type": "object" },
|
"request_payload": { "type": "object" },
|
||||||
"__comment__": {
|
"__comment__": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
|
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
|
||||||
},
|
},
|
||||||
"tags": {
|
"tags": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
{ "$ref": "#/$defs/tag" },
|
{ "$ref": "#/$defs/tag" },
|
||||||
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
|
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"request_method": {
|
"request_method": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [ "GET", "POST", "HEAD", "PUT" ]
|
"enum": ["GET", "POST", "HEAD", "PUT"]
|
||||||
},
|
},
|
||||||
|
"errorType": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["message", "response_url", "status_code"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["message", "response_url", "status_code"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"errorMsg": {
|
||||||
|
"oneOf": [
|
||||||
|
{ "type": "string" },
|
||||||
|
{ "type": "array", "items": { "type": "string" } }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"errorCode": {
|
||||||
|
"oneOf": [
|
||||||
|
{ "type": "integer" },
|
||||||
|
{ "type": "array", "items": { "type": "integer" } }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"errorUrl": { "type": "string" },
|
||||||
|
"response_url": { "type": "string" }
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"errorMsg": {
|
||||||
|
"oneOf": [
|
||||||
|
{ "properties": { "errorType": { "const": "message" } } },
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
"errorType": {
|
"errorType": {
|
||||||
"type": "string",
|
"type": "array",
|
||||||
"enum": [ "message", "response_url", "status_code" ]
|
"contains": { "const": "message" }
|
||||||
},
|
|
||||||
"errorMsg": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "type": "string" },
|
|
||||||
{ "type": "array", "items": { "type": "string" } }
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"errorCode": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "type": "integer" },
|
|
||||||
{ "type": "array", "items": { "type": "integer" } }
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"errorUrl": { "type": "string" },
|
|
||||||
"response_url": { "type": "string" }
|
|
||||||
},
|
|
||||||
"dependencies": {
|
|
||||||
"errorMsg": {
|
|
||||||
"properties" : { "errorType": { "const": "message" } }
|
|
||||||
},
|
|
||||||
"errorUrl": {
|
|
||||||
"properties": { "errorType": { "const": "response_url" } }
|
|
||||||
},
|
|
||||||
"errorCode": {
|
|
||||||
"properties": { "errorType": { "const": "status_code" } }
|
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
"if": { "properties": { "errorType": { "const": "message" } } },
|
}
|
||||||
"then": { "required": [ "errorMsg" ] },
|
]
|
||||||
"else": {
|
},
|
||||||
"if": { "properties": { "errorType": { "const": "response_url" } } },
|
"errorUrl": {
|
||||||
"then": { "required": [ "errorUrl" ] }
|
"oneOf": [
|
||||||
},
|
{ "properties": { "errorType": { "const": "response_url" } } },
|
||||||
"additionalProperties": false
|
{
|
||||||
|
"properties": {
|
||||||
|
"errorType": {
|
||||||
|
"type": "array",
|
||||||
|
"contains": { "const": "response_url" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"errorCode": {
|
||||||
|
"oneOf": [
|
||||||
|
{ "properties": { "errorType": { "const": "status_code" } } },
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"errorType": {
|
||||||
|
"type": "array",
|
||||||
|
"contains": { "const": "status_code" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"allOf": [
|
||||||
"$defs": {
|
{
|
||||||
"tag": { "type": "string", "enum": [ "adult", "gaming" ] }
|
"if": {
|
||||||
|
"anyOf": [
|
||||||
|
{ "properties": { "errorType": { "const": "message" } } },
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"errorType": {
|
||||||
|
"type": "array",
|
||||||
|
"contains": { "const": "message" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"then": { "required": ["errorMsg"] }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"if": {
|
||||||
|
"anyOf": [
|
||||||
|
{ "properties": { "errorType": { "const": "response_url" } } },
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"errorType": {
|
||||||
|
"type": "array",
|
||||||
|
"contains": { "const": "response_url" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"then": { "required": ["errorUrl"] }
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"additionalProperties": false
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"$defs": {
|
||||||
|
"tag": { "type": "string", "enum": ["adult", "gaming"] }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+68
-123
@@ -136,6 +136,9 @@ def get_response(request_future, error_type, social_network):
|
|||||||
except requests.exceptions.RequestException as err:
|
except requests.exceptions.RequestException as err:
|
||||||
error_context = "Unknown Error"
|
error_context = "Unknown Error"
|
||||||
exception_text = str(err)
|
exception_text = str(err)
|
||||||
|
except UnicodeError as err:
|
||||||
|
error_context = "Encoding Error"
|
||||||
|
exception_text = str(err)
|
||||||
|
|
||||||
return response, error_context, exception_text
|
return response, error_context, exception_text
|
||||||
|
|
||||||
@@ -171,8 +174,6 @@ def sherlock(
|
|||||||
username: str,
|
username: str,
|
||||||
site_data: dict[str, dict[str, str]],
|
site_data: dict[str, dict[str, str]],
|
||||||
query_notify: QueryNotify,
|
query_notify: QueryNotify,
|
||||||
tor: bool = False,
|
|
||||||
unique_tor: bool = False,
|
|
||||||
dump_response: bool = False,
|
dump_response: bool = False,
|
||||||
proxy: Optional[str] = None,
|
proxy: Optional[str] = None,
|
||||||
timeout: int = 60,
|
timeout: int = 60,
|
||||||
@@ -188,8 +189,6 @@ def sherlock(
|
|||||||
query_notify -- Object with base type of QueryNotify().
|
query_notify -- Object with base type of QueryNotify().
|
||||||
This will be used to notify the caller about
|
This will be used to notify the caller about
|
||||||
query results.
|
query results.
|
||||||
tor -- Boolean indicating whether to use a tor circuit for the requests.
|
|
||||||
unique_tor -- Boolean indicating whether to use a new tor circuit for each request.
|
|
||||||
proxy -- String indicating the proxy URL
|
proxy -- String indicating the proxy URL
|
||||||
timeout -- Time in seconds to wait before timing out request.
|
timeout -- Time in seconds to wait before timing out request.
|
||||||
Default is 60 seconds.
|
Default is 60 seconds.
|
||||||
@@ -210,32 +209,9 @@ def sherlock(
|
|||||||
|
|
||||||
# Notify caller that we are starting the query.
|
# Notify caller that we are starting the query.
|
||||||
query_notify.start(username)
|
query_notify.start(username)
|
||||||
# Create session based on request methodology
|
|
||||||
if tor or unique_tor:
|
|
||||||
try:
|
|
||||||
from torrequest import TorRequest # noqa: E402
|
|
||||||
except ImportError:
|
|
||||||
print("Important!")
|
|
||||||
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
|
|
||||||
print("> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`.")
|
|
||||||
print("> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n")
|
|
||||||
sys.exit(query_notify.finish())
|
|
||||||
|
|
||||||
print("Important!")
|
# Normal requests
|
||||||
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
|
underlying_session = requests.session()
|
||||||
|
|
||||||
# Requests using Tor obfuscation
|
|
||||||
try:
|
|
||||||
underlying_request = TorRequest()
|
|
||||||
except OSError:
|
|
||||||
print("Tor not found in system path. Unable to continue.\n")
|
|
||||||
sys.exit(query_notify.finish())
|
|
||||||
|
|
||||||
underlying_session = underlying_request.session
|
|
||||||
else:
|
|
||||||
# Normal requests
|
|
||||||
underlying_session = requests.session()
|
|
||||||
underlying_request = requests.Request()
|
|
||||||
|
|
||||||
# Limit number of workers to 20.
|
# Limit number of workers to 20.
|
||||||
# This is probably vastly overkill.
|
# This is probably vastly overkill.
|
||||||
@@ -359,15 +335,10 @@ def sherlock(
|
|||||||
# Store future in data for access later
|
# Store future in data for access later
|
||||||
net_info["request_future"] = future
|
net_info["request_future"] = future
|
||||||
|
|
||||||
# Reset identify for tor (if needed)
|
|
||||||
if unique_tor:
|
|
||||||
underlying_request.reset_identity()
|
|
||||||
|
|
||||||
# Add this site's results into final dictionary with all the other results.
|
# Add this site's results into final dictionary with all the other results.
|
||||||
results_total[social_network] = results_site
|
results_total[social_network] = results_site
|
||||||
|
|
||||||
# Open the file containing account links
|
# Open the file containing account links
|
||||||
# Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
|
|
||||||
for social_network, net_info in site_data.items():
|
for social_network, net_info in site_data.items():
|
||||||
# Retrieve results again
|
# Retrieve results again
|
||||||
results_site = results_total.get(social_network)
|
results_site = results_total.get(social_network)
|
||||||
@@ -381,6 +352,8 @@ def sherlock(
|
|||||||
|
|
||||||
# Get the expected error type
|
# Get the expected error type
|
||||||
error_type = net_info["errorType"]
|
error_type = net_info["errorType"]
|
||||||
|
if isinstance(error_type, str):
|
||||||
|
error_type: list[str] = [error_type]
|
||||||
|
|
||||||
# Retrieve future and ensure it has finished
|
# Retrieve future and ensure it has finished
|
||||||
future = net_info["request_future"]
|
future = net_info["request_future"]
|
||||||
@@ -425,58 +398,60 @@ def sherlock(
|
|||||||
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
|
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
|
||||||
query_status = QueryStatus.WAF
|
query_status = QueryStatus.WAF
|
||||||
|
|
||||||
elif error_type == "message":
|
|
||||||
# error_flag True denotes no error found in the HTML
|
|
||||||
# error_flag False denotes error found in the HTML
|
|
||||||
error_flag = True
|
|
||||||
errors = net_info.get("errorMsg")
|
|
||||||
# errors will hold the error message
|
|
||||||
# it can be string or list
|
|
||||||
# by isinstance method we can detect that
|
|
||||||
# and handle the case for strings as normal procedure
|
|
||||||
# and if its list we can iterate the errors
|
|
||||||
if isinstance(errors, str):
|
|
||||||
# Checks if the error message is in the HTML
|
|
||||||
# if error is present we will set flag to False
|
|
||||||
if errors in r.text:
|
|
||||||
error_flag = False
|
|
||||||
else:
|
|
||||||
# If it's list, it will iterate all the error message
|
|
||||||
for error in errors:
|
|
||||||
if error in r.text:
|
|
||||||
error_flag = False
|
|
||||||
break
|
|
||||||
if error_flag:
|
|
||||||
query_status = QueryStatus.CLAIMED
|
|
||||||
else:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
elif error_type == "status_code":
|
|
||||||
error_codes = net_info.get("errorCode")
|
|
||||||
query_status = QueryStatus.CLAIMED
|
|
||||||
|
|
||||||
# Type consistency, allowing for both singlets and lists in manifest
|
|
||||||
if isinstance(error_codes, int):
|
|
||||||
error_codes = [error_codes]
|
|
||||||
|
|
||||||
if error_codes is not None and r.status_code in error_codes:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
elif r.status_code >= 300 or r.status_code < 200:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
elif error_type == "response_url":
|
|
||||||
# For this detection method, we have turned off the redirect.
|
|
||||||
# So, there is no need to check the response URL: it will always
|
|
||||||
# match the request. Instead, we will ensure that the response
|
|
||||||
# code indicates that the request was successful (i.e. no 404, or
|
|
||||||
# forward to some odd redirect).
|
|
||||||
if 200 <= r.status_code < 300:
|
|
||||||
query_status = QueryStatus.CLAIMED
|
|
||||||
else:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
else:
|
else:
|
||||||
# It should be impossible to ever get here...
|
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
|
||||||
raise ValueError(
|
error_context = f"Unknown error type '{error_type}' for {social_network}"
|
||||||
f"Unknown Error Type '{error_type}' for " f"site '{social_network}'"
|
query_status = QueryStatus.UNKNOWN
|
||||||
)
|
else:
|
||||||
|
if "message" in error_type:
|
||||||
|
# error_flag True denotes no error found in the HTML
|
||||||
|
# error_flag False denotes error found in the HTML
|
||||||
|
error_flag = True
|
||||||
|
errors = net_info.get("errorMsg")
|
||||||
|
# errors will hold the error message
|
||||||
|
# it can be string or list
|
||||||
|
# by isinstance method we can detect that
|
||||||
|
# and handle the case for strings as normal procedure
|
||||||
|
# and if its list we can iterate the errors
|
||||||
|
if isinstance(errors, str):
|
||||||
|
# Checks if the error message is in the HTML
|
||||||
|
# if error is present we will set flag to False
|
||||||
|
if errors in r.text:
|
||||||
|
error_flag = False
|
||||||
|
else:
|
||||||
|
# If it's list, it will iterate all the error message
|
||||||
|
for error in errors:
|
||||||
|
if error in r.text:
|
||||||
|
error_flag = False
|
||||||
|
break
|
||||||
|
if error_flag:
|
||||||
|
query_status = QueryStatus.CLAIMED
|
||||||
|
else:
|
||||||
|
query_status = QueryStatus.AVAILABLE
|
||||||
|
|
||||||
|
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
|
||||||
|
error_codes = net_info.get("errorCode")
|
||||||
|
query_status = QueryStatus.CLAIMED
|
||||||
|
|
||||||
|
# Type consistency, allowing for both singlets and lists in manifest
|
||||||
|
if isinstance(error_codes, int):
|
||||||
|
error_codes = [error_codes]
|
||||||
|
|
||||||
|
if error_codes is not None and r.status_code in error_codes:
|
||||||
|
query_status = QueryStatus.AVAILABLE
|
||||||
|
elif r.status_code >= 300 or r.status_code < 200:
|
||||||
|
query_status = QueryStatus.AVAILABLE
|
||||||
|
|
||||||
|
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
|
||||||
|
# For this detection method, we have turned off the redirect.
|
||||||
|
# So, there is no need to check the response URL: it will always
|
||||||
|
# match the request. Instead, we will ensure that the response
|
||||||
|
# code indicates that the request was successful (i.e. no 404, or
|
||||||
|
# forward to some odd redirect).
|
||||||
|
if 200 <= r.status_code < 300:
|
||||||
|
query_status = QueryStatus.CLAIMED
|
||||||
|
else:
|
||||||
|
query_status = QueryStatus.AVAILABLE
|
||||||
|
|
||||||
if dump_response:
|
if dump_response:
|
||||||
print("+++++++++++++++++++++")
|
print("+++++++++++++++++++++")
|
||||||
@@ -596,22 +571,6 @@ def main():
|
|||||||
dest="output",
|
dest="output",
|
||||||
help="If using single username, the output of the result will be saved to this file.",
|
help="If using single username, the output of the result will be saved to this file.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--tor",
|
|
||||||
"-t",
|
|
||||||
action="store_true",
|
|
||||||
dest="tor",
|
|
||||||
default=False,
|
|
||||||
help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--unique-tor",
|
|
||||||
"-u",
|
|
||||||
action="store_true",
|
|
||||||
dest="unique_tor",
|
|
||||||
default=False,
|
|
||||||
help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--csv",
|
"--csv",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -720,11 +679,11 @@ def main():
|
|||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-txt",
|
"--txt",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
dest="no_txt",
|
dest="output_txt",
|
||||||
default=False,
|
default=False,
|
||||||
help="Disable creation of a txt file",
|
help="Enable creation of a txt file",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -742,7 +701,7 @@ def main():
|
|||||||
|
|
||||||
# Check for newer version of Sherlock. If it exists, let the user know about it
|
# Check for newer version of Sherlock. If it exists, let the user know about it
|
||||||
try:
|
try:
|
||||||
latest_release_raw = requests.get(forge_api_latest_release).text
|
latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
|
||||||
latest_release_json = json_loads(latest_release_raw)
|
latest_release_json = json_loads(latest_release_raw)
|
||||||
latest_remote_tag = latest_release_json["tag_name"]
|
latest_remote_tag = latest_release_json["tag_name"]
|
||||||
|
|
||||||
@@ -755,22 +714,10 @@ def main():
|
|||||||
except Exception as error:
|
except Exception as error:
|
||||||
print(f"A problem occurred while checking for an update: {error}")
|
print(f"A problem occurred while checking for an update: {error}")
|
||||||
|
|
||||||
# Argument check
|
|
||||||
# TODO regex check on args.proxy
|
|
||||||
if args.tor and (args.proxy is not None):
|
|
||||||
raise Exception("Tor and Proxy cannot be set at the same time.")
|
|
||||||
|
|
||||||
# Make prompts
|
# Make prompts
|
||||||
if args.proxy is not None:
|
if args.proxy is not None:
|
||||||
print("Using the proxy: " + args.proxy)
|
print("Using the proxy: " + args.proxy)
|
||||||
|
|
||||||
if args.tor or args.unique_tor:
|
|
||||||
print("Using Tor to make requests")
|
|
||||||
|
|
||||||
print(
|
|
||||||
"Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
|
|
||||||
)
|
|
||||||
|
|
||||||
if args.no_color:
|
if args.no_color:
|
||||||
# Disable color output.
|
# Disable color output.
|
||||||
init(strip=True, convert=False)
|
init(strip=True, convert=False)
|
||||||
@@ -802,7 +749,7 @@ def main():
|
|||||||
if args.json_file.isnumeric():
|
if args.json_file.isnumeric():
|
||||||
pull_number = args.json_file
|
pull_number = args.json_file
|
||||||
pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
|
pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
|
||||||
pull_request_raw = requests.get(pull_url).text
|
pull_request_raw = requests.get(pull_url, timeout=10).text
|
||||||
pull_request_json = json_loads(pull_request_raw)
|
pull_request_json = json_loads(pull_request_raw)
|
||||||
|
|
||||||
# Check if it's a valid pull request
|
# Check if it's a valid pull request
|
||||||
@@ -871,8 +818,6 @@ def main():
|
|||||||
username,
|
username,
|
||||||
site_data,
|
site_data,
|
||||||
query_notify,
|
query_notify,
|
||||||
tor=args.tor,
|
|
||||||
unique_tor=args.unique_tor,
|
|
||||||
dump_response=args.dump_response,
|
dump_response=args.dump_response,
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
timeout=args.timeout,
|
timeout=args.timeout,
|
||||||
@@ -888,7 +833,7 @@ def main():
|
|||||||
else:
|
else:
|
||||||
result_file = f"{username}.txt"
|
result_file = f"{username}.txt"
|
||||||
|
|
||||||
if not args.no_txt:
|
if args.output_txt:
|
||||||
with open(result_file, "w", encoding="utf-8") as file:
|
with open(result_file, "w", encoding="utf-8") as file:
|
||||||
exists_counter = 0
|
exists_counter = 0
|
||||||
for website_name in results:
|
for website_name in results:
|
||||||
@@ -973,8 +918,8 @@ def main():
|
|||||||
{
|
{
|
||||||
"username": usernames,
|
"username": usernames,
|
||||||
"name": names,
|
"name": names,
|
||||||
"url_main": url_main,
|
"url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main],
|
||||||
"url_user": url_user,
|
"url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user],
|
||||||
"exists": exists,
|
"exists": exists,
|
||||||
"http_status": http_status,
|
"http_status": http_status,
|
||||||
"response_time_s": response_time_s,
|
"response_time_s": response_time_s,
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import requests
|
|||||||
import secrets
|
import secrets
|
||||||
|
|
||||||
|
|
||||||
MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
|
MANIFEST_URL = "https://data.sherlockproject.xyz"
|
||||||
EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"
|
EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"
|
||||||
|
|
||||||
class SiteInformation:
|
class SiteInformation:
|
||||||
@@ -121,15 +121,10 @@ class SitesInformation:
|
|||||||
# users from creating issue about false positives which has already been fixed or having outdated data
|
# users from creating issue about false positives which has already been fixed or having outdated data
|
||||||
data_file_path = MANIFEST_URL
|
data_file_path = MANIFEST_URL
|
||||||
|
|
||||||
# Ensure that specified data file has correct extension.
|
|
||||||
if not data_file_path.lower().endswith(".json"):
|
|
||||||
raise FileNotFoundError(f"Incorrect JSON file extension for data file '{data_file_path}'.")
|
|
||||||
|
|
||||||
# if "http://" == data_file_path[:7].lower() or "https://" == data_file_path[:8].lower():
|
|
||||||
if data_file_path.lower().startswith("http"):
|
if data_file_path.lower().startswith("http"):
|
||||||
# Reference is to a URL.
|
# Reference is to a URL.
|
||||||
try:
|
try:
|
||||||
response = requests.get(url=data_file_path)
|
response = requests.get(url=data_file_path, timeout=30)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
raise FileNotFoundError(
|
raise FileNotFoundError(
|
||||||
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
|
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
|
||||||
@@ -166,7 +161,7 @@ class SitesInformation:
|
|||||||
|
|
||||||
if honor_exclusions:
|
if honor_exclusions:
|
||||||
try:
|
try:
|
||||||
response = requests.get(url=EXCLUSIONS_URL)
|
response = requests.get(url=EXCLUSIONS_URL, timeout=10)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
exclusions = response.text.splitlines()
|
exclusions = response.text.splitlines()
|
||||||
exclusions = [exclusion.strip() for exclusion in exclusions]
|
exclusions = [exclusion.strip() for exclusion in exclusions]
|
||||||
|
|||||||
@@ -0,0 +1,47 @@
|
|||||||
|
"""Tests for handling usernames with special/unicode characters."""
|
||||||
|
|
||||||
|
from concurrent.futures import Future
|
||||||
|
|
||||||
|
from sherlock_project.sherlock import get_response
|
||||||
|
|
||||||
|
|
||||||
|
def _make_future_with_exception(exc):
|
||||||
|
"""Create a Future that raises the given exception."""
|
||||||
|
future = Future()
|
||||||
|
future.set_exception(exc)
|
||||||
|
return future
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_response_handles_unicode_decode_error():
|
||||||
|
"""Regression test for issue #2730.
|
||||||
|
|
||||||
|
Usernames with special characters (e.g. 'Émile') can trigger a
|
||||||
|
UnicodeDecodeError inside the requests library during redirect
|
||||||
|
handling. This must not crash the program.
|
||||||
|
"""
|
||||||
|
future = _make_future_with_exception(
|
||||||
|
UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte")
|
||||||
|
)
|
||||||
|
response, error_context, exception_text = get_response(
|
||||||
|
request_future=future,
|
||||||
|
error_type=["status_code"],
|
||||||
|
social_network="TestSite",
|
||||||
|
)
|
||||||
|
assert response is None
|
||||||
|
assert error_context == "Encoding Error"
|
||||||
|
assert "utf-8" in exception_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_response_handles_unicode_encode_error():
|
||||||
|
"""UnicodeEncodeError should also be caught (subclass of UnicodeError)."""
|
||||||
|
future = _make_future_with_exception(
|
||||||
|
UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)")
|
||||||
|
)
|
||||||
|
response, error_context, exception_text = get_response(
|
||||||
|
request_future=future,
|
||||||
|
error_type=["status_code"],
|
||||||
|
social_network="TestSite",
|
||||||
|
)
|
||||||
|
assert response is None
|
||||||
|
assert error_context == "Encoding Error"
|
||||||
|
assert "ascii" in exception_text
|
||||||
+3
-3
@@ -4,7 +4,7 @@ from sherlock_interactives import Interactives
|
|||||||
from sherlock_interactives import InteractivesSubprocessError
|
from sherlock_interactives import InteractivesSubprocessError
|
||||||
|
|
||||||
def test_remove_nsfw(sites_obj):
|
def test_remove_nsfw(sites_obj):
|
||||||
nsfw_target: str = 'Pornhub'
|
nsfw_target: str = 'Xvideos'
|
||||||
assert nsfw_target in {site.name: site.information for site in sites_obj}
|
assert nsfw_target in {site.name: site.information for site in sites_obj}
|
||||||
sites_obj.remove_nsfw_sites()
|
sites_obj.remove_nsfw_sites()
|
||||||
assert nsfw_target not in {site.name: site.information for site in sites_obj}
|
assert nsfw_target not in {site.name: site.information for site in sites_obj}
|
||||||
@@ -12,8 +12,8 @@ def test_remove_nsfw(sites_obj):
|
|||||||
|
|
||||||
# Parametrized sites should *not* include Motherless, which is acting as the control
|
# Parametrized sites should *not* include Motherless, which is acting as the control
|
||||||
@pytest.mark.parametrize('nsfwsites', [
|
@pytest.mark.parametrize('nsfwsites', [
|
||||||
['Pornhub'],
|
['Xvideos'],
|
||||||
['Pornhub', 'Xvideos'],
|
['Xvideos', 'Erome'],
|
||||||
])
|
])
|
||||||
def test_nsfw_explicit_selection(sites_obj, nsfwsites):
|
def test_nsfw_explicit_selection(sites_obj, nsfwsites):
|
||||||
for site in nsfwsites:
|
for site in nsfwsites:
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUAN
|
|||||||
"""Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`."""
|
"""Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`."""
|
||||||
def replace_upper_bound(match: re.Match) -> str: # type: ignore
|
def replace_upper_bound(match: re.Match) -> str: # type: ignore
|
||||||
lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore
|
lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore
|
||||||
|
nonlocal upper_bound
|
||||||
upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823
|
upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823
|
||||||
return f'{{{lower_bound},{upper_bound}}}'
|
return f'{{{lower_bound},{upper_bound}}}'
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user