Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 707597202d |
+1
-1
@@ -65,7 +65,7 @@ The Actor provides three types of outputs:
|
||||
| Field | Type | Required | Description |
|
||||
|-------|------|----------|-------------|
|
||||
| `username` | string | Yes | Username the search was conducted for |
|
||||
| `links` | array | Yes | Array with found links to the social media |
|
||||
| `links` | arrray | Yes | Array with found links to the social media |
|
||||
| `links[]`| string | No | URL to the account
|
||||
|
||||
### Example Dataset Item (JSON)
|
||||
|
||||
+1
-1
@@ -1,5 +1,5 @@
|
||||
### REPOSITORY
|
||||
/.github/CODEOWNERS @sdushantha @ppfeister
|
||||
/.github/CODEOWNERS @sdushantha
|
||||
/.github/FUNDING.yml @sdushantha
|
||||
/LICENSE @sdushantha
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ on:
|
||||
- '**/*.py'
|
||||
- '**/*.ini'
|
||||
- '**/*.toml'
|
||||
- 'Dockerfile'
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
@@ -22,17 +21,15 @@ on:
|
||||
- '**/*.py'
|
||||
- '**/*.ini'
|
||||
- '**/*.toml'
|
||||
- 'Dockerfile'
|
||||
|
||||
jobs:
|
||||
tox-lint:
|
||||
# Linting is ran through tox to ensure that the same linter is used by local runners
|
||||
runs-on: ubuntu-latest
|
||||
# Linting is run through tox to ensure that the same linter
|
||||
# is used by local runners
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up linting environment
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Install tox and related dependencies
|
||||
@@ -44,8 +41,7 @@ jobs:
|
||||
tox-matrix:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
# We want to know what specific versions it fails on
|
||||
fail-fast: false
|
||||
fail-fast: false # We want to know what specicic versions it fails on
|
||||
matrix:
|
||||
os: [
|
||||
ubuntu-latest,
|
||||
@@ -57,13 +53,11 @@ jobs:
|
||||
'3.11',
|
||||
'3.12',
|
||||
'3.13',
|
||||
'3.14',
|
||||
'3.14t',
|
||||
]
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up environment ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install tox and related dependencies
|
||||
@@ -73,22 +67,3 @@ jobs:
|
||||
pip install tox-gh-actions
|
||||
- name: Run tox
|
||||
run: tox
|
||||
docker-build-test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Get version from pyproject.toml
|
||||
id: get-version
|
||||
run: |
|
||||
VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
- name: Build Docker image
|
||||
run: |
|
||||
docker build \
|
||||
--build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
|
||||
-t sherlock-test:latest .
|
||||
- name: Test Docker image runs
|
||||
run: docker run --rm sherlock-test:latest --version
|
||||
|
||||
@@ -17,40 +17,29 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
# Checkout the base branch but fetch all history to avoid a second fetch call
|
||||
ref: ${{ github.base_ref }}
|
||||
fetch-depth: 0
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.13"
|
||||
python-version: '3.13'
|
||||
|
||||
- name: Install Poetry
|
||||
uses: abatilo/actions-poetry@v4
|
||||
with:
|
||||
poetry-version: "latest"
|
||||
poetry-version: 'latest'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install --no-interaction --with dev
|
||||
|
||||
- name: Prepare JSON versions for comparison
|
||||
- name: Drop in place updated manifest from base
|
||||
run: |
|
||||
# Fetch only the PR's branch head (single network call in this step)
|
||||
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
|
||||
|
||||
# Find the merge-base commit between the target branch and the PR branch
|
||||
MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
|
||||
echo "Comparing PR head against merge-base commit: $MERGE_BASE"
|
||||
|
||||
# Safely extract the file from the PR's head and the merge-base commit
|
||||
git show pr:sherlock_project/resources/data.json > data.json.head
|
||||
git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
|
||||
|
||||
# CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
|
||||
# This ensures that pytest runs against the new, updated file.
|
||||
cp data.json.head sherlock_project/resources/data.json
|
||||
cp sherlock_project/resources/data.json data.json.base
|
||||
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1
|
||||
git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json
|
||||
cp sherlock_project/resources/data.json data.json.head
|
||||
|
||||
- name: Discover modified targets
|
||||
id: discover-modified
|
||||
@@ -58,16 +47,8 @@ jobs:
|
||||
CHANGED=$(
|
||||
python - <<'EOF'
|
||||
import json
|
||||
import sys
|
||||
try:
|
||||
with open("data.json.base") as f: base = json.load(f)
|
||||
with open("data.json.head") as f: head = json.load(f)
|
||||
except FileNotFoundError as e:
|
||||
print(f"Error: Could not find {e.filename}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
with open("data.json.base") as f: base = json.load(f)
|
||||
with open("data.json.head") as f: head = json.load(f)
|
||||
|
||||
changed = []
|
||||
for k, v in head.items():
|
||||
@@ -82,13 +63,6 @@ jobs:
|
||||
echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
|
||||
echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Validate remote manifest against local schema
|
||||
if: steps.discover-modified.outputs.changed_targets != ''
|
||||
run: |
|
||||
poetry run pytest tests/test_manifest.py::test_validate_manifest_against_local_schema
|
||||
|
||||
# --- The rest of the steps below are unchanged ---
|
||||
|
||||
- name: Validate modified targets
|
||||
if: steps.discover-modified.outputs.changed_targets != ''
|
||||
continue-on-error: true
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@
|
||||
# 3. Build image with BOTH latest and version tags
|
||||
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
|
||||
|
||||
FROM python:3.12-slim-bullseye AS build
|
||||
FROM python:3.12-slim-bullseye as build
|
||||
WORKDIR /sherlock
|
||||
|
||||
RUN pip3 install --no-cache-dir --upgrade pip
|
||||
|
||||
+11
-17
@@ -1,45 +1,39 @@
|
||||
#!/usr/bin/env python
|
||||
# This module generates the listing of supported sites which can be found in
|
||||
# sites.mdx. It also organizes all the sites in alphanumeric order
|
||||
# sites.md. It also organizes all the sites in alphanumeric order
|
||||
import json
|
||||
import os
|
||||
|
||||
|
||||
DATA_REL_URI: str = "sherlock_project/resources/data.json"
|
||||
|
||||
DEFAULT_ENCODING = "utf-8"
|
||||
|
||||
# Read the data.json file
|
||||
with open(DATA_REL_URI, "r", encoding=DEFAULT_ENCODING) as data_file:
|
||||
with open(DATA_REL_URI, "r", encoding="utf-8") as data_file:
|
||||
data: dict = json.load(data_file)
|
||||
|
||||
# Removes schema-specific keywords for proper processing
|
||||
social_networks = data.copy()
|
||||
social_networks: dict = dict(data)
|
||||
social_networks.pop('$schema', None)
|
||||
|
||||
# Sort the social networks in alphanumeric order
|
||||
social_networks = sorted(social_networks.items())
|
||||
social_networks: list = sorted(social_networks.items())
|
||||
|
||||
# Make output dir where the site list will be written
|
||||
os.mkdir("output")
|
||||
|
||||
# Write the list of supported sites to sites.mdx
|
||||
with open("output/sites.mdx", "w", encoding=DEFAULT_ENCODING) as site_file:
|
||||
site_file.write("---\n")
|
||||
site_file.write("title: 'List of supported sites'\n")
|
||||
site_file.write("sidebarTitle: 'Supported sites'\n")
|
||||
site_file.write("icon: 'globe'\n")
|
||||
site_file.write("description: 'Sherlock currently supports **400+** sites'\n")
|
||||
site_file.write("---\n\n")
|
||||
|
||||
# Write the list of supported sites to sites.md
|
||||
with open("output/sites.mdx", "w") as site_file:
|
||||
site_file.write("---\ntitle: 'List of supported sites'\nsidebarTitle: 'Supported sites'\nicon: 'globe'\ndescription: 'Sherlock currently supports **400+** sites'\n---\n\n")
|
||||
for social_network, info in social_networks:
|
||||
url_main = info["urlMain"]
|
||||
is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
|
||||
site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")
|
||||
|
||||
# Overwrite the data.json file with sorted data
|
||||
with open(DATA_REL_URI, "w", encoding=DEFAULT_ENCODING) as data_file:
|
||||
with open(DATA_REL_URI, "w") as data_file:
|
||||
sorted_data = json.dumps(data, indent=2, sort_keys=True)
|
||||
data_file.write(sorted_data)
|
||||
data_file.write("\n") # Keep the newline after writing data
|
||||
data_file.write("\n")
|
||||
|
||||
print("Finished updating supported site listing!")
|
||||
|
||||
|
||||
+22
-4
@@ -23,11 +23,11 @@
|
||||
|
||||
> [!WARNING]
|
||||
> Packages for ParrotOS and Ubuntu 24.04, maintained by a third party, appear to be __broken__.
|
||||
> Users of these systems should defer to [`uv`](https://docs.astral.sh/uv/)/`pipx`/`pip` or Docker.
|
||||
> Users of these systems should defer to pipx/pip or Docker.
|
||||
|
||||
| Method | Notes |
|
||||
| - | - |
|
||||
| `pipx install sherlock-project` | `pip` or [`uv`](https://docs.astral.sh/uv/) may be used in place of `pipx` |
|
||||
| `pipx install sherlock-project` | `pip` may be used in place of `pipx` |
|
||||
| `docker run -it --rm sherlock/sherlock` |
|
||||
| `dnf install sherlock-project` | |
|
||||
|
||||
@@ -97,6 +97,24 @@ optional arguments:
|
||||
--local, -l Force the use of the local data.json file.
|
||||
--nsfw Include checking of NSFW sites from default list.
|
||||
```
|
||||
## Apify Actor Usage [](https://apify.com/netmilk/sherlock?fpr=sherlock)
|
||||
|
||||
<a href="https://apify.com/netmilk/sherlock?fpr=sherlock"><img src="https://apify.com/ext/run-on-apify.png" alt="Run Sherlock Actor on Apify" width="176" height="39" /></a>
|
||||
|
||||
You can run Sherlock in the cloud without installation using the [Sherlock Actor](https://apify.com/netmilk/sherlock?fpr=sherlock) on [Apify](https://apify.com?fpr=sherlock) free of charge.
|
||||
|
||||
``` bash
|
||||
$ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock
|
||||
[{
|
||||
"username": "user123",
|
||||
"links": [
|
||||
"https://www.1337x.to/user/user123/",
|
||||
...
|
||||
]
|
||||
}]
|
||||
```
|
||||
|
||||
Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).
|
||||
|
||||
## Credits
|
||||
|
||||
@@ -106,7 +124,7 @@ Thank you to everyone who has contributed to Sherlock! ❤️
|
||||
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
|
||||
</a>
|
||||
|
||||
## Star History
|
||||
## Star history
|
||||
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
|
||||
@@ -117,7 +135,7 @@ Thank you to everyone who has contributed to Sherlock! ❤️
|
||||
## License
|
||||
|
||||
MIT © Sherlock Project<br/>
|
||||
Creator - [Siddharth Dushantha](https://github.com/sdushantha)
|
||||
Original Creator - [Siddharth Dushantha](https://github.com/sdushantha)
|
||||
|
||||
<!-- Reference Links -->
|
||||
|
||||
|
||||
+4
-4
@@ -29,10 +29,6 @@ classifiers = [
|
||||
"Natural Language :: English",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Topic :: Security"
|
||||
]
|
||||
homepage = "https://sherlockproject.xyz/"
|
||||
@@ -50,10 +46,14 @@ PySocks = "^1.7.0"
|
||||
requests = "^2.22.0"
|
||||
requests-futures = "^1.0.0"
|
||||
stem = "^1.8.0"
|
||||
torrequest = "^0.1.0"
|
||||
pandas = "^2.2.1"
|
||||
openpyxl = "^3.0.10"
|
||||
tomli = "^2.2.1"
|
||||
|
||||
[tool.poetry.extras]
|
||||
tor = ["torrequest"]
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
jsonschema = "^4.0.0"
|
||||
rstr = "^3.2.2"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,149 +1,80 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "Sherlock Target Manifest",
|
||||
"description": "Social media targets to probe for the existence of known usernames",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"$schema": { "type": "string" }
|
||||
},
|
||||
"patternProperties": {
|
||||
"^(?!\\$).*?$": {
|
||||
"type": "object",
|
||||
"description": "Target name and associated information (key should be human readable name)",
|
||||
"required": ["url", "urlMain", "errorType", "username_claimed"],
|
||||
"properties": {
|
||||
"url": { "type": "string" },
|
||||
"urlMain": { "type": "string" },
|
||||
"urlProbe": { "type": "string" },
|
||||
"username_claimed": { "type": "string" },
|
||||
"regexCheck": { "type": "string" },
|
||||
"isNSFW": { "type": "boolean" },
|
||||
"headers": { "type": "object" },
|
||||
"request_payload": { "type": "object" },
|
||||
"__comment__": {
|
||||
"type": "string",
|
||||
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
|
||||
},
|
||||
"tags": {
|
||||
"oneOf": [
|
||||
{ "$ref": "#/$defs/tag" },
|
||||
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
|
||||
]
|
||||
},
|
||||
"request_method": {
|
||||
"type": "string",
|
||||
"enum": ["GET", "POST", "HEAD", "PUT"]
|
||||
},
|
||||
"errorType": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["message", "response_url", "status_code"]
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "Sherlock Target Manifest",
|
||||
"description": "Social media targets to probe for the existence of known usernames",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"$schema": { "type": "string" }
|
||||
},
|
||||
"patternProperties": {
|
||||
"^(?!\\$).*?$": {
|
||||
"type": "object",
|
||||
"description": "Target name and associated information (key should be human readable name)",
|
||||
"required": [ "url", "urlMain", "errorType", "username_claimed" ],
|
||||
"properties": {
|
||||
"url": { "type": "string" },
|
||||
"urlMain": { "type": "string" },
|
||||
"urlProbe": { "type": "string" },
|
||||
"username_claimed": { "type": "string" },
|
||||
"regexCheck": { "type": "string" },
|
||||
"isNSFW": { "type": "boolean" },
|
||||
"headers": { "type": "object" },
|
||||
"request_payload": { "type": "object" },
|
||||
"__comment__": {
|
||||
"type": "string",
|
||||
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
|
||||
},
|
||||
"tags": {
|
||||
"oneOf": [
|
||||
{ "$ref": "#/$defs/tag" },
|
||||
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
|
||||
]
|
||||
},
|
||||
"request_method": {
|
||||
"type": "string",
|
||||
"enum": [ "GET", "POST", "HEAD", "PUT" ]
|
||||
},
|
||||
"errorType": {
|
||||
"type": "string",
|
||||
"enum": [ "message", "response_url", "status_code" ]
|
||||
},
|
||||
"errorMsg": {
|
||||
"oneOf": [
|
||||
{ "type": "string" },
|
||||
{ "type": "array", "items": { "type": "string" } }
|
||||
]
|
||||
},
|
||||
"errorCode": {
|
||||
"oneOf": [
|
||||
{ "type": "integer" },
|
||||
{ "type": "array", "items": { "type": "integer" } }
|
||||
]
|
||||
},
|
||||
"errorUrl": { "type": "string" },
|
||||
"response_url": { "type": "string" }
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": ["message", "response_url", "status_code"]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"errorMsg": {
|
||||
"oneOf": [
|
||||
{ "type": "string" },
|
||||
{ "type": "array", "items": { "type": "string" } }
|
||||
]
|
||||
},
|
||||
"errorCode": {
|
||||
"oneOf": [
|
||||
{ "type": "integer" },
|
||||
{ "type": "array", "items": { "type": "integer" } }
|
||||
]
|
||||
},
|
||||
"errorUrl": { "type": "string" },
|
||||
"response_url": { "type": "string" }
|
||||
},
|
||||
"dependencies": {
|
||||
"errorMsg": {
|
||||
"oneOf": [
|
||||
{ "properties": { "errorType": { "const": "message" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "message" }
|
||||
"dependencies": {
|
||||
"errorMsg": {
|
||||
"properties" : { "errorType": { "const": "message" } }
|
||||
},
|
||||
"errorUrl": {
|
||||
"properties": { "errorType": { "const": "response_url" } }
|
||||
},
|
||||
"errorCode": {
|
||||
"properties": { "errorType": { "const": "status_code" } }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"errorUrl": {
|
||||
"oneOf": [
|
||||
{ "properties": { "errorType": { "const": "response_url" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "response_url" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"errorCode": {
|
||||
"oneOf": [
|
||||
{ "properties": { "errorType": { "const": "status_code" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "status_code" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"if": { "properties": { "errorType": { "const": "message" } } },
|
||||
"then": { "required": [ "errorMsg" ] },
|
||||
"else": {
|
||||
"if": { "properties": { "errorType": { "const": "response_url" } } },
|
||||
"then": { "required": [ "errorUrl" ] }
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"allOf": [
|
||||
{
|
||||
"if": {
|
||||
"anyOf": [
|
||||
{ "properties": { "errorType": { "const": "message" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "message" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"then": { "required": ["errorMsg"] }
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"anyOf": [
|
||||
{ "properties": { "errorType": { "const": "response_url" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "response_url" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"then": { "required": ["errorUrl"] }
|
||||
}
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"$defs": {
|
||||
"tag": { "type": "string", "enum": [ "adult", "gaming" ] }
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"$defs": {
|
||||
"tag": { "type": "string", "enum": ["adult", "gaming"] }
|
||||
}
|
||||
}
|
||||
|
||||
+119
-71
@@ -171,6 +171,8 @@ def sherlock(
|
||||
username: str,
|
||||
site_data: dict[str, dict[str, str]],
|
||||
query_notify: QueryNotify,
|
||||
tor: bool = False,
|
||||
unique_tor: bool = False,
|
||||
dump_response: bool = False,
|
||||
proxy: Optional[str] = None,
|
||||
timeout: int = 60,
|
||||
@@ -186,6 +188,8 @@ def sherlock(
|
||||
query_notify -- Object with base type of QueryNotify().
|
||||
This will be used to notify the caller about
|
||||
query results.
|
||||
tor -- Boolean indicating whether to use a tor circuit for the requests.
|
||||
unique_tor -- Boolean indicating whether to use a new tor circuit for each request.
|
||||
proxy -- String indicating the proxy URL
|
||||
timeout -- Time in seconds to wait before timing out request.
|
||||
Default is 60 seconds.
|
||||
@@ -206,9 +210,32 @@ def sherlock(
|
||||
|
||||
# Notify caller that we are starting the query.
|
||||
query_notify.start(username)
|
||||
# Create session based on request methodology
|
||||
if tor or unique_tor:
|
||||
try:
|
||||
from torrequest import TorRequest # noqa: E402
|
||||
except ImportError:
|
||||
print("Important!")
|
||||
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
|
||||
print("> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`.")
|
||||
print("> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n")
|
||||
sys.exit(query_notify.finish())
|
||||
|
||||
# Normal requests
|
||||
underlying_session = requests.session()
|
||||
print("Important!")
|
||||
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
|
||||
|
||||
# Requests using Tor obfuscation
|
||||
try:
|
||||
underlying_request = TorRequest()
|
||||
except OSError:
|
||||
print("Tor not found in system path. Unable to continue.\n")
|
||||
sys.exit(query_notify.finish())
|
||||
|
||||
underlying_session = underlying_request.session
|
||||
else:
|
||||
# Normal requests
|
||||
underlying_session = requests.session()
|
||||
underlying_request = requests.Request()
|
||||
|
||||
# Limit number of workers to 20.
|
||||
# This is probably vastly overkill.
|
||||
@@ -332,10 +359,15 @@ def sherlock(
|
||||
# Store future in data for access later
|
||||
net_info["request_future"] = future
|
||||
|
||||
# Reset identify for tor (if needed)
|
||||
if unique_tor:
|
||||
underlying_request.reset_identity()
|
||||
|
||||
# Add this site's results into final dictionary with all the other results.
|
||||
results_total[social_network] = results_site
|
||||
|
||||
# Open the file containing account links
|
||||
# Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
|
||||
for social_network, net_info in site_data.items():
|
||||
# Retrieve results again
|
||||
results_site = results_total.get(social_network)
|
||||
@@ -349,8 +381,6 @@ def sherlock(
|
||||
|
||||
# Get the expected error type
|
||||
error_type = net_info["errorType"]
|
||||
if isinstance(error_type, str):
|
||||
error_type: list[str] = [error_type]
|
||||
|
||||
# Retrieve future and ensure it has finished
|
||||
future = net_info["request_future"]
|
||||
@@ -395,60 +425,58 @@ def sherlock(
|
||||
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
|
||||
query_status = QueryStatus.WAF
|
||||
|
||||
else:
|
||||
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
|
||||
error_context = f"Unknown error type '{error_type}' for {social_network}"
|
||||
query_status = QueryStatus.UNKNOWN
|
||||
elif error_type == "message":
|
||||
# error_flag True denotes no error found in the HTML
|
||||
# error_flag False denotes error found in the HTML
|
||||
error_flag = True
|
||||
errors = net_info.get("errorMsg")
|
||||
# errors will hold the error message
|
||||
# it can be string or list
|
||||
# by isinstance method we can detect that
|
||||
# and handle the case for strings as normal procedure
|
||||
# and if its list we can iterate the errors
|
||||
if isinstance(errors, str):
|
||||
# Checks if the error message is in the HTML
|
||||
# if error is present we will set flag to False
|
||||
if errors in r.text:
|
||||
error_flag = False
|
||||
else:
|
||||
if "message" in error_type:
|
||||
# error_flag True denotes no error found in the HTML
|
||||
# error_flag False denotes error found in the HTML
|
||||
error_flag = True
|
||||
errors = net_info.get("errorMsg")
|
||||
# errors will hold the error message
|
||||
# it can be string or list
|
||||
# by isinstance method we can detect that
|
||||
# and handle the case for strings as normal procedure
|
||||
# and if its list we can iterate the errors
|
||||
if isinstance(errors, str):
|
||||
# Checks if the error message is in the HTML
|
||||
# if error is present we will set flag to False
|
||||
if errors in r.text:
|
||||
error_flag = False
|
||||
else:
|
||||
# If it's list, it will iterate all the error message
|
||||
for error in errors:
|
||||
if error in r.text:
|
||||
error_flag = False
|
||||
break
|
||||
if error_flag:
|
||||
query_status = QueryStatus.CLAIMED
|
||||
else:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
# If it's list, it will iterate all the error message
|
||||
for error in errors:
|
||||
if error in r.text:
|
||||
error_flag = False
|
||||
break
|
||||
if error_flag:
|
||||
query_status = QueryStatus.CLAIMED
|
||||
else:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
elif error_type == "status_code":
|
||||
error_codes = net_info.get("errorCode")
|
||||
query_status = QueryStatus.CLAIMED
|
||||
|
||||
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
|
||||
error_codes = net_info.get("errorCode")
|
||||
query_status = QueryStatus.CLAIMED
|
||||
# Type consistency, allowing for both singlets and lists in manifest
|
||||
if isinstance(error_codes, int):
|
||||
error_codes = [error_codes]
|
||||
|
||||
# Type consistency, allowing for both singlets and lists in manifest
|
||||
if isinstance(error_codes, int):
|
||||
error_codes = [error_codes]
|
||||
|
||||
if error_codes is not None and r.status_code in error_codes:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
elif r.status_code >= 300 or r.status_code < 200:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
|
||||
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
|
||||
# For this detection method, we have turned off the redirect.
|
||||
# So, there is no need to check the response URL: it will always
|
||||
# match the request. Instead, we will ensure that the response
|
||||
# code indicates that the request was successful (i.e. no 404, or
|
||||
# forward to some odd redirect).
|
||||
if 200 <= r.status_code < 300:
|
||||
query_status = QueryStatus.CLAIMED
|
||||
else:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
if error_codes is not None and r.status_code in error_codes:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
elif r.status_code >= 300 or r.status_code < 200:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
elif error_type == "response_url":
|
||||
# For this detection method, we have turned off the redirect.
|
||||
# So, there is no need to check the response URL: it will always
|
||||
# match the request. Instead, we will ensure that the response
|
||||
# code indicates that the request was successful (i.e. no 404, or
|
||||
# forward to some odd redirect).
|
||||
if 200 <= r.status_code < 300:
|
||||
query_status = QueryStatus.CLAIMED
|
||||
else:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
else:
|
||||
# It should be impossible to ever get here...
|
||||
raise ValueError(
|
||||
f"Unknown Error Type '{error_type}' for " f"site '{social_network}'"
|
||||
)
|
||||
|
||||
if dump_response:
|
||||
print("+++++++++++++++++++++")
|
||||
@@ -568,6 +596,22 @@ def main():
|
||||
dest="output",
|
||||
help="If using single username, the output of the result will be saved to this file.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tor",
|
||||
"-t",
|
||||
action="store_true",
|
||||
dest="tor",
|
||||
default=False,
|
||||
help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--unique-tor",
|
||||
"-u",
|
||||
action="store_true",
|
||||
dest="unique_tor",
|
||||
default=False,
|
||||
help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--csv",
|
||||
action="store_true",
|
||||
@@ -675,22 +719,12 @@ def main():
|
||||
help="Include checking of NSFW sites from default list.",
|
||||
)
|
||||
|
||||
# TODO deprecated in favor of --txt, retained for workflow compatibility, to be removed
|
||||
# in future release
|
||||
parser.add_argument(
|
||||
"--no-txt",
|
||||
action="store_true",
|
||||
dest="no_txt",
|
||||
default=False,
|
||||
help="Disable creation of a txt file - WILL BE DEPRECATED",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--txt",
|
||||
action="store_true",
|
||||
dest="output_txt",
|
||||
default=False,
|
||||
help="Enable creation of a txt file",
|
||||
help="Disable creation of a txt file",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@@ -708,7 +742,7 @@ def main():
|
||||
|
||||
# Check for newer version of Sherlock. If it exists, let the user know about it
|
||||
try:
|
||||
latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
|
||||
latest_release_raw = requests.get(forge_api_latest_release).text
|
||||
latest_release_json = json_loads(latest_release_raw)
|
||||
latest_remote_tag = latest_release_json["tag_name"]
|
||||
|
||||
@@ -721,10 +755,22 @@ def main():
|
||||
except Exception as error:
|
||||
print(f"A problem occurred while checking for an update: {error}")
|
||||
|
||||
# Argument check
|
||||
# TODO regex check on args.proxy
|
||||
if args.tor and (args.proxy is not None):
|
||||
raise Exception("Tor and Proxy cannot be set at the same time.")
|
||||
|
||||
# Make prompts
|
||||
if args.proxy is not None:
|
||||
print("Using the proxy: " + args.proxy)
|
||||
|
||||
if args.tor or args.unique_tor:
|
||||
print("Using Tor to make requests")
|
||||
|
||||
print(
|
||||
"Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
|
||||
)
|
||||
|
||||
if args.no_color:
|
||||
# Disable color output.
|
||||
init(strip=True, convert=False)
|
||||
@@ -756,7 +802,7 @@ def main():
|
||||
if args.json_file.isnumeric():
|
||||
pull_number = args.json_file
|
||||
pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
|
||||
pull_request_raw = requests.get(pull_url, timeout=10).text
|
||||
pull_request_raw = requests.get(pull_url).text
|
||||
pull_request_json = json_loads(pull_request_raw)
|
||||
|
||||
# Check if it's a valid pull request
|
||||
@@ -825,6 +871,8 @@ def main():
|
||||
username,
|
||||
site_data,
|
||||
query_notify,
|
||||
tor=args.tor,
|
||||
unique_tor=args.unique_tor,
|
||||
dump_response=args.dump_response,
|
||||
proxy=args.proxy,
|
||||
timeout=args.timeout,
|
||||
@@ -840,7 +888,7 @@ def main():
|
||||
else:
|
||||
result_file = f"{username}.txt"
|
||||
|
||||
if args.output_txt:
|
||||
if not args.no_txt:
|
||||
with open(result_file, "w", encoding="utf-8") as file:
|
||||
exists_counter = 0
|
||||
for website_name in results:
|
||||
@@ -925,8 +973,8 @@ def main():
|
||||
{
|
||||
"username": usernames,
|
||||
"name": names,
|
||||
"url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main],
|
||||
"url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user],
|
||||
"url_main": url_main,
|
||||
"url_user": url_user,
|
||||
"exists": exists,
|
||||
"http_status": http_status,
|
||||
"response_time_s": response_time_s,
|
||||
|
||||
@@ -129,7 +129,7 @@ class SitesInformation:
|
||||
if data_file_path.lower().startswith("http"):
|
||||
# Reference is to a URL.
|
||||
try:
|
||||
response = requests.get(url=data_file_path, timeout=30)
|
||||
response = requests.get(url=data_file_path)
|
||||
except Exception as error:
|
||||
raise FileNotFoundError(
|
||||
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
|
||||
@@ -166,7 +166,7 @@ class SitesInformation:
|
||||
|
||||
if honor_exclusions:
|
||||
try:
|
||||
response = requests.get(url=EXCLUSIONS_URL, timeout=10)
|
||||
response = requests.get(url=EXCLUSIONS_URL)
|
||||
if response.status_code == 200:
|
||||
exclusions = response.text.splitlines()
|
||||
exclusions = [exclusion.strip() for exclusion in exclusions]
|
||||
|
||||
@@ -16,7 +16,6 @@ def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUAN
|
||||
"""Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`."""
|
||||
def replace_upper_bound(match: re.Match) -> str: # type: ignore
|
||||
lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore
|
||||
nonlocal upper_bound
|
||||
upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823
|
||||
return f'{{{lower_bound},{upper_bound}}}'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user