Compare commits
40 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d25848cc5f | |||
| f32f4ffaee | |||
| 7379ba7b19 | |||
| 3aeb6d6356 | |||
| 4246a7b16f | |||
| e44fe49c8f | |||
| 52cd5fdfc1 | |||
| 947f1ad2b6 | |||
| cfcc82aaca | |||
| 0794e02b52 | |||
| 975965abed | |||
| a678bed154 | |||
| 4ec6f1eec0 | |||
| d1527376e7 | |||
| b99719ce60 | |||
| dc869852bc | |||
| 5cd769c2f4 | |||
| 977ad5c1a4 | |||
| 57a0ccef38 | |||
| 0e7219b191 | |||
| 1d2c4b134f | |||
| b245c462c9 | |||
| 66d9733da7 | |||
| c55deab3a2 | |||
| edcb697793 | |||
| d314d75db1 | |||
| c89a52caf7 | |||
| 9c18cfe273 | |||
| 779d4c33f4 | |||
| 072c24687b | |||
| b811b2bd47 | |||
| 355bfbd328 | |||
| 7b3632bdad | |||
| cd7c52e4fa | |||
| 86140af50e | |||
| e5cd5e5bfe | |||
| dc89f1cd27 | |||
| 2016892e64 | |||
| 44ad8f506a | |||
| cfa4097df9 |
+1
-1
@@ -1,5 +1,5 @@
|
||||
### REPOSITORY
|
||||
/.github/CODEOWNERS @sdushantha
|
||||
/.github/CODEOWNERS @sdushantha @ppfeister
|
||||
/.github/FUNDING.yml @sdushantha
|
||||
/LICENSE @sdushantha
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ on:
|
||||
- '**/*.py'
|
||||
- '**/*.ini'
|
||||
- '**/*.toml'
|
||||
- 'Dockerfile'
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
@@ -21,11 +22,13 @@ on:
|
||||
- '**/*.py'
|
||||
- '**/*.ini'
|
||||
- '**/*.toml'
|
||||
- 'Dockerfile'
|
||||
|
||||
jobs:
|
||||
tox-lint:
|
||||
# Linting is ran through tox to ensure that the same linter is used by local runners
|
||||
runs-on: ubuntu-latest
|
||||
# Linting is run through tox to ensure that the same linter
|
||||
# is used by local runners
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up linting environment
|
||||
@@ -41,7 +44,8 @@ jobs:
|
||||
tox-matrix:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false # We want to know what specicic versions it fails on
|
||||
# We want to know what specific versions it fails on
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [
|
||||
ubuntu-latest,
|
||||
@@ -67,3 +71,22 @@ jobs:
|
||||
pip install tox-gh-actions
|
||||
- name: Run tox
|
||||
run: tox
|
||||
docker-build-test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Get version from pyproject.toml
|
||||
id: get-version
|
||||
run: |
|
||||
VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
- name: Build Docker image
|
||||
run: |
|
||||
docker build \
|
||||
--build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
|
||||
-t sherlock-test:latest .
|
||||
- name: Test Docker image runs
|
||||
run: docker run --rm sherlock-test:latest --version
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@
|
||||
# 3. Build image with BOTH latest and version tags
|
||||
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
|
||||
|
||||
FROM python:3.12-slim-bullseye as build
|
||||
FROM python:3.12-slim-bullseye AS build
|
||||
WORKDIR /sherlock
|
||||
|
||||
RUN pip3 install --no-cache-dir --upgrade pip
|
||||
|
||||
+1
-4
@@ -46,13 +46,10 @@ PySocks = "^1.7.0"
|
||||
requests = "^2.22.0"
|
||||
requests-futures = "^1.0.0"
|
||||
stem = "^1.8.0"
|
||||
torrequest = "^0.1.0"
|
||||
pandas = "^2.2.1"
|
||||
openpyxl = "^3.0.10"
|
||||
tomli = "^2.2.1"
|
||||
|
||||
[tool.poetry.extras]
|
||||
tor = ["torrequest"]
|
||||
pyyaml = "^6.0.3"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
jsonschema = "^4.0.0"
|
||||
|
||||
@@ -79,13 +79,13 @@
|
||||
"username_claimed": "pink"
|
||||
},
|
||||
"AllMyLinks": {
|
||||
"errorMsg": "Not Found",
|
||||
"errorType": "message",
|
||||
"regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
|
||||
"url": "https://allmylinks.com/{}",
|
||||
"urlMain": "https://allmylinks.com/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"errorMsg": "Page not found",
|
||||
"errorType": "message",
|
||||
"regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
|
||||
"url": "https://allmylinks.com/{}",
|
||||
"urlMain": "https://allmylinks.com/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"AniWorld": {
|
||||
"errorMsg": "Dieses Profil ist nicht verf\u00fcgbar",
|
||||
"errorType": "message",
|
||||
@@ -115,7 +115,7 @@
|
||||
"username_claimed": "lio24d"
|
||||
},
|
||||
"Apple Discussions": {
|
||||
"errorMsg": "The page you tried was not found. You may have used an outdated link or may have typed the address (URL) incorrectly.",
|
||||
"errorMsg": "Looking for something in Apple Support Communities?",
|
||||
"errorType": "message",
|
||||
"url": "https://discussions.apple.com/profile/{}",
|
||||
"urlMain": "https://discussions.apple.com",
|
||||
@@ -572,8 +572,7 @@
|
||||
"username_claimed": "brown"
|
||||
},
|
||||
"CyberDefenders": {
|
||||
"errorMsg": "<title>Blue Team Training for SOC analysts and DFIR - CyberDefenders</title>",
|
||||
"errorType": "message",
|
||||
"errorType": "status_code",
|
||||
"regexCheck": "^[^\\/:*?\"<>|@]{3,50}$",
|
||||
"request_method": "GET",
|
||||
"url": "https://cyberdefenders.org/p/{}",
|
||||
@@ -600,6 +599,12 @@
|
||||
"urlMain": "https://www.dailymotion.com/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"dcinside": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://gallog.dcinside.com/{}",
|
||||
"urlMain": "https://www.dcinside.com/",
|
||||
"username_claimed": "anrbrb"
|
||||
},
|
||||
"Dealabs": {
|
||||
"errorMsg": "La page que vous essayez",
|
||||
"errorType": "message",
|
||||
@@ -608,13 +613,14 @@
|
||||
"urlMain": "https://www.dealabs.com/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"DeviantART": {
|
||||
"errorType": "status_code",
|
||||
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
|
||||
"url": "https://{}.deviantart.com",
|
||||
"urlMain": "https://deviantart.com",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"DeviantArt": {
|
||||
"errorType": "message",
|
||||
"errorMsg": "Llama Not Found",
|
||||
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
|
||||
"url": "https://www.deviantart.com/{}",
|
||||
"urlMain": "https://www.deviantart.com/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"DigitalSpy": {
|
||||
"errorMsg": "The page you were looking for could not be found.",
|
||||
"errorType": "message",
|
||||
@@ -1440,12 +1446,12 @@
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"Mydramalist": {
|
||||
"errorMsg": "Sign in - MyDramaList",
|
||||
"errorType": "message",
|
||||
"url": "https://www.mydramalist.com/profile/{}",
|
||||
"urlMain": "https://mydramalist.com",
|
||||
"username_claimed": "elhadidy12398"
|
||||
},
|
||||
"errorMsg": "The requested page was not found",
|
||||
"errorType": "message",
|
||||
"url": "https://www.mydramalist.com/profile/{}",
|
||||
"urlMain": "https://mydramalist.com",
|
||||
"username_claimed": "elhadidy12398"
|
||||
},
|
||||
"Myspace": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://myspace.com/{}",
|
||||
@@ -1459,6 +1465,13 @@
|
||||
"urlMain": "https://www.native-instruments.com/forum/",
|
||||
"username_claimed": "jambert"
|
||||
},
|
||||
"namuwiki": {
|
||||
"__comment__": "This is a Korean site and it's expected to return false negatives in certain other regions.",
|
||||
"errorType": "status_code",
|
||||
"url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}",
|
||||
"urlMain": "https://namu.wiki/",
|
||||
"username_claimed": "namu"
|
||||
},
|
||||
"NationStates Nation": {
|
||||
"errorMsg": "Was this your nation? It may have ceased to exist due to inactivity, but can rise again!",
|
||||
"errorType": "message",
|
||||
@@ -1809,8 +1822,7 @@
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"Roblox": {
|
||||
"errorMsg": "Page cannot be found or no longer exists",
|
||||
"errorType": "message",
|
||||
"errorType": "status_code",
|
||||
"url": "https://www.roblox.com/user.aspx?username={}",
|
||||
"urlMain": "https://www.roblox.com/",
|
||||
"username_claimed": "bluewolfekiller"
|
||||
@@ -1918,7 +1930,7 @@
|
||||
},
|
||||
"SlideShare": {
|
||||
"errorType": "message",
|
||||
"errorMsg": "<title>Username available</title>",
|
||||
"errorMsg": "<title>Page no longer exists</title>",
|
||||
"url": "https://slideshare.net/{}",
|
||||
"urlMain": "https://slideshare.net/",
|
||||
"username_claimed": "blue"
|
||||
@@ -1952,6 +1964,13 @@
|
||||
"urlMain": "https://www.snapchat.com",
|
||||
"username_claimed": "teamsnapchat"
|
||||
},
|
||||
"SOOP": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://www.sooplive.co.kr/station/{}",
|
||||
"urlMain": "https://www.sooplive.co.kr/",
|
||||
"urlProbe": "https://api-channel.sooplive.co.kr/v1.1/channel/{}/station",
|
||||
"username_claimed": "udkn"
|
||||
},
|
||||
"SoundCloud": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://soundcloud.com/{}",
|
||||
@@ -2119,6 +2138,12 @@
|
||||
"urlMain": "https://themeforest.net/",
|
||||
"username_claimed": "user"
|
||||
},
|
||||
"tistory": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://{}.tistory.com/",
|
||||
"urlMain": "https://www.tistory.com/",
|
||||
"username_claimed": "notice"
|
||||
},
|
||||
"TnAFlix": {
|
||||
"errorType": "status_code",
|
||||
"isNSFW": true,
|
||||
@@ -2793,7 +2818,7 @@
|
||||
"username_claimed": "green"
|
||||
},
|
||||
"threads": {
|
||||
"errorMsg": "<title>Threads</title>",
|
||||
"errorMsg": "<title>Threads • Log in</title>",
|
||||
"errorType": "message",
|
||||
"headers": {
|
||||
"Sec-Fetch-Mode": "navigate"
|
||||
|
||||
@@ -1,80 +1,149 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "Sherlock Target Manifest",
|
||||
"description": "Social media targets to probe for the existence of known usernames",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"$schema": { "type": "string" }
|
||||
},
|
||||
"patternProperties": {
|
||||
"^(?!\\$).*?$": {
|
||||
"type": "object",
|
||||
"description": "Target name and associated information (key should be human readable name)",
|
||||
"required": [ "url", "urlMain", "errorType", "username_claimed" ],
|
||||
"properties": {
|
||||
"url": { "type": "string" },
|
||||
"urlMain": { "type": "string" },
|
||||
"urlProbe": { "type": "string" },
|
||||
"username_claimed": { "type": "string" },
|
||||
"regexCheck": { "type": "string" },
|
||||
"isNSFW": { "type": "boolean" },
|
||||
"headers": { "type": "object" },
|
||||
"request_payload": { "type": "object" },
|
||||
"__comment__": {
|
||||
"type": "string",
|
||||
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
|
||||
},
|
||||
"tags": {
|
||||
"oneOf": [
|
||||
{ "$ref": "#/$defs/tag" },
|
||||
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
|
||||
]
|
||||
},
|
||||
"request_method": {
|
||||
"type": "string",
|
||||
"enum": [ "GET", "POST", "HEAD", "PUT" ]
|
||||
},
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "Sherlock Target Manifest",
|
||||
"description": "Social media targets to probe for the existence of known usernames",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"$schema": { "type": "string" }
|
||||
},
|
||||
"patternProperties": {
|
||||
"^(?!\\$).*?$": {
|
||||
"type": "object",
|
||||
"description": "Target name and associated information (key should be human readable name)",
|
||||
"required": ["url", "urlMain", "errorType", "username_claimed"],
|
||||
"properties": {
|
||||
"url": { "type": "string" },
|
||||
"urlMain": { "type": "string" },
|
||||
"urlProbe": { "type": "string" },
|
||||
"username_claimed": { "type": "string" },
|
||||
"regexCheck": { "type": "string" },
|
||||
"isNSFW": { "type": "boolean" },
|
||||
"headers": { "type": "object" },
|
||||
"request_payload": { "type": "object" },
|
||||
"__comment__": {
|
||||
"type": "string",
|
||||
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
|
||||
},
|
||||
"tags": {
|
||||
"oneOf": [
|
||||
{ "$ref": "#/$defs/tag" },
|
||||
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
|
||||
]
|
||||
},
|
||||
"request_method": {
|
||||
"type": "string",
|
||||
"enum": ["GET", "POST", "HEAD", "PUT"]
|
||||
},
|
||||
"errorType": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["message", "response_url", "status_code"]
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": ["message", "response_url", "status_code"]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"errorMsg": {
|
||||
"oneOf": [
|
||||
{ "type": "string" },
|
||||
{ "type": "array", "items": { "type": "string" } }
|
||||
]
|
||||
},
|
||||
"errorCode": {
|
||||
"oneOf": [
|
||||
{ "type": "integer" },
|
||||
{ "type": "array", "items": { "type": "integer" } }
|
||||
]
|
||||
},
|
||||
"errorUrl": { "type": "string" },
|
||||
"response_url": { "type": "string" }
|
||||
},
|
||||
"dependencies": {
|
||||
"errorMsg": {
|
||||
"oneOf": [
|
||||
{ "properties": { "errorType": { "const": "message" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "string",
|
||||
"enum": [ "message", "response_url", "status_code" ]
|
||||
},
|
||||
"errorMsg": {
|
||||
"oneOf": [
|
||||
{ "type": "string" },
|
||||
{ "type": "array", "items": { "type": "string" } }
|
||||
]
|
||||
},
|
||||
"errorCode": {
|
||||
"oneOf": [
|
||||
{ "type": "integer" },
|
||||
{ "type": "array", "items": { "type": "integer" } }
|
||||
]
|
||||
},
|
||||
"errorUrl": { "type": "string" },
|
||||
"response_url": { "type": "string" }
|
||||
},
|
||||
"dependencies": {
|
||||
"errorMsg": {
|
||||
"properties" : { "errorType": { "const": "message" } }
|
||||
},
|
||||
"errorUrl": {
|
||||
"properties": { "errorType": { "const": "response_url" } }
|
||||
},
|
||||
"errorCode": {
|
||||
"properties": { "errorType": { "const": "status_code" } }
|
||||
"type": "array",
|
||||
"contains": { "const": "message" }
|
||||
}
|
||||
},
|
||||
"if": { "properties": { "errorType": { "const": "message" } } },
|
||||
"then": { "required": [ "errorMsg" ] },
|
||||
"else": {
|
||||
"if": { "properties": { "errorType": { "const": "response_url" } } },
|
||||
"then": { "required": [ "errorUrl" ] }
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"errorUrl": {
|
||||
"oneOf": [
|
||||
{ "properties": { "errorType": { "const": "response_url" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "response_url" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"errorCode": {
|
||||
"oneOf": [
|
||||
{ "properties": { "errorType": { "const": "status_code" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "status_code" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"$defs": {
|
||||
"tag": { "type": "string", "enum": [ "adult", "gaming" ] }
|
||||
},
|
||||
"allOf": [
|
||||
{
|
||||
"if": {
|
||||
"anyOf": [
|
||||
{ "properties": { "errorType": { "const": "message" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "message" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"then": { "required": ["errorMsg"] }
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"anyOf": [
|
||||
{ "properties": { "errorType": { "const": "response_url" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "response_url" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"then": { "required": ["errorUrl"] }
|
||||
}
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"$defs": {
|
||||
"tag": { "type": "string", "enum": ["adult", "gaming"] }
|
||||
}
|
||||
}
|
||||
|
||||
+71
-119
@@ -171,8 +171,6 @@ def sherlock(
|
||||
username: str,
|
||||
site_data: dict[str, dict[str, str]],
|
||||
query_notify: QueryNotify,
|
||||
tor: bool = False,
|
||||
unique_tor: bool = False,
|
||||
dump_response: bool = False,
|
||||
proxy: Optional[str] = None,
|
||||
timeout: int = 60,
|
||||
@@ -188,8 +186,6 @@ def sherlock(
|
||||
query_notify -- Object with base type of QueryNotify().
|
||||
This will be used to notify the caller about
|
||||
query results.
|
||||
tor -- Boolean indicating whether to use a tor circuit for the requests.
|
||||
unique_tor -- Boolean indicating whether to use a new tor circuit for each request.
|
||||
proxy -- String indicating the proxy URL
|
||||
timeout -- Time in seconds to wait before timing out request.
|
||||
Default is 60 seconds.
|
||||
@@ -210,32 +206,9 @@ def sherlock(
|
||||
|
||||
# Notify caller that we are starting the query.
|
||||
query_notify.start(username)
|
||||
# Create session based on request methodology
|
||||
if tor or unique_tor:
|
||||
try:
|
||||
from torrequest import TorRequest # noqa: E402
|
||||
except ImportError:
|
||||
print("Important!")
|
||||
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
|
||||
print("> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`.")
|
||||
print("> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n")
|
||||
sys.exit(query_notify.finish())
|
||||
|
||||
print("Important!")
|
||||
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
|
||||
|
||||
# Requests using Tor obfuscation
|
||||
try:
|
||||
underlying_request = TorRequest()
|
||||
except OSError:
|
||||
print("Tor not found in system path. Unable to continue.\n")
|
||||
sys.exit(query_notify.finish())
|
||||
|
||||
underlying_session = underlying_request.session
|
||||
else:
|
||||
# Normal requests
|
||||
underlying_session = requests.session()
|
||||
underlying_request = requests.Request()
|
||||
# Normal requests
|
||||
underlying_session = requests.session()
|
||||
|
||||
# Limit number of workers to 20.
|
||||
# This is probably vastly overkill.
|
||||
@@ -359,15 +332,10 @@ def sherlock(
|
||||
# Store future in data for access later
|
||||
net_info["request_future"] = future
|
||||
|
||||
# Reset identify for tor (if needed)
|
||||
if unique_tor:
|
||||
underlying_request.reset_identity()
|
||||
|
||||
# Add this site's results into final dictionary with all the other results.
|
||||
results_total[social_network] = results_site
|
||||
|
||||
# Open the file containing account links
|
||||
# Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
|
||||
for social_network, net_info in site_data.items():
|
||||
# Retrieve results again
|
||||
results_site = results_total.get(social_network)
|
||||
@@ -381,6 +349,8 @@ def sherlock(
|
||||
|
||||
# Get the expected error type
|
||||
error_type = net_info["errorType"]
|
||||
if isinstance(error_type, str):
|
||||
error_type: list[str] = [error_type]
|
||||
|
||||
# Retrieve future and ensure it has finished
|
||||
future = net_info["request_future"]
|
||||
@@ -425,58 +395,60 @@ def sherlock(
|
||||
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
|
||||
query_status = QueryStatus.WAF
|
||||
|
||||
elif error_type == "message":
|
||||
# error_flag True denotes no error found in the HTML
|
||||
# error_flag False denotes error found in the HTML
|
||||
error_flag = True
|
||||
errors = net_info.get("errorMsg")
|
||||
# errors will hold the error message
|
||||
# it can be string or list
|
||||
# by isinstance method we can detect that
|
||||
# and handle the case for strings as normal procedure
|
||||
# and if its list we can iterate the errors
|
||||
if isinstance(errors, str):
|
||||
# Checks if the error message is in the HTML
|
||||
# if error is present we will set flag to False
|
||||
if errors in r.text:
|
||||
error_flag = False
|
||||
else:
|
||||
# If it's list, it will iterate all the error message
|
||||
for error in errors:
|
||||
if error in r.text:
|
||||
error_flag = False
|
||||
break
|
||||
if error_flag:
|
||||
query_status = QueryStatus.CLAIMED
|
||||
else:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
elif error_type == "status_code":
|
||||
error_codes = net_info.get("errorCode")
|
||||
query_status = QueryStatus.CLAIMED
|
||||
|
||||
# Type consistency, allowing for both singlets and lists in manifest
|
||||
if isinstance(error_codes, int):
|
||||
error_codes = [error_codes]
|
||||
|
||||
if error_codes is not None and r.status_code in error_codes:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
elif r.status_code >= 300 or r.status_code < 200:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
elif error_type == "response_url":
|
||||
# For this detection method, we have turned off the redirect.
|
||||
# So, there is no need to check the response URL: it will always
|
||||
# match the request. Instead, we will ensure that the response
|
||||
# code indicates that the request was successful (i.e. no 404, or
|
||||
# forward to some odd redirect).
|
||||
if 200 <= r.status_code < 300:
|
||||
query_status = QueryStatus.CLAIMED
|
||||
else:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
else:
|
||||
# It should be impossible to ever get here...
|
||||
raise ValueError(
|
||||
f"Unknown Error Type '{error_type}' for " f"site '{social_network}'"
|
||||
)
|
||||
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
|
||||
error_context = f"Unknown error type '{error_type}' for {social_network}"
|
||||
query_status = QueryStatus.UNKNOWN
|
||||
else:
|
||||
if "message" in error_type:
|
||||
# error_flag True denotes no error found in the HTML
|
||||
# error_flag False denotes error found in the HTML
|
||||
error_flag = True
|
||||
errors = net_info.get("errorMsg")
|
||||
# errors will hold the error message
|
||||
# it can be string or list
|
||||
# by isinstance method we can detect that
|
||||
# and handle the case for strings as normal procedure
|
||||
# and if its list we can iterate the errors
|
||||
if isinstance(errors, str):
|
||||
# Checks if the error message is in the HTML
|
||||
# if error is present we will set flag to False
|
||||
if errors in r.text:
|
||||
error_flag = False
|
||||
else:
|
||||
# If it's list, it will iterate all the error message
|
||||
for error in errors:
|
||||
if error in r.text:
|
||||
error_flag = False
|
||||
break
|
||||
if error_flag:
|
||||
query_status = QueryStatus.CLAIMED
|
||||
else:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
|
||||
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
|
||||
error_codes = net_info.get("errorCode")
|
||||
query_status = QueryStatus.CLAIMED
|
||||
|
||||
# Type consistency, allowing for both singlets and lists in manifest
|
||||
if isinstance(error_codes, int):
|
||||
error_codes = [error_codes]
|
||||
|
||||
if error_codes is not None and r.status_code in error_codes:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
elif r.status_code >= 300 or r.status_code < 200:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
|
||||
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
|
||||
# For this detection method, we have turned off the redirect.
|
||||
# So, there is no need to check the response URL: it will always
|
||||
# match the request. Instead, we will ensure that the response
|
||||
# code indicates that the request was successful (i.e. no 404, or
|
||||
# forward to some odd redirect).
|
||||
if 200 <= r.status_code < 300:
|
||||
query_status = QueryStatus.CLAIMED
|
||||
else:
|
||||
query_status = QueryStatus.AVAILABLE
|
||||
|
||||
if dump_response:
|
||||
print("+++++++++++++++++++++")
|
||||
@@ -596,22 +568,6 @@ def main():
|
||||
dest="output",
|
||||
help="If using single username, the output of the result will be saved to this file.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tor",
|
||||
"-t",
|
||||
action="store_true",
|
||||
dest="tor",
|
||||
default=False,
|
||||
help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--unique-tor",
|
||||
"-u",
|
||||
action="store_true",
|
||||
dest="unique_tor",
|
||||
default=False,
|
||||
help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--csv",
|
||||
action="store_true",
|
||||
@@ -719,12 +675,22 @@ def main():
|
||||
help="Include checking of NSFW sites from default list.",
|
||||
)
|
||||
|
||||
# TODO deprecated in favor of --txt, retained for workflow compatibility, to be removed
|
||||
# in future release
|
||||
parser.add_argument(
|
||||
"--no-txt",
|
||||
action="store_true",
|
||||
dest="no_txt",
|
||||
default=False,
|
||||
help="Disable creation of a txt file",
|
||||
help="Disable creation of a txt file - WILL BE DEPRECATED",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--txt",
|
||||
action="store_true",
|
||||
dest="output_txt",
|
||||
default=False,
|
||||
help="Enable creation of a txt file",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@@ -742,7 +708,7 @@ def main():
|
||||
|
||||
# Check for newer version of Sherlock. If it exists, let the user know about it
|
||||
try:
|
||||
latest_release_raw = requests.get(forge_api_latest_release).text
|
||||
latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
|
||||
latest_release_json = json_loads(latest_release_raw)
|
||||
latest_remote_tag = latest_release_json["tag_name"]
|
||||
|
||||
@@ -755,22 +721,10 @@ def main():
|
||||
except Exception as error:
|
||||
print(f"A problem occurred while checking for an update: {error}")
|
||||
|
||||
# Argument check
|
||||
# TODO regex check on args.proxy
|
||||
if args.tor and (args.proxy is not None):
|
||||
raise Exception("Tor and Proxy cannot be set at the same time.")
|
||||
|
||||
# Make prompts
|
||||
if args.proxy is not None:
|
||||
print("Using the proxy: " + args.proxy)
|
||||
|
||||
if args.tor or args.unique_tor:
|
||||
print("Using Tor to make requests")
|
||||
|
||||
print(
|
||||
"Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
|
||||
)
|
||||
|
||||
if args.no_color:
|
||||
# Disable color output.
|
||||
init(strip=True, convert=False)
|
||||
@@ -802,7 +756,7 @@ def main():
|
||||
if args.json_file.isnumeric():
|
||||
pull_number = args.json_file
|
||||
pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
|
||||
pull_request_raw = requests.get(pull_url).text
|
||||
pull_request_raw = requests.get(pull_url, timeout=10).text
|
||||
pull_request_json = json_loads(pull_request_raw)
|
||||
|
||||
# Check if it's a valid pull request
|
||||
@@ -871,8 +825,6 @@ def main():
|
||||
username,
|
||||
site_data,
|
||||
query_notify,
|
||||
tor=args.tor,
|
||||
unique_tor=args.unique_tor,
|
||||
dump_response=args.dump_response,
|
||||
proxy=args.proxy,
|
||||
timeout=args.timeout,
|
||||
@@ -888,7 +840,7 @@ def main():
|
||||
else:
|
||||
result_file = f"{username}.txt"
|
||||
|
||||
if not args.no_txt:
|
||||
if args.output_txt:
|
||||
with open(result_file, "w", encoding="utf-8") as file:
|
||||
exists_counter = 0
|
||||
for website_name in results:
|
||||
|
||||
@@ -129,7 +129,7 @@ class SitesInformation:
|
||||
if data_file_path.lower().startswith("http"):
|
||||
# Reference is to a URL.
|
||||
try:
|
||||
response = requests.get(url=data_file_path)
|
||||
response = requests.get(url=data_file_path, timeout=30)
|
||||
except Exception as error:
|
||||
raise FileNotFoundError(
|
||||
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
|
||||
@@ -166,7 +166,7 @@ class SitesInformation:
|
||||
|
||||
if honor_exclusions:
|
||||
try:
|
||||
response = requests.get(url=EXCLUSIONS_URL)
|
||||
response = requests.get(url=EXCLUSIONS_URL, timeout=10)
|
||||
if response.status_code == 200:
|
||||
exclusions = response.text.splitlines()
|
||||
exclusions = [exclusion.strip() for exclusion in exclusions]
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
import requests
|
||||
import yaml
|
||||
|
||||
|
||||
NUCLEI_FINGERPRINT_URL: str = "https://raw.githubusercontent.com/projectdiscovery/nuclei-templates/refs/heads/main/http/global-matchers/global-waf-detect.yaml"
|
||||
|
||||
def _check_nuclei_regex(matcher: dict[str,str|list[str]], response: requests.Response) -> bool:
    """Evaluate a single Nuclei ``regex`` matcher against an HTTP response.

    Keyword arguments:
    matcher  -- One matcher entry from a Nuclei template. Reads the keys
                ``regex`` (pattern or list of patterns), ``part`` and
                ``condition``.
    response -- The HTTP response to inspect.

    Returns True if the matcher is satisfied: every pattern is found when
    ``condition`` is ``and``, at least one pattern is found otherwise.
    A matcher without any pattern never matches.
    """
    import re

    and_cond: bool = matcher.get('condition', '') == 'and'

    # Nuclei matches against the body when no part is given.
    part = matcher.get('part', 'body')
    target_text: str
    if part == 'body':
        target_text = response.text
    elif part == 'header':
        target_text = str(response.headers)
    else:
        # Any other part value is treated as "whole response".
        target_text = response.text + str(response.headers)

    patterns = matcher.get('regex') or []
    if isinstance(patterns, str):
        # Tolerate a single pattern given as a bare string.
        patterns = [patterns]
    if not patterns:
        # Nothing to look for: do not report a WAF on an empty matcher.
        return False

    # Lazy generator so any()/all() can stop at the first decisive pattern.
    hits = (re.search(pattern, target_text) is not None for pattern in patterns)
    return all(hits) if and_cond else any(hits)
|
||||
|
||||
def _check_nuclei_words(matcher: dict[str,str|list[str]], response: requests.Response) -> bool:
    """Evaluate a single Nuclei ``word`` matcher against an HTTP response.

    Keyword arguments:
    matcher  -- One matcher entry from a Nuclei template. Reads the keys
                ``words`` (word or list of words), ``part`` and ``condition``.
    response -- The HTTP response to inspect.

    Returns True if the matcher is satisfied: every word is present when
    ``condition`` is ``and``, at least one word is present otherwise.
    A matcher without any word never matches.
    """
    and_cond: bool = matcher.get('condition', '') == 'and'

    # Nuclei matches against the body when no part is given.
    part = matcher.get('part', 'body')
    target_text: str
    if part == 'body':
        target_text = response.text
    elif part == 'header':
        target_text = str(response.headers)
    else:
        # Any other part value is treated as "whole response".
        target_text = response.text + str(response.headers)

    words = matcher.get('words') or []
    if isinstance(words, str):
        # Tolerate a single word given as a bare string.
        words = [words]
    if not words:
        # Nothing to look for: do not report a WAF on an empty matcher.
        return False

    # Lazy generator so any()/all() can stop at the first decisive word.
    hits = (word in target_text for word in words)
    return all(hits) if and_cond else any(hits)
|
||||
|
||||
def fetch_nuclei_fingerprints() -> list[dict[str,str|list[str]]] | None:
    """Fetch the latest Nuclei WAF fingerprints from the official repository.

    Downloads the template at ``NUCLEI_FINGERPRINT_URL``, parses it as YAML
    and returns the ``matchers`` list of its first ``http`` entry.

    Returns the list of matcher dictionaries, or None (after printing a
    message) if the download fails, the body is not valid YAML, or the
    document does not have the expected ``http[0].matchers`` layout.
    """
    try:
        response = requests.get(NUCLEI_FINGERPRINT_URL, timeout=10)
        response.raise_for_status()
        raw = yaml.safe_load(response.text)
    except requests.RequestException as e:
        print(f"Error fetching Nuclei fingerprints: {e}")
        return None
    except yaml.YAMLError as e:
        print(f"Error parsing YAML data: {e}")
        return None

    # The template is remote content: a changed or empty layout must be
    # reported as a failed fetch (None) rather than crash the caller.
    try:
        fingerprints: list[dict[str,str|list[str]]] = raw['http'][0]['matchers']
    except (KeyError, IndexError, TypeError) as e:
        print(f"Unexpected Nuclei template layout: {e!r}")
        return None

    if not isinstance(fingerprints, list):
        print("Unexpected Nuclei template layout: 'matchers' is not a list")
        return None

    return fingerprints
|
||||
|
||||
def nuclei_check(response: requests.Response, fingerprints: list[dict[str,str|list[str]]]) -> bool:
    """Check if the response matches any of the WAF fingerprints.

    Keyword arguments:
    response     -- The HTTP response to check.
    fingerprints -- The list of Nuclei WAF fingerprints to check against.

    Each fingerprint is tried in order; matchers of type ``word`` and
    ``regex`` are evaluated, any other type is skipped.

    Returns True if a WAF is detected, False otherwise.
    """
    # `or []` keeps an empty/absent fingerprint list from raising; it simply
    # means nothing can be detected.
    for matcher in fingerprints or []:
        matcher_type = matcher.get('type')
        if matcher_type == 'word':
            matched = _check_nuclei_words(matcher, response)
        elif matcher_type == 'regex':
            matched = _check_nuclei_regex(matcher, response)
        else:
            # Matcher types other than word/regex are not supported here.
            continue

        # Only a positive hit ends the search; a miss must fall through so
        # the remaining fingerprints still get evaluated.
        if matched:
            return True

    return False
|
||||
@@ -0,0 +1,26 @@
|
||||
id: global-waf-detect
|
||||
http:
|
||||
- global-matchers: true
|
||||
matchers-condition: or
|
||||
matchers:
|
||||
- type: regex
|
||||
name: regexSite
|
||||
regex:
|
||||
- '(?i)access.to.this.page.has.been.denied'
|
||||
- '(?i)http(s)?://(www.)?anotheroneblocked.\w+.whywasiblocked'
|
||||
condition: or
|
||||
part: response
|
||||
|
||||
- type: word
|
||||
name: wordSiteBody
|
||||
part: body
|
||||
words:
|
||||
- "bad_text_in_body"
|
||||
|
||||
- type: word
|
||||
name: wordSiteHead
|
||||
part: header
|
||||
condition: or
|
||||
words:
|
||||
- "text_in_head"
|
||||
- "other_in_head"
|
||||
@@ -0,0 +1,107 @@
|
||||
import os
|
||||
import unittest
|
||||
from unittest.mock import patch, Mock
|
||||
import requests
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
import yaml
|
||||
|
||||
from sherlock_project import waf_check
|
||||
|
||||
|
||||
TEMPLATE_BODY_PATH: str = os.path.join(os.path.dirname(__file__), 'mocks', 'global_waf_detect.yaml')
|
||||
|
||||
def side_effect(url, **kwargs) -> Mock:
    """Stand-in for ``requests.get`` used by the fingerprint fetch test.

    Serves the local template fixture with status 200 when the fingerprint
    URL is requested; any other URL raises RuntimeError.
    """
    if url != waf_check.NUCLEI_FINGERPRINT_URL:
        raise RuntimeError("Unexpected URL")

    with open(TEMPLATE_BODY_PATH, 'r', encoding='utf-8') as template_file:
        fixture_text: str = template_file.read()

    fake_response: Mock = Mock()
    fake_response.text = fixture_text
    fake_response.status_code = 200
    return fake_response
|
||||
|
||||
class TestWafCheck(unittest.TestCase):
    """Tests for fingerprint retrieval and the word/regex matcher helpers."""

    @staticmethod
    def _build_response(body: bytes) -> requests.Response:
        """Return a bare 200 response carrying *body* and two fixed headers."""
        res: requests.Response = requests.Response()
        res.status_code = 200
        res._content = body
        res.headers = CaseInsensitiveDict({
            'Content-Type': 'text/html',
            'Server': 'TestServer'
        })
        return res

    @patch('sherlock_project.waf_check.requests.get')
    def test_fetch_nuclei_fingerprints(self, mock_requests_get): # type: ignore
        """The fetched matchers equal those parsed straight from the fixture."""
        mock_requests_get.side_effect = side_effect

        fetched = waf_check.fetch_nuclei_fingerprints()

        with open(TEMPLATE_BODY_PATH, 'r', encoding='utf-8') as fixture:
            document = yaml.safe_load(fixture.read())

        wanted: list[dict[str, str | list[str]]] = document['http'][0]['matchers']
        self.assertEqual(fetched, wanted)

    def test_nuclei_regex_check(self):
        """Regex matchers across body/header/response parts and or/and."""
        res = self._build_response(b"This is a test response with Test-Regex in the body.")
        check = waf_check._check_nuclei_regex  # pyright: ignore[reportPrivateUsage]
        matcher: dict[str, str | list[str]] = {
            'type': 'regex',
            'name': 'test-regex',
            'part': 'body',
            'regex': [r'(?i)not-present'],
            'condition': 'or'
        }

        # The same matcher dict is mutated step by step between assertions.
        self.assertFalse(check(matcher, res))

        matcher.update(regex=[r'(?i)TeSt-REgEx'])
        self.assertTrue(check(matcher, res))

        matcher.update(regex=[r'(?i)TeSt-REgEx', r'(?i)Not-Present'])
        self.assertTrue(check(matcher, res))

        matcher.update(condition='and')
        self.assertFalse(check(matcher, res))

        matcher.update(part='header', regex=[r'(?i)testserver'])
        self.assertTrue(check(matcher, res))

        matcher.update(part='response')
        self.assertTrue(check(matcher, res))

        matcher.update(regex=[r'(?i)not-present'])
        self.assertFalse(check(matcher, res))

    def test_nuclei_words_check(self):
        """Word matchers across body/header/response parts and or/and."""
        res = self._build_response(b"This is a test response with test-words in the body.")
        check = waf_check._check_nuclei_words  # pyright: ignore[reportPrivateUsage]
        matcher: dict[str, str | list[str]] = {
            'type': 'word',
            'name': 'test-word',
            'part': 'body',
            'words': ['not-present'],
            'condition': 'or'
        }

        # The same matcher dict is mutated step by step between assertions.
        self.assertFalse(check(matcher, res))

        matcher.update(words=['test-word'])
        self.assertTrue(check(matcher, res))

        matcher.update(words=['test-word', 'Not-Present'])
        self.assertTrue(check(matcher, res))

        matcher.update(condition='and')
        self.assertFalse(check(matcher, res))

        matcher.update(part='header', words=['testserver'])
        self.assertFalse(check(matcher, res))

        matcher.update(words=['TestServer'])
        self.assertTrue(check(matcher, res))

        matcher.update(part='response')
        self.assertTrue(check(matcher, res))
|
||||
Reference in New Issue
Block a user