15 Commits

Author SHA1 Message Date
Paul Pfeister 4e2a4f6b66 Merge pull request #2919 from quan-nguyen-2110/fix-cracked-forum-false-positive
Fix Cracked Forum false positives
2026-05-04 23:28:52 -04:00
Paul Pfeister 2b985b57ad Merge pull request #2921 from quan-nguyen-2110/fix-akniga-false-negative
Fix akniga false negatives
2026-05-04 23:28:14 -04:00
Paul Pfeister ed0865363f Merge pull request #2929 from mohamedsolaiman/fix/false-positives
fix: resolve false positives for ArtStation, GeeksforGeeks, and LushStories
2026-05-04 23:23:43 -04:00
Paul Pfeister 43a354b235 Merge pull request #2853 from salmanrajz/fix/unicode-decode-error-special-chars
fix: handle UnicodeDecodeError on usernames with special characters
2026-05-04 23:12:52 -04:00
Paul Pfeister aa5c3b0010 Merge pull request #2930 from mohamedsolaiman/feature/new-sites
feat: add Carrd, SpaceHey, and Substack as supported sites
2026-05-04 23:07:07 -04:00
Siddharth Dushantha 2df7c61be8 Merge pull request #2939 from sherlock-project/fix-vuln
Fix command injection vuln
2026-05-02 09:46:59 +02:00
Siddharth Dushantha 61aae782ee version bump 2026-05-02 09:42:36 +02:00
Siddharth Dushantha 6eaec5cccd Fix command injection vuln 2026-05-02 09:27:28 +02:00
Mohamed Solaiman dca64e35d3 feat: add Carrd, SpaceHey, and Substack as supported sites
- Carrd: Simple website builder with profiles at {username}.carrd.co.
  Uses status_code detection (404 for non-existing profiles).

- SpaceHey: Retro social network inspired by MySpace.
  Uses message detection ("Not Found (Error 404) | SpaceHey" title
  for non-existing profiles).

- Substack: Newsletter/publishing platform with profiles at
  {username}.substack.com. Uses status_code detection (404 for
  non-existing publications).
2026-04-28 17:03:23 +00:00
Mohamed Solaiman 2e2248a8a6 fix: resolve false positives for ArtStation, GeeksforGeeks, and LushStories
- ArtStation: Add urlProbe using the JSON API endpoint
  (https://www.artstation.com/users/{}.json) which returns proper
  404 for non-existing users, instead of the main page which
  returns 200 for both existing and non-existing profiles.
  Closes #2714

- GeeksforGeeks: Switch from status_code to message detection.
  Both existing and non-existing profiles return HTTP 200, but
  non-existing profiles have "false" in the page title.
  Closes #2782

- LushStories: Switch from status_code to response_url detection.
  Non-existing profiles redirect (302) to /login while existing
  profiles return 200. Closes #2371
2026-04-28 17:01:37 +00:00
QuanNguyen a9960ff9a4 Fix akniga false negatives
Made-with: Cursor
2026-04-26 16:00:27 +02:00
QuanNguyen d731f715bf Fix Cracked Forum false positives
Made-with: Cursor
2026-04-26 15:44:27 +02:00
Siddharth Dushantha 271608fb22 Merge pull request #2898 from sherlock-project/improvements
Make Minor Improvements
2026-04-12 17:54:11 +02:00
salmanrajz 32fde9bfc6 fix: update NSFW tests to use sites not in exclusions list
Pornhub was added to the remote false_positive_exclusions.txt, causing
test_remove_nsfw and test_nsfw_explicit_selection to fail since the
site gets filtered out before the test runs. Replaced with Xvideos and
Erome which are NSFW-flagged but not excluded.
2026-03-31 20:11:55 +04:00
salmanrajz 4656d95702 fix: handle UnicodeDecodeError on usernames with special characters
Fixes #2730. Usernames containing non-ASCII characters (e.g. 'Émile')
can trigger a UnicodeDecodeError inside the requests library during
redirect handling. This exception is not a subclass of
requests.exceptions.RequestException, so it escaped all existing
except blocks in get_response() and crashed the program.

Added a catch for UnicodeError (parent of both UnicodeDecodeError and
UnicodeEncodeError) so these sites are gracefully skipped instead of
crashing the entire scan.

Added regression tests in tests/test_unicode.py.
2026-03-31 19:57:54 +04:00
6 changed files with 90 additions and 14 deletions
@@ -20,6 +20,7 @@ jobs:
# Checkout the base branch but fetch all history to avoid a second fetch call
ref: ${{ github.base_ref }}
fetch-depth: 0
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v6
@@ -90,11 +91,11 @@ jobs:
# --- The rest of the steps below are unchanged ---
- name: Validate modified targets
if: steps.discover-modified.outputs.changed_targets != ''
continue-on-error: true
env:
CHANGED_TARGETS: ${{ steps.discover-modified.outputs.changed_targets }}
run: |
poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
--chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" \
--chunked-sites "$CHANGED_TARGETS" \
--junitxml=validation_results.xml
- name: Prepare validation summary
+1 -1
View File
@@ -8,7 +8,7 @@ source = "init"
[tool.poetry]
name = "sherlock-project"
version = "0.16.0"
version = "0.16.1"
description = "Hunt down social media accounts by username across social networks"
license = "MIT"
authors = [
+32 -7
View File
@@ -159,6 +159,7 @@
"errorType": "status_code",
"url": "https://www.artstation.com/{}",
"urlMain": "https://www.artstation.com/",
"urlProbe": "https://www.artstation.com/users/{}.json",
"username_claimed": "Blue"
},
"Asciinema": {
@@ -404,6 +405,13 @@
"urlMain": "https://carbonmade.com/",
"username_claimed": "jenny"
},
"Carrd": {
"errorType": "status_code",
"regexCheck": "^[a-zA-Z0-9_-]{3,50}$",
"url": "https://{}.carrd.co/",
"urlMain": "https://carrd.co/",
"username_claimed": "blue"
},
"Career.habr": {
"errorMsg": "<h1>\u041e\u0448\u0438\u0431\u043a\u0430 404</h1>",
"errorType": "message",
@@ -602,10 +610,9 @@
"username_claimed": "blue"
},
"Cracked Forum": {
"errorMsg": "The member you specified is either invalid or doesn't exist",
"errorType": "message",
"url": "https://cracked.sh/{}",
"urlMain": "https://cracked.sh/",
"errorType": "status_code",
"url": "https://cracked.ax/{}",
"urlMain": "https://cracked.ax/",
"username_claimed": "Blue"
},
"Credly": {
@@ -952,7 +959,8 @@
"username_claimed": "blue"
},
"GeeksforGeeks": {
"errorType": "status_code",
"errorMsg": "false | GeeksforGeeks Profile",
"errorType": "message",
"url": "https://auth.geeksforgeeks.org/user/{}",
"urlMain": "https://www.geeksforgeeks.org/",
"username_claimed": "adam"
@@ -1526,7 +1534,8 @@
"username_claimed": "lottiefiles"
},
"LushStories": {
"errorType": "status_code",
"errorType": "response_url",
"errorUrl": "https://www.lushstories.com/login",
"isNSFW": true,
"url": "https://www.lushstories.com/profile/{}",
"urlMain": "https://www.lushstories.com/",
@@ -2279,6 +2288,13 @@
"urlMain": "https://sourceforge.net/",
"username_claimed": "blue"
},
"SpaceHey": {
"errorType": "message",
"errorMsg": "Not Found (Error 404) | SpaceHey",
"url": "https://spacehey.com/{}",
"urlMain": "https://spacehey.com/",
"username_claimed": "blue"
},
"SoylentNews": {
"errorMsg": "The user you requested does not exist, no matter how much you wish this might be the case.",
"errorType": "message",
@@ -2376,6 +2392,13 @@
"urlMain": "https://www.strava.com/",
"username_claimed": "blue"
},
"Substack": {
"errorType": "status_code",
"regexCheck": "^[a-zA-Z0-9][a-zA-Z0-9_-]{1,60}$",
"url": "https://{}.substack.com/",
"urlMain": "https://substack.com/",
"username_claimed": "green"
},
"SublimeForum": {
"errorType": "status_code",
"url": "https://forum.sublimetext.com/u/{}",
@@ -2827,8 +2850,10 @@
},
"akniga": {
"errorType": "status_code",
"errorCode": 404,
"request_method": "GET",
"url": "https://akniga.org/profile/{}",
"urlMain": "https://akniga.org/profile/blue/",
"urlMain": "https://akniga.org/",
"username_claimed": "blue"
},
"authorSTREAM": {
+3
View File
@@ -136,6 +136,9 @@ def get_response(request_future, error_type, social_network):
except requests.exceptions.RequestException as err:
error_context = "Unknown Error"
exception_text = str(err)
except UnicodeError as err:
error_context = "Encoding Error"
exception_text = str(err)
return response, error_context, exception_text
+47
View File
@@ -0,0 +1,47 @@
"""Tests for handling usernames with special/unicode characters."""
from concurrent.futures import Future
from sherlock_project.sherlock import get_response
def _make_future_with_exception(exc):
"""Create a Future that raises the given exception."""
future = Future()
future.set_exception(exc)
return future
def test_get_response_handles_unicode_decode_error():
    """Regression test for issue #2730.

    Usernames with special characters (e.g. 'Émile') can trigger a
    UnicodeDecodeError inside the requests library during redirect
    handling. This must not crash the program.
    """
    # A future that has already failed with the decode error is handed to
    # get_response in place of a real request future.
    future = _make_future_with_exception(
        UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte")
    )

    response, error_context, exception_text = get_response(
        request_future=future,
        error_type=["status_code"],
        social_network="TestSite",
    )

    # The failure must come back through the returned tuple rather than
    # propagating out of get_response.
    assert response is None
    assert error_context == "Encoding Error"
    # The text form of a UnicodeDecodeError names the codec that failed.
    assert "utf-8" in exception_text
def test_get_response_handles_unicode_encode_error():
    """UnicodeEncodeError should also be caught (subclass of UnicodeError)."""
    # A future that has already failed with the encode error is handed to
    # get_response in place of a real request future.
    future = _make_future_with_exception(
        UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)")
    )

    response, error_context, exception_text = get_response(
        request_future=future,
        error_type=["status_code"],
        social_network="TestSite",
    )

    # The failure must come back through the returned tuple rather than
    # propagating out of get_response.
    assert response is None
    assert error_context == "Encoding Error"
    # The text form of a UnicodeEncodeError names the codec that failed.
    assert "ascii" in exception_text
+3 -3
View File
@@ -4,7 +4,7 @@ from sherlock_interactives import Interactives
from sherlock_interactives import InteractivesSubprocessError
def test_remove_nsfw(sites_obj):
nsfw_target: str = 'Pornhub'
nsfw_target: str = 'Xvideos'
assert nsfw_target in {site.name: site.information for site in sites_obj}
sites_obj.remove_nsfw_sites()
assert nsfw_target not in {site.name: site.information for site in sites_obj}
@@ -12,8 +12,8 @@ def test_remove_nsfw(sites_obj):
# Parametrized sites should *not* include Motherless, which is acting as the control
@pytest.mark.parametrize('nsfwsites', [
['Pornhub'],
['Pornhub', 'Xvideos'],
['Xvideos'],
['Xvideos', 'Erome'],
])
def test_nsfw_explicit_selection(sites_obj, nsfwsites):
for site in nsfwsites: