Compare commits
232 Commits
master
..
exclusions
| Author | SHA1 | Date | |
|---|---|---|---|
| d178b14f76 | |||
| f73c79ae51 | |||
| 4f26fa0c38 | |||
| 52b00e77a5 | |||
| 07e9c8d7bd | |||
| ad7230d789 | |||
| 411bac3b76 | |||
| 1803922129 | |||
| a360a731f7 | |||
| 1e2710b35b | |||
| 6c9ef74a52 | |||
| fd3e646616 | |||
| 0061bfa1ed | |||
| bc32cffd19 | |||
| 243f436110 | |||
| a4a7f0d92a | |||
| 5a6913d586 | |||
| 554cbb1f79 | |||
| 690e6c9baa | |||
| 9851385fb1 | |||
| 1891d5fbce | |||
| 445d20c91c | |||
| db11f8337b | |||
| 815da25d0c | |||
| 2112e4da7e | |||
| c1ef192368 | |||
| f3e21e0ba3 | |||
| d39fa14972 | |||
| e624bc6b59 | |||
| f9cc83928f | |||
| 28072f5832 | |||
| 57b58eaf3a | |||
| ec850a4439 | |||
| 91a587832a | |||
| 7abf381d43 | |||
| 25ca9339e1 | |||
| 9435d0fe30 | |||
| 2e628ec9be | |||
| 8b4d93924e | |||
| 08e7859a8c | |||
| 773d415195 | |||
| a549a55800 | |||
| ae26f9f46e | |||
| 5f39d67109 | |||
| 62778f04c8 | |||
| 8c6358dea5 | |||
| f22f0a7bf5 | |||
| 7ce524a358 | |||
| 8e267fa570 | |||
| 4331465ee5 | |||
| ad23921880 | |||
| 806f5600f5 | |||
| 0dd3e67d89 | |||
| 09132e94a4 | |||
| b659f335de | |||
| 7a74ab1b9b | |||
| 63d7c59d2e | |||
| 0dfcdb701c | |||
| d437ed62b8 | |||
| 9614537284 | |||
| 5fe4d28905 | |||
| 930c69ae70 | |||
| 2319283279 | |||
| 3bbe8f2560 | |||
| 46d274b4f5 | |||
| e3ff86abbc | |||
| ad00327441 | |||
| 7a981b7caf | |||
| a5d2eed21e | |||
| 011153e991 | |||
| 78a7205f77 | |||
| 82f8875695 | |||
| a2de53a616 | |||
| b0a6084589 | |||
| 3113932b5c | |||
| 8d18a2b3c6 | |||
| 3a2d5c2408 | |||
| c0af8cfd65 | |||
| aceabecb0c | |||
| 25e9407ee8 | |||
| 2cf08b451f | |||
| 357bc89796 | |||
| 0d431d416a | |||
| 5da41f835c | |||
| 4db1d29e9c | |||
| da06a8a3c1 | |||
| 2d5b988144 | |||
| ad3192c812 | |||
| 9b248c9c04 | |||
| fee2688f35 | |||
| 07fb021b6a | |||
| ed84304353 | |||
| 471a65c22b | |||
| ab57315e63 | |||
| 148cead04a | |||
| 51d365959f | |||
| 24f97abd24 | |||
| 8f8f680058 | |||
| a8d247cf49 | |||
| f76954ce2a | |||
| 15c80681d2 | |||
| d2b4fd012c | |||
| 7fdd2a2878 | |||
| dc8b7ffe7e | |||
| b9cc7e1d1c | |||
| e642bd8594 | |||
| 375a615d5d | |||
| d0990d160d | |||
| d2307904a1 | |||
| e78190329b | |||
| c333b79bf1 | |||
| ddc4a5d8c1 | |||
| 3222eb2aee | |||
| 0801ab1367 | |||
| baebf80279 | |||
| 6e05910e7f | |||
| 37e86bfe80 | |||
| 47069ae0cf | |||
| c833609c39 | |||
| 2005620034 | |||
| a6d09d4864 | |||
| 0a53716348 | |||
| 7a8b3c55be | |||
| a4f62d6b6f | |||
| ca78ff0c56 | |||
| b74b1e0b4f | |||
| 55d9b406dd | |||
| a381afe8ea | |||
| 777be62db7 | |||
| b1a1f97abf | |||
| 7f5f0b3d71 | |||
| 93102fbc09 | |||
| e353855b1a | |||
| c7b8771111 | |||
| c778b2a3cd | |||
| 42dbd0e0a5 | |||
| 6ab87f17c1 | |||
| a8f508f9e8 | |||
| c8659eb8e5 | |||
| 6359672dbd | |||
| 9b0524889c | |||
| eddc6f564d | |||
| d4c1153b9d | |||
| 03ffa8387c | |||
| 762ebf85a0 | |||
| 6647670ef6 | |||
| 8ab5519bcf | |||
| 7499f7ad17 | |||
| d26948c90a | |||
| 612c634e9f | |||
| 4aa4fad787 | |||
| 0941f1d03c | |||
| b8f54089a4 | |||
| da10787bbc | |||
| 80d5c781aa | |||
| 62ad4e4b97 | |||
| cd2ffdc75c | |||
| bd9fb1dee1 | |||
| 7b7ca503c8 | |||
| 2bce7c6de3 | |||
| 48a0d42de7 | |||
| 6d71df7ac6 | |||
| e1b422defd | |||
| 3d11b44eb3 | |||
| 6484b098f5 | |||
| eb3d88c70c | |||
| 365bf808ba | |||
| 15dcfe92da | |||
| 2c3ce45d15 | |||
| c462fc0fe0 | |||
| d90f2a1dee | |||
| 0d7daf5284 | |||
| ef19804a97 | |||
| abb690fe79 | |||
| 185fefb0e5 | |||
| d6495e6f0b | |||
| e05f8e8f08 | |||
| ac9f1c031d | |||
| 5d39a4aca4 | |||
| e3e4d81d98 | |||
| 34987d6c69 | |||
| 70e926e70a | |||
| 9e2a4f970e | |||
| 4a751104cc | |||
| adfbda1106 | |||
| 17a91d607d | |||
| 07ebe12917 | |||
| b27f070668 | |||
| e708568758 | |||
| b54b11274b | |||
| 4a5229e6da | |||
| 7e7a973a50 | |||
| d45fa905f5 | |||
| f16fd54ba5 | |||
| becc5d2ea4 | |||
| 978ad5b3ba | |||
| 2b992e6f6e | |||
| 92728aac2b | |||
| dd0d4364cf | |||
| 963c444743 | |||
| 7b491f0365 | |||
| 9c1f71c647 | |||
| 8ffa0b85eb | |||
| aeb8910e03 | |||
| 04a8ec494e | |||
| 159397053d | |||
| 4f20c7152b | |||
| d5b2cd4e3f | |||
| b3590c60a1 | |||
| 4405bbc7f2 | |||
| 93be16067b | |||
| 5dcae85cb2 | |||
| 4bdb361ae2 | |||
| e5b3849612 | |||
| 8461279514 | |||
| 8426e8355d | |||
| 52952a7ed0 | |||
| 93cb3d6664 | |||
| 9e8a687b18 | |||
| d233e2a9e3 | |||
| 0785fa9298 | |||
| b0bd0c6729 | |||
| f6968f0407 | |||
| 89785e4269 | |||
| 1be937ad7e | |||
| 30cde63805 | |||
| e4c274a5bf | |||
| 2c128e0e5e | |||
| d83b6ae2b0 | |||
| a1ad9cd03a | |||
| a83c3957c6 | |||
| 11fa6720a0 |
@@ -1,19 +0,0 @@
|
|||||||
FROM sherlock/sherlock as sherlock
|
|
||||||
|
|
||||||
# Install Node.js
|
|
||||||
RUN apt-get update; apt-get install curl gpg -y
|
|
||||||
RUN mkdir -p /etc/apt/keyrings
|
|
||||||
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
|
|
||||||
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
|
|
||||||
RUN apt-get update && apt-get install -y curl bash git jq jo xz-utils nodejs
|
|
||||||
|
|
||||||
# Install Apify CLI (node.js) for the Actor Runtime
|
|
||||||
RUN npm -g install apify-cli
|
|
||||||
|
|
||||||
# Install Dependencies for the Actor Shell Script
|
|
||||||
RUN apt-get update && apt-get install -y bash jq jo xz-utils nodejs
|
|
||||||
|
|
||||||
# Copy Actor dir with the actorization shell script
|
|
||||||
COPY .actor/ .actor
|
|
||||||
|
|
||||||
ENTRYPOINT [".actor/actor.sh"]
|
|
||||||
@@ -1,93 +0,0 @@
|
|||||||
# Sherlock Actor on Apify
|
|
||||||
|
|
||||||
[](https://apify.com/netmilk/sherlock?fpr=sherlock)
|
|
||||||
|
|
||||||
This Actor wraps the [Sherlock Project](https://sherlockproject.xyz/) to provide serverless username reconnaissance across social networks in the cloud. It helps you find usernames across multiple social media platforms without installing and running the tool locally.
|
|
||||||
|
|
||||||
## What are Actors?
|
|
||||||
[Actors](https://docs.apify.com/platform/actors?fpr=sherlock) are serverless microservices running on the [Apify Platform](https://apify.com/?fpr=sherlock). They are based on the [Actor SDK](https://docs.apify.com/sdk/js?fpr=sherlock) and can be found in the [Apify Store](https://apify.com/store?fpr=sherlock). Learn more about Actors in the [Apify Whitepaper](https://whitepaper.actor?fpr=sherlock).
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
### Apify Console
|
|
||||||
|
|
||||||
1. Go to the Apify Actor page
|
|
||||||
2. Click "Run"
|
|
||||||
3. In the input form, fill in **Username(s)** to search for
|
|
||||||
4. The Actor will run and produce its outputs in the default datastore
|
|
||||||
|
|
||||||
|
|
||||||
### Apify CLI
|
|
||||||
|
|
||||||
```bash
|
|
||||||
apify call YOUR_USERNAME/sherlock --input='{
|
|
||||||
"usernames": ["johndoe", "janedoe"]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
### Using Apify API
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl --request POST \
|
|
||||||
--url "https://api.apify.com/v2/acts/YOUR_USERNAME~sherlock/run" \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--header 'Authorization: Bearer YOUR_API_TOKEN' \
|
|
||||||
--data '{
|
|
||||||
"usernames": ["johndoe", "janedoe"]
|
|
||||||
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
## Input Parameters
|
|
||||||
|
|
||||||
The Actor accepts a JSON schema with the following structure:
|
|
||||||
|
|
||||||
| Field | Type | Required | Default | Description |
|
|
||||||
|-------|------|----------|---------|-------------|
|
|
||||||
| `usernames` | array | Yes | - | List of usernames to search for |
|
|
||||||
| `usernames[]` | string | Yes | "json" | Username to search for |
|
|
||||||
|
|
||||||
|
|
||||||
### Example Input
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"usernames": ["techuser", "designuser"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Output
|
|
||||||
|
|
||||||
The Actor provides three types of outputs:
|
|
||||||
|
|
||||||
### Dataset Record
|
|
||||||
|
|
||||||
| Field | Type | Required | Description |
|
|
||||||
|-------|------|----------|-------------|
|
|
||||||
| `username` | string | Yes | Username the search was conducted for |
|
|
||||||
| `links` | array | Yes | Array with found links to the social media |
|
|
||||||
| `links[]` | string | No | URL to the account |
|
|
||||||
|
|
||||||
### Example Dataset Item (JSON)
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"username": "johndoe",
|
|
||||||
"links": [
|
|
||||||
"https://github.com/johndoe"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Performance & Resources
|
|
||||||
|
|
||||||
- **Memory Requirements**:
|
|
||||||
- Minimum: 512 MB RAM
|
|
||||||
- Recommended: 1 GB RAM for multiple usernames
|
|
||||||
- **Processing Time**:
|
|
||||||
- Single username: ~1-2 minutes
|
|
||||||
- Multiple usernames: 2-5 minutes
|
|
||||||
- Varies based on number of sites checked and response times
|
|
||||||
|
|
||||||
|
|
||||||
For more help, check the [Sherlock Project documentation](https://github.com/sherlock-project/sherlock) or raise an issue in the Actor's repository.
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
{
|
|
||||||
"actorSpecification": 1,
|
|
||||||
"name": "sherlock",
|
|
||||||
"version": "0.0",
|
|
||||||
"buildTag": "latest",
|
|
||||||
"environmentVariables": {},
|
|
||||||
"dockerFile": "./Dockerfile",
|
|
||||||
"dockerContext": "../",
|
|
||||||
"input": "./input_schema.json",
|
|
||||||
"storages": {
|
|
||||||
"dataset": "./dataset_schema.json"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
INPUT=`apify actor:get-input | jq -r .usernames[] | xargs echo`
|
|
||||||
echo "INPUT: $INPUT"
|
|
||||||
|
|
||||||
sherlock $INPUT
|
|
||||||
|
|
||||||
for username in $INPUT; do
|
|
||||||
# escape the special meaning leading characters
|
|
||||||
# https://github.com/jpmens/jo/blob/master/jo.md#description
|
|
||||||
safe_username=$(echo $username | sed 's/^@/\\@/' | sed 's/^:/\\:/' | sed 's/%/\\%/')
|
|
||||||
echo "pushing results for username: $username, content:"
|
|
||||||
cat $username.txt
|
|
||||||
sed '$d' $username.txt | jo -a | jo username=$safe_username links:=- | apify actor:push-data
|
|
||||||
done
|
|
||||||
@@ -1,45 +0,0 @@
|
|||||||
{
|
|
||||||
"actorSpecification": 1,
|
|
||||||
"fields":{
|
|
||||||
"title": "Sherlock actor input",
|
|
||||||
"description": "This is actor input schema",
|
|
||||||
"type": "object",
|
|
||||||
"schemaVersion": 1,
|
|
||||||
"properties": {
|
|
||||||
"links": {
|
|
||||||
"title": "Links to accounts",
|
|
||||||
"type": "array",
|
|
||||||
"description": "A list of social media accounts found for the username"
|
|
||||||
},
|
|
||||||
"username": {
|
|
||||||
"title": "Lookup username",
|
|
||||||
"type": "string",
|
|
||||||
"description": "Username the lookup was performed for"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": [
|
|
||||||
"username",
|
|
||||||
"links"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"views": {
|
|
||||||
"overview": {
|
|
||||||
"title": "Overview",
|
|
||||||
"transformation": {
|
|
||||||
"fields": [
|
|
||||||
"username",
|
|
||||||
"links"
|
|
||||||
],
|
|
||||||
},
|
|
||||||
"display": {
|
|
||||||
"component": "table",
|
|
||||||
"links": {
|
|
||||||
"label": "Links"
|
|
||||||
},
|
|
||||||
"username":{
|
|
||||||
"label": "Username"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
{
|
|
||||||
"title": "Sherlock actor input",
|
|
||||||
"description": "This is actor input schema",
|
|
||||||
"type": "object",
|
|
||||||
"schemaVersion": 1,
|
|
||||||
"properties": {
|
|
||||||
"usernames": {
|
|
||||||
"title": "Usernames to hunt down",
|
|
||||||
"type": "array",
|
|
||||||
"description": "A list of usernames to be checked for existence across social media",
|
|
||||||
"editor": "stringList",
|
|
||||||
"prefill": ["johndoe"]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": [
|
|
||||||
"usernames"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
.git/
|
|
||||||
.vscode/
|
|
||||||
screenshot/
|
|
||||||
tests/
|
|
||||||
*.txt
|
|
||||||
!/requirements.txt
|
|
||||||
venv/
|
|
||||||
devel/
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
root = true
|
|
||||||
|
|
||||||
[*]
|
|
||||||
indent_style = space
|
|
||||||
indent_size = 2
|
|
||||||
end_of_line = lf
|
|
||||||
charset = utf-8
|
|
||||||
trim_trailing_whitespace = true
|
|
||||||
insert_final_newline = true
|
|
||||||
curly_bracket_next_line = false
|
|
||||||
spaces_around_operators = true
|
|
||||||
|
|
||||||
[*.{markdown,md}]
|
|
||||||
trim_trailing_whitespace = false
|
|
||||||
|
|
||||||
[*.py]
|
|
||||||
indent_size = 4
|
|
||||||
quote_type = double
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
### REPOSITORY
|
|
||||||
/.github/CODEOWNERS @sdushantha @ppfeister
|
|
||||||
/.github/FUNDING.yml @sdushantha
|
|
||||||
/LICENSE @sdushantha
|
|
||||||
|
|
||||||
### PACKAGING
|
|
||||||
# Changes made to these items without code owner approval may negatively
|
|
||||||
# impact packaging pipelines.
|
|
||||||
/pyproject.toml @ppfeister @sdushantha
|
|
||||||
|
|
||||||
### REGRESSION
|
|
||||||
/.github/workflows/regression.yml @ppfeister
|
|
||||||
/tox.ini @ppfeister
|
|
||||||
/pytest.ini @ppfeister
|
|
||||||
/tests/ @ppfeister
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
github: [ sdushantha, ppfeister, matheusfelipeog ]
|
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
name: Bug report
|
|
||||||
description: File a bug report
|
|
||||||
labels: ["bug"]
|
|
||||||
body:
|
|
||||||
- type: dropdown
|
|
||||||
id: package
|
|
||||||
attributes:
|
|
||||||
label: Installation method
|
|
||||||
description: |
|
|
||||||
Some packages are maintained by the community, rather than by the Sherlock Project.
|
|
||||||
Knowing which packages are affected helps us diagnose package-specific bugs.
|
|
||||||
options:
|
|
||||||
- Select one
|
|
||||||
- PyPI (via pip)
|
|
||||||
- Homebrew
|
|
||||||
- Docker
|
|
||||||
- Kali repository (via apt)
|
|
||||||
- Built from source
|
|
||||||
- Other (indicate below)
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: input
|
|
||||||
id: package-version
|
|
||||||
attributes:
|
|
||||||
label: Package version
|
|
||||||
description: |
|
|
||||||
Knowing the version of the package you are using can help us diagnose your issue more quickly.
|
|
||||||
You can find the version by running `sherlock --version`.
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
id: description
|
|
||||||
attributes:
|
|
||||||
label: Description
|
|
||||||
description: |
|
|
||||||
Detailed descriptions that help contributors understand and reproduce your bug are much more likely to lead to a fix.
|
|
||||||
Please include the following information:
|
|
||||||
- What you were trying to do
|
|
||||||
- What you expected to happen
|
|
||||||
- What actually happened
|
|
||||||
placeholder: |
|
|
||||||
When doing {action}, the expected result should be {expected result}.
|
|
||||||
When doing {action}, however, the actual result was {actual result}.
|
|
||||||
This is undesirable because {reason}.
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
id: steps-to-reproduce
|
|
||||||
attributes:
|
|
||||||
label: Steps to reproduce
|
|
||||||
description: Write a step by step list that will allow us to reproduce this bug.
|
|
||||||
placeholder: |
|
|
||||||
1. Do something
|
|
||||||
2. Then do something else
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
id: additional-info
|
|
||||||
attributes:
|
|
||||||
label: Additional information
|
|
||||||
description: If you have some additional information, please write it here.
|
|
||||||
validations:
|
|
||||||
required: false
|
|
||||||
- type: checkboxes
|
|
||||||
id: terms
|
|
||||||
attributes:
|
|
||||||
label: Code of Conduct
|
|
||||||
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md).
|
|
||||||
options:
|
|
||||||
- label: I agree to follow this project's Code of Conduct
|
|
||||||
required: true
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
blank_issues_enabled: false
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
name: False negative
|
|
||||||
description: Report a site that is returning false negative results
|
|
||||||
title: "False negative for: "
|
|
||||||
labels: ["false negative"]
|
|
||||||
body:
|
|
||||||
- type: markdown
|
|
||||||
attributes:
|
|
||||||
value: |
|
|
||||||
Please include the site name in the title of your issue.
|
|
||||||
Submit **one site per report** for faster resolution. If you have multiple sites in the same report, it often takes longer to fix.
|
|
||||||
- type: textarea
|
|
||||||
id: additional-info
|
|
||||||
attributes:
|
|
||||||
label: Additional info
|
|
||||||
description: If you know why the site is returning false negatives, or noticed any patterns, please explain.
|
|
||||||
placeholder: |
|
|
||||||
Reddit is returning false negatives because...
|
|
||||||
validations:
|
|
||||||
required: false
|
|
||||||
- type: checkboxes
|
|
||||||
id: terms
|
|
||||||
attributes:
|
|
||||||
label: Code of Conduct
|
|
||||||
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md).
|
|
||||||
options:
|
|
||||||
- label: I agree to follow this project's Code of Conduct
|
|
||||||
required: true
|
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
name: False positive
|
|
||||||
description: Report a site that is returning false positive results
|
|
||||||
title: "False positive for: "
|
|
||||||
labels: ["false positive"]
|
|
||||||
body:
|
|
||||||
- type: markdown
|
|
||||||
attributes:
|
|
||||||
value: |
|
|
||||||
Please include the site name in the title of your issue.
|
|
||||||
Submit **one site per report** for faster resolution. If you have multiple sites in the same report, it often takes longer to fix.
|
|
||||||
- type: textarea
|
|
||||||
id: additional-info
|
|
||||||
attributes:
|
|
||||||
label: Additional info
|
|
||||||
description: If you know why the site is returning false positives, or noticed any patterns, please explain.
|
|
||||||
placeholder: |
|
|
||||||
Reddit is returning false positives because...
|
|
||||||
False positives only occur after x searches...
|
|
||||||
validations:
|
|
||||||
required: false
|
|
||||||
- type: checkboxes
|
|
||||||
id: terms
|
|
||||||
attributes:
|
|
||||||
label: Code of Conduct
|
|
||||||
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md).
|
|
||||||
options:
|
|
||||||
- label: I agree to follow this project's Code of Conduct
|
|
||||||
required: true
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
name: Feature request
|
|
||||||
description: Request a feature or enhancement
|
|
||||||
labels: ["enhancement"]
|
|
||||||
body:
|
|
||||||
- type: markdown
|
|
||||||
attributes:
|
|
||||||
value: |
|
|
||||||
Concise and thoughtful titles help other contributors find and add your requested feature.
|
|
||||||
- type: textarea
|
|
||||||
id: description
|
|
||||||
attributes:
|
|
||||||
label: Description
|
|
||||||
description: Describe the feature you are requesting
|
|
||||||
placeholder: I'd like Sherlock to be able to do xyz
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: checkboxes
|
|
||||||
id: terms
|
|
||||||
attributes:
|
|
||||||
label: Code of Conduct
|
|
||||||
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md).
|
|
||||||
options:
|
|
||||||
- label: I agree to follow this project's Code of Conduct
|
|
||||||
required: true
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
name: Request a new website
|
|
||||||
description: Request that Sherlock add support for a new website
|
|
||||||
title: "Requesting support for: "
|
|
||||||
labels: ["site support request"]
|
|
||||||
body:
|
|
||||||
- type: markdown
|
|
||||||
attributes:
|
|
||||||
value: |
|
|
||||||
Ensure that the site name is in the title of your request. Requests without this information will be **closed**.
|
|
||||||
- type: input
|
|
||||||
id: site-url
|
|
||||||
attributes:
|
|
||||||
label: Site URL
|
|
||||||
description: |
|
|
||||||
What is the URL of the website indicated in your title?
|
|
||||||
Websites sometimes have similar names. This helps contributors find the correct site.
|
|
||||||
placeholder: https://reddit.com
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
id: additional-info
|
|
||||||
attributes:
|
|
||||||
label: Additional info
|
|
||||||
description: If you have suggestions on how Sherlock should detect for usernames, please explain below
|
|
||||||
placeholder: Sherlock can detect if a username exists on Reddit by checking for...
|
|
||||||
validations:
|
|
||||||
required: false
|
|
||||||
- type: checkboxes
|
|
||||||
id: terms
|
|
||||||
attributes:
|
|
||||||
label: Code of Conduct
|
|
||||||
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md).
|
|
||||||
options:
|
|
||||||
- label: I agree to follow this project's Code of Conduct
|
|
||||||
required: true
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
## Security Policy
|
|
||||||
|
|
||||||
### Supported Versions
|
|
||||||
|
|
||||||
Sherlock is a forward-looking project. Only the latest and most current version is supported.
|
|
||||||
|
|
||||||
### Reporting a Vulnerability
|
|
||||||
|
|
||||||
Security concerns can be submitted [__here__][report-url] without risk of exposing sensitive information. For issues that are low severity or unlikely to see exploitation, public issues are often acceptable.
|
|
||||||
|
|
||||||
[report-url]: https://github.com/sherlock-project/sherlock/security/advisories/new
|
|
||||||
@@ -1,89 +0,0 @@
|
|||||||
name: Exclusions Updater
|
|
||||||
|
|
||||||
on:
|
|
||||||
schedule:
|
|
||||||
#- cron: '0 5 * * 0' # Runs at 05:00 every Sunday
|
|
||||||
- cron: '0 5 * * *' # Runs at 05:00 every day
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
update-exclusions:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@v5
|
|
||||||
|
|
||||||
- name: Set up Python
|
|
||||||
uses: actions/setup-python@v6
|
|
||||||
with:
|
|
||||||
python-version: '3.13'
|
|
||||||
|
|
||||||
- name: Install Poetry
|
|
||||||
uses: abatilo/actions-poetry@v4
|
|
||||||
with:
|
|
||||||
poetry-version: 'latest'
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
poetry install --no-interaction --with dev
|
|
||||||
|
|
||||||
- name: Run false positive tests
|
|
||||||
run: |
|
|
||||||
$(poetry env activate)
|
|
||||||
pytest -q --tb no -m validate_targets_fp -n 20 | tee fp_test_results.txt
|
|
||||||
deactivate
|
|
||||||
|
|
||||||
- name: Parse false positive detections by desired categories
|
|
||||||
run: |
|
|
||||||
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \
|
|
||||||
| sort -u > false_positive_exclusions.txt
|
|
||||||
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \
|
|
||||||
| sort -u > waf_hits.txt
|
|
||||||
|
|
||||||
- name: Detect if exclusions list changed
|
|
||||||
id: detect_changes
|
|
||||||
run: |
|
|
||||||
git fetch origin exclusions || true
|
|
||||||
|
|
||||||
if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
|
|
||||||
# If the exclusions branch and file exist, compare
|
|
||||||
if git diff --quiet origin/exclusions -- false_positive_exclusions.txt; then
|
|
||||||
echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
|
|
||||||
else
|
|
||||||
echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
# If the exclusions branch or file do not exist, treat as changed
|
|
||||||
echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Quantify and display results
|
|
||||||
run: |
|
|
||||||
FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs)
|
|
||||||
WAF_COUNT=$(wc -l < waf_hits.txt | xargs)
|
|
||||||
echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits."
|
|
||||||
echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt
|
|
||||||
echo ">>> WAF hits:" && cat waf_hits.txt
|
|
||||||
|
|
||||||
- name: Commit and push exclusions list
|
|
||||||
if: steps.detect_changes.outputs.exclusions_changed == 'true'
|
|
||||||
run: |
|
|
||||||
git config user.name "Paul Pfeister (automation)"
|
|
||||||
git config user.email "code@pfeister.dev"
|
|
||||||
|
|
||||||
mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp
|
|
||||||
|
|
||||||
git add -f false_positive_exclusions.txt.tmp # -f required to override .gitignore
|
|
||||||
git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp
|
|
||||||
|
|
||||||
git fetch origin exclusions || true # Allows creation of branch if deleted
|
|
||||||
git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)
|
|
||||||
|
|
||||||
git stash pop || true
|
|
||||||
|
|
||||||
mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt
|
|
||||||
|
|
||||||
git rm -f false_positive_exclusions.txt.tmp || true
|
|
||||||
git add false_positive_exclusions.txt
|
|
||||||
git commit -m "auto: update exclusions list" || echo "No changes to commit"
|
|
||||||
git push origin exclusions
|
|
||||||
@@ -1,94 +0,0 @@
|
|||||||
name: Regression Testing
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
- release/**
|
|
||||||
paths:
|
|
||||||
- '.github/workflows/regression.yml'
|
|
||||||
- '**/*.json'
|
|
||||||
- '**/*.py'
|
|
||||||
- '**/*.ini'
|
|
||||||
- '**/*.toml'
|
|
||||||
- 'Dockerfile'
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
- release/**
|
|
||||||
paths:
|
|
||||||
- '.github/workflows/regression.yml'
|
|
||||||
- '**/*.json'
|
|
||||||
- '**/*.py'
|
|
||||||
- '**/*.ini'
|
|
||||||
- '**/*.toml'
|
|
||||||
- 'Dockerfile'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
tox-lint:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
# Linting is run through tox to ensure that the same linter
|
|
||||||
# is used by local runners
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v6
|
|
||||||
- name: Set up linting environment
|
|
||||||
uses: actions/setup-python@v6
|
|
||||||
with:
|
|
||||||
python-version: '3.x'
|
|
||||||
- name: Install tox and related dependencies
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
pip install tox
|
|
||||||
- name: Run tox linting environment
|
|
||||||
run: tox -e lint
|
|
||||||
tox-matrix:
|
|
||||||
runs-on: ${{ matrix.os }}
|
|
||||||
strategy:
|
|
||||||
# We want to know what specific versions it fails on
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
os: [
|
|
||||||
ubuntu-latest,
|
|
||||||
windows-latest,
|
|
||||||
macos-latest,
|
|
||||||
]
|
|
||||||
python-version: [
|
|
||||||
'3.10',
|
|
||||||
'3.11',
|
|
||||||
'3.12',
|
|
||||||
'3.13',
|
|
||||||
'3.14',
|
|
||||||
'3.14t',
|
|
||||||
]
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v6
|
|
||||||
- name: Set up environment ${{ matrix.python-version }}
|
|
||||||
uses: actions/setup-python@v6
|
|
||||||
with:
|
|
||||||
python-version: ${{ matrix.python-version }}
|
|
||||||
- name: Install tox and related dependencies
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
pip install tox
|
|
||||||
pip install tox-gh-actions
|
|
||||||
- name: Run tox
|
|
||||||
run: tox
|
|
||||||
docker-build-test:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v6
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
uses: docker/setup-buildx-action@v3
|
|
||||||
- name: Get version from pyproject.toml
|
|
||||||
id: get-version
|
|
||||||
run: |
|
|
||||||
VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
|
|
||||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
|
||||||
- name: Build Docker image
|
|
||||||
run: |
|
|
||||||
docker build \
|
|
||||||
--build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
|
|
||||||
-t sherlock-test:latest .
|
|
||||||
- name: Test Docker image runs
|
|
||||||
run: docker run --rm sherlock-test:latest --version
|
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
name: Update Site List
|
|
||||||
|
|
||||||
# Trigger the workflow when changes are pushed to the main branch
|
|
||||||
# and the changes include the sherlock_project/resources/data.json file
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
paths:
|
|
||||||
- sherlock_project/resources/data.json
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
sync-json-data:
|
|
||||||
# Use the latest version of Ubuntu as the runner environment
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
steps:
|
|
||||||
# Check out the code at the specified pull request head commit
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
ref: ${{ github.event.pull_request.head.sha }}
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
# Install Python 3
|
|
||||||
- name: Install Python
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
|
||||||
python-version: '3.x'
|
|
||||||
|
|
||||||
# Execute the devel/site-list.py Python script
|
|
||||||
- name: Execute site-list.py
|
|
||||||
run: python devel/site-list.py
|
|
||||||
|
|
||||||
- name: Pushes to another repository
|
|
||||||
uses: sdushantha/github-action-push-to-another-repository@main
|
|
||||||
env:
|
|
||||||
SSH_DEPLOY_KEY: ${{ secrets.SSH_DEPLOY_KEY }}
|
|
||||||
API_TOKEN_GITHUB: ${{ secrets.API_TOKEN_GITHUB }}
|
|
||||||
with:
|
|
||||||
source-directory: 'output'
|
|
||||||
destination-github-username: 'sherlock-project'
|
|
||||||
commit-message: 'Updated site list'
|
|
||||||
destination-repository-name: 'sherlockproject.xyz'
|
|
||||||
user-email: siddharth.dushantha@gmail.com
|
|
||||||
target-branch: master
|
|
||||||
@@ -1,127 +0,0 @@
|
|||||||
name: Modified Target Validation
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request_target:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
paths:
|
|
||||||
- "sherlock_project/resources/data.json"
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
validate-modified-targets:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
pull-requests: write
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@v5
|
|
||||||
with:
|
|
||||||
# Checkout the base branch but fetch all history to avoid a second fetch call
|
|
||||||
ref: ${{ github.base_ref }}
|
|
||||||
fetch-depth: 0
|
|
||||||
persist-credentials: false
|
|
||||||
|
|
||||||
- name: Set up Python
|
|
||||||
uses: actions/setup-python@v6
|
|
||||||
with:
|
|
||||||
python-version: "3.13"
|
|
||||||
|
|
||||||
- name: Install Poetry
|
|
||||||
uses: abatilo/actions-poetry@v4
|
|
||||||
with:
|
|
||||||
poetry-version: "latest"
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
poetry install --no-interaction --with dev
|
|
||||||
|
|
||||||
- name: Prepare JSON versions for comparison
|
|
||||||
run: |
|
|
||||||
# Fetch only the PR's branch head (single network call in this step)
|
|
||||||
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
|
|
||||||
|
|
||||||
# Find the merge-base commit between the target branch and the PR branch
|
|
||||||
MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
|
|
||||||
echo "Comparing PR head against merge-base commit: $MERGE_BASE"
|
|
||||||
|
|
||||||
# Safely extract the file from the PR's head and the merge-base commit
|
|
||||||
git show pr:sherlock_project/resources/data.json > data.json.head
|
|
||||||
git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
|
|
||||||
|
|
||||||
# CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
|
|
||||||
# This ensures that pytest runs against the new, updated file.
|
|
||||||
cp data.json.head sherlock_project/resources/data.json
|
|
||||||
|
|
||||||
- name: Discover modified targets
|
|
||||||
id: discover-modified
|
|
||||||
run: |
|
|
||||||
CHANGED=$(
|
|
||||||
python - <<'EOF'
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
try:
|
|
||||||
with open("data.json.base") as f: base = json.load(f)
|
|
||||||
with open("data.json.head") as f: head = json.load(f)
|
|
||||||
except FileNotFoundError as e:
|
|
||||||
print(f"Error: Could not find {e.filename}", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
changed = []
|
|
||||||
for k, v in head.items():
|
|
||||||
if k not in base or base[k] != v:
|
|
||||||
changed.append(k)
|
|
||||||
|
|
||||||
print(",".join(sorted(changed)))
|
|
||||||
EOF
|
|
||||||
)
|
|
||||||
|
|
||||||
# Preserve changelist
|
|
||||||
echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
|
|
||||||
echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
|
|
||||||
|
|
||||||
- name: Validate remote manifest against local schema
|
|
||||||
if: steps.discover-modified.outputs.changed_targets != ''
|
|
||||||
run: |
|
|
||||||
poetry run pytest tests/test_manifest.py::test_validate_manifest_against_local_schema
|
|
||||||
|
|
||||||
# --- The rest of the steps below are unchanged ---
|
|
||||||
|
|
||||||
- name: Validate modified targets
  # Skip the validation run entirely when no targets changed; every other
  # step that consumes changed_targets is guarded the same way.
  if: steps.discover-modified.outputs.changed_targets != ''
  # A failing test run must not abort the job here: the following steps
  # summarize validation_results.xml and announce the outcome on the PR.
  continue-on-error: true
  env:
    # Passed through the environment instead of being interpolated into the script text.
    CHANGED_TARGETS: ${{ steps.discover-modified.outputs.changed_targets }}
  run: |
    poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
      --chunked-sites "$CHANGED_TARGETS" \
      --junitxml=validation_results.xml
|
|
||||||
|
|
||||||
- name: Prepare validation summary
|
|
||||||
if: steps.discover-modified.outputs.changed_targets != ''
|
|
||||||
id: prepare-summary
|
|
||||||
run: |
|
|
||||||
summary=$(
|
|
||||||
poetry run python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
|
|
||||||
)
|
|
||||||
echo "$summary" > validation_summary.md
|
|
||||||
|
|
||||||
- name: Announce validation results
|
|
||||||
if: steps.discover-modified.outputs.changed_targets != ''
|
|
||||||
uses: actions/github-script@v8
|
|
||||||
with:
|
|
||||||
script: |
|
|
||||||
const fs = require('fs');
|
|
||||||
const body = fs.readFileSync('validation_summary.md', 'utf8');
|
|
||||||
await github.rest.issues.createComment({
|
|
||||||
issue_number: context.payload.pull_request.number,
|
|
||||||
owner: context.repo.owner,
|
|
||||||
repo: context.repo.repo,
|
|
||||||
body: body,
|
|
||||||
});
|
|
||||||
|
|
||||||
- name: This step shows as ran when no modifications are found
|
|
||||||
if: steps.discover-modified.outputs.changed_targets == ''
|
|
||||||
run: |
|
|
||||||
echo "No modified targets found"
|
|
||||||
-47
@@ -1,47 +0,0 @@
|
|||||||
# Virtual Environments
|
|
||||||
venv/
|
|
||||||
bin/
|
|
||||||
lib/
|
|
||||||
pyvenv.cfg
|
|
||||||
poetry.lock
|
|
||||||
|
|
||||||
# Regression Testing
|
|
||||||
.coverage
|
|
||||||
.tox/
|
|
||||||
|
|
||||||
# Editor Configurations
|
|
||||||
.vscode/
|
|
||||||
.idea/
|
|
||||||
|
|
||||||
# Python
|
|
||||||
__pycache__/
|
|
||||||
|
|
||||||
# Pip
|
|
||||||
src/
|
|
||||||
|
|
||||||
# Devel, Build, and Installation
|
|
||||||
*.egg-info/
|
|
||||||
dist/**
|
|
||||||
|
|
||||||
# Jupyter Notebook
|
|
||||||
.ipynb_checkpoints
|
|
||||||
*.ipynb
|
|
||||||
|
|
||||||
# Output files, except requirements.txt
|
|
||||||
*.txt
|
|
||||||
!requirements.txt
|
|
||||||
|
|
||||||
# Comma-Separated Values (CSV) Reports
|
|
||||||
*.csv
|
|
||||||
|
|
||||||
# XLSX Reports
|
|
||||||
*.xlsx
|
|
||||||
|
|
||||||
# Excluded sites list
|
|
||||||
tests/.excluded_sites
|
|
||||||
|
|
||||||
# MacOS Folder Metadata File
|
|
||||||
.DS_Store
|
|
||||||
|
|
||||||
# Vim swap files
|
|
||||||
*.swp
|
|
||||||
-31
@@ -1,31 +0,0 @@
|
|||||||
# Release instructions:
|
|
||||||
# 1. Update the version tag in the Dockerfile to match the version in sherlock/__init__.py
|
|
||||||
# 2. Update the VCS_REF tag to match the tagged version's FULL commit hash
|
|
||||||
# 3. Build image with BOTH latest and version tags
|
|
||||||
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
|
|
||||||
|
|
||||||
FROM python:3.12-slim-bullseye AS build
|
|
||||||
WORKDIR /sherlock
|
|
||||||
|
|
||||||
RUN pip3 install --no-cache-dir --upgrade pip
|
|
||||||
|
|
||||||
FROM python:3.12-slim-bullseye
|
|
||||||
WORKDIR /sherlock
|
|
||||||
|
|
||||||
ARG VCS_REF= # CHANGE ME ON UPDATE
|
|
||||||
ARG VCS_URL="https://github.com/sherlock-project/sherlock"
|
|
||||||
ARG VERSION_TAG= # CHANGE ME ON UPDATE
|
|
||||||
|
|
||||||
ENV SHERLOCK_ENV=docker
|
|
||||||
|
|
||||||
LABEL org.label-schema.vcs-ref=$VCS_REF \
|
|
||||||
org.label-schema.vcs-url=$VCS_URL \
|
|
||||||
org.label-schema.name="Sherlock" \
|
|
||||||
org.label-schema.version=$VERSION_TAG \
|
|
||||||
website="https://sherlockproject.xyz"
|
|
||||||
|
|
||||||
RUN pip3 install --no-cache-dir sherlock-project==$VERSION_TAG
|
|
||||||
|
|
||||||
WORKDIR /sherlock
|
|
||||||
|
|
||||||
ENTRYPOINT ["sherlock"]
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2019 Sherlock Project
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
||||||
@@ -1,45 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# This module generates the listing of supported sites which can be found in
|
|
||||||
# sites.mdx. It also organizes all the sites in alphanumeric order
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
|
|
||||||
# Location of the site manifest, relative to the directory the script is run from.
DATA_REL_URI: str = "sherlock_project/resources/data.json"

DEFAULT_ENCODING = "utf-8"

# Read the data.json file
with open(DATA_REL_URI, "r", encoding=DEFAULT_ENCODING) as data_file:
    data: dict = json.load(data_file)

# Removes schema-specific keywords for proper processing.
# Work on a copy so that "$schema" is still present when data.json is rewritten below.
social_networks = data.copy()
social_networks.pop('$schema', None)

# Sort the social networks in alphanumeric order
social_networks = sorted(social_networks.items())

# Make output dir where the site list will be written.
# exist_ok keeps the script re-runnable when "output" is left over from a previous run.
os.makedirs("output", exist_ok=True)

# Write the list of supported sites to sites.mdx
with open("output/sites.mdx", "w", encoding=DEFAULT_ENCODING) as site_file:
    site_file.write("---\n")
    site_file.write("title: 'List of supported sites'\n")
    site_file.write("sidebarTitle: 'Supported sites'\n")
    site_file.write("icon: 'globe'\n")
    site_file.write("description: 'Sherlock currently supports **400+** sites'\n")
    site_file.write("---\n\n")

    for social_network, info in social_networks:
        url_main = info["urlMain"]
        is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
        site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")

# Overwrite the data.json file with sorted data
with open(DATA_REL_URI, "w", encoding=DEFAULT_ENCODING) as data_file:
    sorted_data = json.dumps(data, indent=2, sort_keys=True)
    data_file.write(sorted_data)
    data_file.write("\n")  # Keep the newline after writing data

print("Finished updating supported site listing!")
|
|
||||||
@@ -1,72 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# This module summarizes the results of site validation tests queued by
|
|
||||||
# workflow validate_modified_targets for presentation in Issue comments.
|
|
||||||
|
|
||||||
from defusedxml import ElementTree as ET
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
def summarize_junit_xml(xml_path: Path) -> str:
|
|
||||||
tree = ET.parse(xml_path)
|
|
||||||
root = tree.getroot()
|
|
||||||
suite = root.find('testsuite')
|
|
||||||
|
|
||||||
pass_message: str = ":heavy_check_mark: Pass"
|
|
||||||
fail_message: str = ":x: Fail"
|
|
||||||
|
|
||||||
if suite is None:
|
|
||||||
raise ValueError("Invalid JUnit XML: No testsuite found")
|
|
||||||
|
|
||||||
summary_lines: list[str] = []
|
|
||||||
summary_lines.append("#### Automatic validation of changes\n")
|
|
||||||
summary_lines.append("| Target | F+ Check | F- Check |")
|
|
||||||
summary_lines.append("|---|---|---|")
|
|
||||||
|
|
||||||
failures = int(suite.get('failures', 0))
|
|
||||||
errors_detected: bool = False
|
|
||||||
|
|
||||||
results: dict[str, dict[str, str]] = {}
|
|
||||||
|
|
||||||
for testcase in suite.findall('testcase'):
|
|
||||||
test_name = testcase.get('name').split('[')[0]
|
|
||||||
site_name = testcase.get('name').split('[')[1].rstrip(']')
|
|
||||||
failure = testcase.find('failure')
|
|
||||||
error = testcase.find('error')
|
|
||||||
|
|
||||||
if site_name not in results:
|
|
||||||
results[site_name] = {}
|
|
||||||
|
|
||||||
if test_name == "test_false_neg":
|
|
||||||
results[site_name]['F- Check'] = pass_message if failure is None and error is None else fail_message
|
|
||||||
elif test_name == "test_false_pos":
|
|
||||||
results[site_name]['F+ Check'] = pass_message if failure is None and error is None else fail_message
|
|
||||||
|
|
||||||
if error is not None:
|
|
||||||
errors_detected = True
|
|
||||||
|
|
||||||
for result in results:
|
|
||||||
summary_lines.append(f"| {result} | {results[result].get('F+ Check', 'Error!')} | {results[result].get('F- Check', 'Error!')} |")
|
|
||||||
|
|
||||||
if failures > 0:
|
|
||||||
summary_lines.append("\n___\n" +
|
|
||||||
"\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
|
|
||||||
" will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")
|
|
||||||
|
|
||||||
if errors_detected:
|
|
||||||
summary_lines.append("\n___\n" +
|
|
||||||
"\n**Errors were detected during validation. Please review the workflow logs.**")
|
|
||||||
|
|
||||||
return "\n".join(summary_lines)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Exactly one command-line argument is accepted: the JUnit XML report.
    cli_arguments = sys.argv[1:]
    if len(cli_arguments) != 1:
        print("Usage: summarize_site_validation.py <junit-xml-file>")
        sys.exit(1)

    xml_path: Path = Path(cli_arguments[0])
    report_exists = xml_path.is_file()
    if not report_exists:
        print(f"Error: File '{xml_path}' does not exist.")
        sys.exit(1)

    # The Markdown summary goes to stdout.
    summary: str = summarize_junit_xml(xml_path)
    print(summary)
|
|
||||||
@@ -1,130 +0,0 @@
|
|||||||
# Contributor Covenant Code of Conduct
|
|
||||||
|
|
||||||
## Our Pledge
|
|
||||||
|
|
||||||
We as members, contributors, and leaders pledge to make participation in our
|
|
||||||
community a harassment-free experience for everyone, regardless of age, body
|
|
||||||
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
|
||||||
identity and expression, level of experience, education, socio-economic status,
|
|
||||||
nationality, personal appearance, race, caste, color, religion, or sexual
|
|
||||||
identity and orientation.
|
|
||||||
|
|
||||||
We pledge to act and interact in ways that contribute to an open, welcoming,
|
|
||||||
diverse, inclusive, and healthy community.
|
|
||||||
## Our Standards
|
|
||||||
|
|
||||||
Examples of behavior that contributes to a positive environment for our
|
|
||||||
community include:
|
|
||||||
|
|
||||||
* Demonstrating empathy and kindness toward other people
|
|
||||||
* Being respectful of differing opinions, viewpoints, and experiences
|
|
||||||
* Giving and gracefully accepting constructive feedback
|
|
||||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
|
||||||
and learning from the experience
|
|
||||||
* Focusing on what is best not just for us as individuals, but for the overall
|
|
||||||
community
|
|
||||||
|
|
||||||
Examples of unacceptable behavior include:
|
|
||||||
|
|
||||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
|
||||||
any kind
|
|
||||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
|
||||||
* Public or private harassment
|
|
||||||
* Publishing others' private information, such as a physical or email address,
|
|
||||||
without their explicit permission
|
|
||||||
* Other conduct which could reasonably be considered inappropriate in a
|
|
||||||
professional setting
|
|
||||||
|
|
||||||
## Enforcement Responsibilities
|
|
||||||
|
|
||||||
Community leaders are responsible for clarifying and enforcing our standards of
|
|
||||||
acceptable behavior and will take appropriate and fair corrective action in
|
|
||||||
response to any behavior that they deem inappropriate, threatening, offensive,
|
|
||||||
or harmful.
|
|
||||||
|
|
||||||
Community leaders have the right and responsibility to remove, edit, or reject
|
|
||||||
comments, commits, code, wiki edits, issues, and other contributions that are
|
|
||||||
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
|
||||||
decisions when appropriate.
|
|
||||||
|
|
||||||
## Scope
|
|
||||||
|
|
||||||
This Code of Conduct applies within all community spaces, and also applies when
|
|
||||||
an individual is officially representing the community in public spaces.
|
|
||||||
Examples of representing our community include using an official e-mail address,
|
|
||||||
posting via an official social media account, or acting as an appointed
|
|
||||||
representative at an online or offline event.
|
|
||||||
|
|
||||||
## Enforcement
|
|
||||||
|
|
||||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
|
||||||
reported to the community leaders responsible for enforcement at yahya.arbabi@gmail.com.
|
|
||||||
All complaints will be reviewed and investigated promptly and fairly.
|
|
||||||
|
|
||||||
All community leaders are obligated to respect the privacy and security of the
|
|
||||||
reporter of any incident.
|
|
||||||
|
|
||||||
## Enforcement Guidelines
|
|
||||||
|
|
||||||
Community leaders will follow these Community Impact Guidelines in determining
|
|
||||||
the consequences for any action they deem in violation of this Code of Conduct:
|
|
||||||
|
|
||||||
### 1. Correction
|
|
||||||
|
|
||||||
**Community Impact**: Use of inappropriate language or other behavior deemed
|
|
||||||
unprofessional or unwelcome in the community.
|
|
||||||
|
|
||||||
**Consequence**: A private, written warning from community leaders, providing
|
|
||||||
clarity around the nature of the violation and an explanation of why the
|
|
||||||
behavior was inappropriate. A public apology may be requested.
|
|
||||||
|
|
||||||
### 2. Warning
|
|
||||||
|
|
||||||
**Community Impact**: A violation through a single incident or series of
|
|
||||||
actions.
|
|
||||||
|
|
||||||
**Consequence**: A warning with consequences for continued behavior. No
|
|
||||||
interaction with the people involved, including unsolicited interaction with
|
|
||||||
those enforcing the Code of Conduct, for a specified period of time. This
|
|
||||||
includes avoiding interactions in community spaces as well as external channels
|
|
||||||
like social media. Violating these terms may lead to a temporary or permanent
|
|
||||||
ban.
|
|
||||||
|
|
||||||
### 3. Temporary Ban
|
|
||||||
|
|
||||||
**Community Impact**: A serious violation of community standards, including
|
|
||||||
sustained inappropriate behavior.
|
|
||||||
|
|
||||||
**Consequence**: A temporary ban from any sort of interaction or public
|
|
||||||
communication with the community for a specified period of time. No public or
|
|
||||||
private interaction with the people involved, including unsolicited interaction
|
|
||||||
with those enforcing the Code of Conduct, is allowed during this period.
|
|
||||||
Violating these terms may lead to a permanent ban.
|
|
||||||
|
|
||||||
### 4. Permanent Ban
|
|
||||||
|
|
||||||
**Community Impact**: Demonstrating a pattern of violation of community
|
|
||||||
standards, including sustained inappropriate behavior, harassment of an
|
|
||||||
individual, or aggression toward or disparagement of classes of individuals.
|
|
||||||
|
|
||||||
**Consequence**: A permanent ban from any sort of public interaction within the
|
|
||||||
community.
|
|
||||||
|
|
||||||
## Attribution
|
|
||||||
|
|
||||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
|
||||||
version 2.1, available at
|
|
||||||
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
|
||||||
|
|
||||||
Community Impact Guidelines were inspired by
|
|
||||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
|
||||||
|
|
||||||
For answers to common questions about this code of conduct, see the FAQ at
|
|
||||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
|
||||||
[https://www.contributor-covenant.org/translations][translations].
|
|
||||||
|
|
||||||
[homepage]: https://www.contributor-covenant.org
|
|
||||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
|
||||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
|
||||||
[FAQ]: https://www.contributor-covenant.org/faq
|
|
||||||
[translations]: https://www.contributor-covenant.org/translations
|
|
||||||
-115
@@ -1,115 +0,0 @@
|
|||||||
<p align="center">
|
|
||||||
<br>
|
|
||||||
<a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png" alt="sherlock"/></a>
|
|
||||||
<br>
|
|
||||||
<span>Hunt down social media accounts by username across <a href="https://sherlockproject.xyz/sites">400+ social networks</a></span>
|
|
||||||
<br>
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p align="center">
|
|
||||||
<a href="https://sherlockproject.xyz/installation">Installation</a>
|
|
||||||
•
|
|
||||||
<a href="https://sherlockproject.xyz/usage">Usage</a>
|
|
||||||
•
|
|
||||||
<a href="https://sherlockproject.xyz/contribute">Contributing</a>
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p align="center">
|
|
||||||
<img width="70%" height="70%" src="images/demo.png" alt="demo"/>
|
|
||||||
</p>
|
|
||||||
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
> [!WARNING]
|
|
||||||
> Packages for ParrotOS and Ubuntu 24.04, maintained by a third party, appear to be __broken__.
|
|
||||||
> Users of these systems should defer to [`uv`](https://docs.astral.sh/uv/)/`pipx`/`pip` or Docker.
|
|
||||||
|
|
||||||
| Method | Notes |
|
|
||||||
| - | - |
|
|
||||||
| `pipx install sherlock-project` | `pip` or [`uv`](https://docs.astral.sh/uv/) may be used in place of `pipx` |
|
|
||||||
| `docker run -it --rm sherlock/sherlock` | |
|
|
||||||
| `dnf install sherlock-project` | |
|
|
||||||
|
|
||||||
Community-maintained packages are available for Debian (>= 13), Ubuntu (>= 22.10), Homebrew, Kali, and BlackArch. These packages are not directly supported or maintained by the Sherlock Project.
|
|
||||||
|
|
||||||
See all alternative installation methods [here](https://sherlockproject.xyz/installation).
|
|
||||||
|
|
||||||
## General usage
|
|
||||||
|
|
||||||
To search for only one user:
|
|
||||||
```bash
|
|
||||||
sherlock user123
|
|
||||||
```
|
|
||||||
|
|
||||||
To search for more than one user:
|
|
||||||
```bash
|
|
||||||
sherlock user1 user2 user3
|
|
||||||
```
|
|
||||||
|
|
||||||
Accounts found will be stored in an individual text file named after the corresponding username (e.g. `user123.txt`).
|
|
||||||
|
|
||||||
```console
|
|
||||||
$ sherlock --help
|
|
||||||
usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT] [--output OUTPUT] [--csv] [--xlsx] [--site SITE_NAME] [--proxy PROXY_URL] [--dump-response]
|
|
||||||
[--json JSON_FILE] [--timeout TIMEOUT] [--print-all] [--print-found] [--no-color] [--browse] [--local] [--nsfw] [--txt] [--ignore-exclusions]
|
|
||||||
USERNAMES [USERNAMES ...]
|
|
||||||
|
|
||||||
Sherlock: Find Usernames Across Social Networks (Version 0.16.0)
|
|
||||||
|
|
||||||
positional arguments:
|
|
||||||
USERNAMES One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').
|
|
||||||
|
|
||||||
options:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
--version Display version information and dependencies.
|
|
||||||
--verbose, -v, -d, --debug
|
|
||||||
Display extra debugging information and metrics.
|
|
||||||
--folderoutput FOLDEROUTPUT, -fo FOLDEROUTPUT
|
|
||||||
If using multiple usernames, the output of the results will be saved to this folder.
|
|
||||||
--output OUTPUT, -o OUTPUT
|
|
||||||
If using single username, the output of the result will be saved to this file.
|
|
||||||
--csv Create Comma-Separated Values (CSV) File.
|
|
||||||
--xlsx Create the standard file for the modern Microsoft Excel spreadsheet (xlsx).
|
|
||||||
--site SITE_NAME Limit analysis to just the listed sites. Add multiple options to specify more than one site.
|
|
||||||
--proxy PROXY_URL, -p PROXY_URL
|
|
||||||
Make requests over a proxy. e.g. socks5://127.0.0.1:1080
|
|
||||||
--dump-response Dump the HTTP response to stdout for targeted debugging.
|
|
||||||
--json JSON_FILE, -j JSON_FILE
|
|
||||||
Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.
|
|
||||||
--timeout TIMEOUT Time (in seconds) to wait for response to requests (Default: 60)
|
|
||||||
--print-all Output sites where the username was not found.
|
|
||||||
--print-found Output sites where the username was found (also if exported as file).
|
|
||||||
--no-color Don't color terminal output
|
|
||||||
--browse, -b Browse to all results on default browser.
|
|
||||||
--local, -l Force the use of the local data.json file.
|
|
||||||
--nsfw Include checking of NSFW sites from default list.
|
|
||||||
--txt Enable creation of a txt file
|
|
||||||
--ignore-exclusions Ignore upstream exclusions (may return more false positives)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Credits
|
|
||||||
|
|
||||||
Thank you to everyone who has contributed to Sherlock! ❤️
|
|
||||||
|
|
||||||
<a href="https://github.com/sherlock-project/sherlock/graphs/contributors">
|
|
||||||
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
|
|
||||||
</a>
|
|
||||||
|
|
||||||
## Star History
|
|
||||||
|
|
||||||
<picture>
|
|
||||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
|
|
||||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" />
|
|
||||||
<img alt="Sherlock Project Star History Chart" src="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" />
|
|
||||||
</picture>
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
MIT © Sherlock Project<br/>
|
|
||||||
Creator - [Siddharth Dushantha](https://github.com/sdushantha)
|
|
||||||
|
|
||||||
<!-- Reference Links -->
|
|
||||||
|
|
||||||
[ext_pypi]: https://pypi.org/project/sherlock-project/
|
|
||||||
[ext_brew]: https://formulae.brew.sh/formula/sherlock
|
|
||||||
Binary file not shown.
|
Before Width: | Height: | Size: 440 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 70 KiB |
@@ -1,42 +0,0 @@
|
|||||||
<!-- This README should be a mini version at all times for use on pypi -->
|
|
||||||
|
|
||||||
<p align=center>
|
|
||||||
<br>
|
|
||||||
<a href="https://sherlock-project.github.io/" target="_blank"><img src="https://www.kali.org/tools/sherlock/images/sherlock-logo.svg" width="25%"/></a>
|
|
||||||
<br>
|
|
||||||
<strong><span>Hunt down social media accounts by username across <a href="https://github.com/sherlock-project/sherlock/blob/master/sites.md">400+ social networks</a></span></strong>
|
|
||||||
<br><br>
|
|
||||||
<span>Additional documentation can be found at our <a href="https://github.com/sherlock-project/sherlock/">GitHub repository</a></span>
|
|
||||||
<br>
|
|
||||||
</p>
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
```console
|
|
||||||
$ sherlock --help
|
|
||||||
usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT]
|
|
||||||
[--output OUTPUT] [--tor] [--unique-tor] [--csv] [--xlsx]
|
|
||||||
[--site SITE_NAME] [--proxy PROXY_URL] [--json JSON_FILE]
|
|
||||||
[--timeout TIMEOUT] [--print-all] [--print-found] [--no-color]
|
|
||||||
[--browse] [--local] [--nsfw]
|
|
||||||
USERNAMES [USERNAMES ...]
|
|
||||||
```
|
|
||||||
|
|
||||||
To search for only one user:
|
|
||||||
```bash
|
|
||||||
$ sherlock user123
|
|
||||||
```
|
|
||||||
|
|
||||||
To search for more than one user:
|
|
||||||
```bash
|
|
||||||
$ sherlock user1 user2 user3
|
|
||||||
```
|
|
||||||
<br>
|
|
||||||
|
|
||||||
___
|
|
||||||
|
|
||||||
<br>
|
|
||||||
<p align="center">
|
|
||||||
<img width="70%" height="70%" src="https://user-images.githubusercontent.com/27065646/219638267-a5e11090-aa6e-4e77-87f7-0e95f6ad5978.png"/>
|
|
||||||
</a>
|
|
||||||
</p>
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,40 @@
|
|||||||
|
7Cups
|
||||||
|
APClips
|
||||||
|
Airliners
|
||||||
|
Apple Discussions
|
||||||
|
Archive.org
|
||||||
|
Bandcamp
|
||||||
|
BitBucket
|
||||||
|
Codolio
|
||||||
|
Discord.bio
|
||||||
|
Envato Forum
|
||||||
|
Giphy
|
||||||
|
Hashnode
|
||||||
|
Hubski
|
||||||
|
LessWrong
|
||||||
|
Motherless
|
||||||
|
Patched
|
||||||
|
Pornhub
|
||||||
|
Rarible
|
||||||
|
Realmeye
|
||||||
|
Reddit
|
||||||
|
RocketTube
|
||||||
|
RuneScape
|
||||||
|
Scribd
|
||||||
|
Shelf
|
||||||
|
SlideShare
|
||||||
|
Smule
|
||||||
|
Splice
|
||||||
|
Spotify
|
||||||
|
TryHackMe
|
||||||
|
Velomania
|
||||||
|
Weblate
|
||||||
|
YandexMusic
|
||||||
|
dailykos
|
||||||
|
igromania
|
||||||
|
interpals
|
||||||
|
mercadolivre
|
||||||
|
opennet
|
||||||
|
phpRU
|
||||||
|
svidbook
|
||||||
|
xHamster
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
[build-system]
|
|
||||||
requires = [ "poetry-core>=1.2.0" ]
|
|
||||||
build-backend = "poetry.core.masonry.api"
|
|
||||||
# poetry-core 1.8 not available in .fc39. Can upgrade to 1.8.0 at .fc39 EOL
|
|
||||||
|
|
||||||
[tool.poetry-version-plugin]
|
|
||||||
source = "init"
|
|
||||||
|
|
||||||
[tool.poetry]
|
|
||||||
name = "sherlock-project"
|
|
||||||
version = "0.16.1"
|
|
||||||
description = "Hunt down social media accounts by username across social networks"
|
|
||||||
license = "MIT"
|
|
||||||
authors = [
|
|
||||||
"Siddharth Dushantha <siddharth.dushantha@gmail.com>"
|
|
||||||
]
|
|
||||||
maintainers = [
|
|
||||||
"Paul Pfeister <code@pfeister.dev>",
|
|
||||||
"Matheus Felipe <matheusfelipeog@protonmail.com>",
|
|
||||||
"Sondre Karlsen Dyrnes <sondre@villdyr.no>"
|
|
||||||
]
|
|
||||||
readme = "docs/pyproject/README.md"
|
|
||||||
packages = [ { include = "sherlock_project"} ]
|
|
||||||
keywords = [ "osint", "reconnaissance", "information gathering" ]
|
|
||||||
classifiers = [
|
|
||||||
"Development Status :: 5 - Production/Stable",
|
|
||||||
"Intended Audience :: Developers",
|
|
||||||
"Intended Audience :: Information Technology",
|
|
||||||
"Natural Language :: English",
|
|
||||||
"Operating System :: OS Independent",
|
|
||||||
"Programming Language :: Python :: 3",
|
|
||||||
"Programming Language :: Python :: 3.10",
|
|
||||||
"Programming Language :: Python :: 3.11",
|
|
||||||
"Programming Language :: Python :: 3.12",
|
|
||||||
"Programming Language :: Python :: 3.13",
|
|
||||||
"Topic :: Security"
|
|
||||||
]
|
|
||||||
homepage = "https://sherlockproject.xyz/"
|
|
||||||
repository = "https://github.com/sherlock-project/sherlock"
|
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.urls]
|
|
||||||
"Bug Tracker" = "https://github.com/sherlock-project/sherlock/issues"
|
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
|
||||||
python = "^3.9"
|
|
||||||
certifi = ">=2019.6.16"
|
|
||||||
colorama = "^0.4.1"
|
|
||||||
PySocks = "^1.7.0"
|
|
||||||
requests = "^2.22.0"
|
|
||||||
requests-futures = "^1.0.0"
|
|
||||||
stem = "^1.8.0"
|
|
||||||
pandas = "^2.2.1"
|
|
||||||
openpyxl = "^3.0.10"
|
|
||||||
tomli = "^2.2.1"
|
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
|
||||||
jsonschema = "^4.0.0"
|
|
||||||
rstr = "^3.2.2"
|
|
||||||
pytest = "^8.4.2"
|
|
||||||
pytest-xdist = "^3.8.0"
|
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.group.ci.dependencies]
|
|
||||||
defusedxml = "^0.7.1"
|
|
||||||
|
|
||||||
[tool.poetry.scripts]
|
|
||||||
sherlock = 'sherlock_project.sherlock:main'
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
[pytest]
|
|
||||||
addopts = --strict-markers -m "not validate_targets"
|
|
||||||
markers =
|
|
||||||
online: mark tests as requiring internet access.
|
|
||||||
validate_targets: mark tests for sweeping manifest validation (sends many requests).
|
|
||||||
validate_targets_fp: validate_targets, false positive tests only.
|
|
||||||
validate_targets_fn: validate_targets, false negative tests only.
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
""" Sherlock Module
|
|
||||||
|
|
||||||
This module contains the main logic to search for usernames at social
|
|
||||||
networks.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from importlib.metadata import version as pkg_version, PackageNotFoundError
|
|
||||||
import pathlib
|
|
||||||
import tomli
|
|
||||||
|
|
||||||
|
|
||||||
def get_version() -> str:
    """Return the Sherlock version string.

    The metadata of the installed "sherlock_project" distribution is consulted
    first. When no such distribution is installed, the version is read from
    the ``tool.poetry.version`` entry of the ``pyproject.toml`` located one
    directory above this package.
    """
    try:
        installed_version = pkg_version("sherlock_project")
    except PackageNotFoundError:
        package_dir: pathlib.Path = pathlib.Path(__file__).resolve().parent
        pyproject_path: pathlib.Path = package_dir.parent / "pyproject.toml"
        with pyproject_path.open("rb") as toml_file:
            poetry_table = tomli.load(toml_file)["tool"]["poetry"]
        return poetry_table["version"]
    else:
        return installed_version
|
|
||||||
|
|
||||||
# This variable is only used to check for ImportErrors induced by users running as script rather than as module or package
|
|
||||||
import_error_test_var = None
|
|
||||||
|
|
||||||
__shortname__ = "Sherlock"
|
|
||||||
__longname__ = "Sherlock: Find Usernames Across Social Networks"
|
|
||||||
__version__ = get_version()
|
|
||||||
|
|
||||||
forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest"
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
#! /usr/bin/env python3
|
|
||||||
|
|
||||||
"""
|
|
||||||
Sherlock: Find Usernames Across Social Networks Module
|
|
||||||
|
|
||||||
This module contains the main logic to search for usernames at social
|
|
||||||
networks.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Refuse to start on interpreters older than the minimum supported release.
    if sys.version_info < (3, 9):
        running_version = sys.version.split()[0]
        print(f"Sherlock requires Python 3.9+\nYou are using Python {running_version}, which is not supported by Sherlock.")
        sys.exit(1)

    # Imported only once the interpreter version check has passed.
    from sherlock_project import sherlock

    sherlock.main()
|
|
||||||
@@ -1,279 +0,0 @@
|
|||||||
"""Sherlock Notify Module
|
|
||||||
|
|
||||||
This module defines the objects for notifying the caller about the
|
|
||||||
results of queries.
|
|
||||||
"""
|
|
||||||
from sherlock_project.result import QueryStatus
|
|
||||||
from colorama import Fore, Style
|
|
||||||
import webbrowser
|
|
||||||
|
|
||||||
# Global variable to count the number of results.
|
|
||||||
globvar = 0
|
|
||||||
|
|
||||||
|
|
||||||
class QueryNotify:
    """Query Notify Object.

    Base class listing the callbacks used to report the progress and the
    results of a query.  The callbacks here do nothing beyond remembering the
    most recent result; subclasses are expected to override them.
    """

    def __init__(self, result=None):
        """Create Query Notify Object.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.
                                  Default is None.

        Return Value:
        Nothing.
        """
        self.result = result

    def start(self, message=None):
        """Notify Start.

        Called before any queries are performed.  The base implementation
        does nothing.

        Keyword Arguments:
        self                   -- This object.
        message                -- Object that is used to give context to the
                                  start of the query.
                                  Default is None.

        Return Value:
        Nothing.
        """
        return None

    def update(self, result):
        """Notify Update.

        Called with the result of a query.  The base implementation only
        stores the result on this object.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.

        Return Value:
        Nothing.
        """
        self.result = result

    def finish(self, message=None):
        """Notify Finish.

        Called after all queries have been performed.  The base
        implementation does nothing.

        Keyword Arguments:
        self                   -- This object.
        message                -- Object that is used to give context to the
                                  end of the query.
                                  Default is None.

        Return Value:
        Nothing.
        """
        return None

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        String form of the most recently stored result.
        """
        latest = self.result
        return str(latest)
|
|
||||||
|
|
||||||
|
|
||||||
class QueryNotifyPrint(QueryNotify):
    """Query Notify Print Object.

    Query notify class that prints results to the standard output.
    """

    def __init__(self, result=None, verbose=False, print_all=False, browse=False):
        """Create Query Notify Print Object.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.
        verbose                -- Boolean indicating whether to give verbose output
                                  (adds the response time to some result lines).
        print_all              -- Boolean indicating whether to print all sites,
                                  including those where the username was not found.
        browse                 -- Boolean indicating whether to open found sites
                                  in a web browser.

        Return Value:
        Nothing.
        """
        super().__init__(result)
        self.verbose = verbose
        self.print_all = print_all
        self.browse = browse

    def start(self, message):
        """Notify Start.

        Will print the title to the standard output.

        Keyword Arguments:
        self                   -- This object.
        message                -- String containing username that the series
                                  of queries are about.

        Return Value:
        Nothing.
        """
        title = "Checking username"

        print(Style.BRIGHT + Fore.GREEN + "[" +
              Fore.YELLOW + "*" +
              Fore.GREEN + f"] {title}" +
              Fore.WHITE + f" {message}" +
              Fore.GREEN + " on:")
        # An empty line between the first line and the results (clearer output).
        print('\r')

    def countResults(self):
        """Increment the result counter and return its new value.

        The counter is the module-level variable ``globvar``, so every call
        on any instance advances the same count.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        The number of results counted so far, including this call.
        """
        global globvar
        globvar += 1
        return globvar

    def update(self, result):
        """Notify Update.

        Will print the query result to the standard output.  Claimed
        usernames are always printed (and counted); every other status is
        printed only when ``print_all`` is set.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.

        Return Value:
        Nothing.

        Raises:
        ValueError             -- If the status of the result is not one of
                                  the statuses handled below.
        """
        self.result = result

        # The response time is only shown in verbose mode and when known.
        response_time_text = ""
        if self.result.query_time is not None and self.verbose is True:
            response_time_text = f" [{round(self.result.query_time * 1000)}ms]"

        # Output to the terminal is desired.
        if result.status == QueryStatus.CLAIMED:
            self.countResults()
            print(Style.BRIGHT + Fore.WHITE + "[" +
                  Fore.GREEN + "+" +
                  Fore.WHITE + "]" +
                  response_time_text +
                  Fore.GREEN +
                  f" {self.result.site_name}: " +
                  Style.RESET_ALL +
                  f"{self.result.site_url_user}")
            if self.browse:
                webbrowser.open(self.result.site_url_user, 2)

        elif result.status == QueryStatus.AVAILABLE:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      response_time_text +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.YELLOW + " Not Found!")

        elif result.status == QueryStatus.UNKNOWN:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.RED + f" {self.result.context}" +
                      Fore.YELLOW + " ")

        elif result.status == QueryStatus.ILLEGAL:
            if self.print_all:
                msg = "Illegal Username Format For This Site!"
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.YELLOW + f" {msg}")

        elif result.status == QueryStatus.WAF:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.RED + " Blocked by bot detection" +
                      Fore.YELLOW + " (proxy may help)")

        else:
            # It should be impossible to ever get here...
            raise ValueError(
                f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'"
            )

    def finish(self, message="The processing has been finished."):
        """Notify Finish.

        Will print the closing line, with the number of results counted so
        far, to the standard output.

        Keyword Arguments:
        self                   -- This object.
        message                -- Not used by this implementation; kept so
                                  the signature stays compatible.

        Return Value:
        Nothing.
        """
        # Read the counter directly.  Going through countResults() would
        # increment it as a side effect, so a second call to finish() would
        # report one result too many.
        number_of_results = globvar

        print(Style.BRIGHT + Fore.GREEN + "[" +
              Fore.YELLOW + "*" +
              Fore.GREEN + "] Search completed with" +
              Fore.WHITE + f" {number_of_results} " +
              Fore.GREEN + "results" + Style.RESET_ALL
              )

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        String form of the most recently stored result.
        """
        return str(self.result)
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,149 +0,0 @@
|
|||||||
{
|
|
||||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
||||||
"title": "Sherlock Target Manifest",
|
|
||||||
"description": "Social media targets to probe for the existence of known usernames",
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"$schema": { "type": "string" }
|
|
||||||
},
|
|
||||||
"patternProperties": {
|
|
||||||
"^(?!\\$).*?$": {
|
|
||||||
"type": "object",
|
|
||||||
"description": "Target name and associated information (key should be human readable name)",
|
|
||||||
"required": ["url", "urlMain", "errorType", "username_claimed"],
|
|
||||||
"properties": {
|
|
||||||
"url": { "type": "string" },
|
|
||||||
"urlMain": { "type": "string" },
|
|
||||||
"urlProbe": { "type": "string" },
|
|
||||||
"username_claimed": { "type": "string" },
|
|
||||||
"regexCheck": { "type": "string" },
|
|
||||||
"isNSFW": { "type": "boolean" },
|
|
||||||
"headers": { "type": "object" },
|
|
||||||
"request_payload": { "type": "object" },
|
|
||||||
"__comment__": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
|
|
||||||
},
|
|
||||||
"tags": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "$ref": "#/$defs/tag" },
|
|
||||||
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"request_method": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": ["GET", "POST", "HEAD", "PUT"]
|
|
||||||
},
|
|
||||||
"errorType": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "string",
|
|
||||||
"enum": ["message", "response_url", "status_code"]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": ["message", "response_url", "status_code"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"errorMsg": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "type": "string" },
|
|
||||||
{ "type": "array", "items": { "type": "string" } }
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"errorCode": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "type": "integer" },
|
|
||||||
{ "type": "array", "items": { "type": "integer" } }
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"errorUrl": { "type": "string" },
|
|
||||||
"response_url": { "type": "string" }
|
|
||||||
},
|
|
||||||
"dependencies": {
|
|
||||||
"errorMsg": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "properties": { "errorType": { "const": "message" } } },
|
|
||||||
{
|
|
||||||
"properties": {
|
|
||||||
"errorType": {
|
|
||||||
"type": "array",
|
|
||||||
"contains": { "const": "message" }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"errorUrl": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "properties": { "errorType": { "const": "response_url" } } },
|
|
||||||
{
|
|
||||||
"properties": {
|
|
||||||
"errorType": {
|
|
||||||
"type": "array",
|
|
||||||
"contains": { "const": "response_url" }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"errorCode": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "properties": { "errorType": { "const": "status_code" } } },
|
|
||||||
{
|
|
||||||
"properties": {
|
|
||||||
"errorType": {
|
|
||||||
"type": "array",
|
|
||||||
"contains": { "const": "status_code" }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"allOf": [
|
|
||||||
{
|
|
||||||
"if": {
|
|
||||||
"anyOf": [
|
|
||||||
{ "properties": { "errorType": { "const": "message" } } },
|
|
||||||
{
|
|
||||||
"properties": {
|
|
||||||
"errorType": {
|
|
||||||
"type": "array",
|
|
||||||
"contains": { "const": "message" }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"then": { "required": ["errorMsg"] }
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"if": {
|
|
||||||
"anyOf": [
|
|
||||||
{ "properties": { "errorType": { "const": "response_url" } } },
|
|
||||||
{
|
|
||||||
"properties": {
|
|
||||||
"errorType": {
|
|
||||||
"type": "array",
|
|
||||||
"contains": { "const": "response_url" }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"then": { "required": ["errorUrl"] }
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"additionalProperties": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"$defs": {
|
|
||||||
"tag": { "type": "string", "enum": ["adult", "gaming"] }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,89 +0,0 @@
|
|||||||
"""Sherlock Result Module
|
|
||||||
|
|
||||||
This module defines various objects for recording the results of queries.
|
|
||||||
"""
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
|
|
||||||
class QueryStatus(Enum):
    """Query Status Enumeration.

    The possible outcomes of checking one username against one site.
    """

    # Username Detected
    CLAIMED = "Claimed"
    # Username Not Detected
    AVAILABLE = "Available"
    # Error Occurred While Trying To Detect Username
    UNKNOWN = "Unknown"
    # Username Not Allowable For This Site
    ILLEGAL = "Illegal"
    # Request blocked by WAF (i.e. Cloudflare)
    WAF = "WAF"

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        The value of this member (for example "Claimed").
        """
        label = self.value
        return label
|
|
||||||
|
|
||||||
class QueryResult:
    """Query Result Object.

    Records the outcome of checking one username against one site.
    """

    def __init__(self, username, site_name, site_url_user, status,
                 query_time=None, context=None):
        """Create Query Result Object.

        Keyword Arguments:
        self                   -- This object.
        username               -- String indicating username that query result
                                  was about.
        site_name              -- String which identifies site.
        site_url_user          -- String containing URL for username on site.
                                  NOTE:  The site may or may not exist:  this
                                         just indicates what the name would
                                         be, if it existed.
        status                 -- Enumeration of type QueryStatus() indicating
                                  the status of the query.
        query_time             -- Time (in seconds) required to perform query.
                                  Default of None.
        context                -- String indicating any additional context
                                  about the query.  For example, if there was
                                  an error, this might indicate the type of
                                  error that occurred.
                                  Default of None.

        Return Value:
        Nothing.
        """
        # What was asked.
        self.username = username
        self.site_name = site_name
        self.site_url_user = site_url_user

        # What came back.
        self.status = status
        self.query_time = query_time
        self.context = context

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        The status as text, followed by the context in parentheses when a
        context is present.
        """
        text = str(self.status)
        if self.context is None:
            return text

        # Extra context is available: append it to the normal status text.
        return f"{text} ({self.context})"
|
|
||||||
@@ -1,935 +0,0 @@
|
|||||||
#! /usr/bin/env python3
|
|
||||||
|
|
||||||
"""
|
|
||||||
Sherlock: Find Usernames Across Social Networks Module
|
|
||||||
|
|
||||||
This module contains the main logic to search for usernames at social
|
|
||||||
networks.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
try:
|
|
||||||
from sherlock_project.__init__ import import_error_test_var # noqa: F401
|
|
||||||
except ImportError:
|
|
||||||
print("Did you run Sherlock with `python3 sherlock/sherlock.py ...`?")
|
|
||||||
print("This is an outdated method. Please see https://sherlockproject.xyz/installation for up to date instructions.")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
import csv
|
|
||||||
import signal
|
|
||||||
import pandas as pd
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
|
||||||
from json import loads as json_loads
|
|
||||||
from time import monotonic
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from requests_futures.sessions import FuturesSession
|
|
||||||
|
|
||||||
from sherlock_project.__init__ import (
|
|
||||||
__longname__,
|
|
||||||
__shortname__,
|
|
||||||
__version__,
|
|
||||||
forge_api_latest_release,
|
|
||||||
)
|
|
||||||
|
|
||||||
from sherlock_project.result import QueryStatus
|
|
||||||
from sherlock_project.result import QueryResult
|
|
||||||
from sherlock_project.notify import QueryNotify
|
|
||||||
from sherlock_project.notify import QueryNotifyPrint
|
|
||||||
from sherlock_project.sites import SitesInformation
|
|
||||||
from colorama import init
|
|
||||||
from argparse import ArgumentTypeError
|
|
||||||
|
|
||||||
|
|
||||||
class SherlockFuturesSession(FuturesSession):
    """FuturesSession that measures the response time of every request."""

    def request(self, method, url, hooks=None, *args, **kwargs):
        """Request URL.

        This extends the FuturesSession request method to attach a response
        time metric to each response (stored on ``resp.elapsed`` as a number
        of seconds).

        Adapted from the example at:
        https://github.com/ross/requests-futures#working-in-the-background

        Keyword Arguments:
        self                   -- This object.
        method                 -- String containing method desired for request.
        url                    -- String containing URL for request.
        hooks                  -- Dictionary containing hooks to execute after
                                  request finishes.  It is not modified.
        args                   -- Arguments.
        kwargs                 -- Keyword arguments.

        Return Value:
        Whatever the parent ``request`` method returns.
        """
        # Record the start time for the request.
        start = monotonic()

        def response_time(resp, *args, **kwargs):
            """Response Time Hook.

            Keyword Arguments:
            resp                   -- Response object.
            args                   -- Arguments.
            kwargs                 -- Keyword arguments.

            Return Value:
            Nothing.
            """
            resp.elapsed = monotonic() - start

        # Build a fresh hooks mapping instead of editing the caller's one.
        # Inserting into the caller's list would stack one timing hook per
        # call when the same hooks object is reused, and the stale hooks
        # would overwrite the elapsed time with values from older requests.
        merged_hooks = dict(hooks) if hooks else {}
        existing = merged_hooks.get("response")

        # The time measurement hook goes first, so we will not track any
        # later hook's execution time.
        if existing is None:
            # No response hook was already defined.
            response_hooks = [response_time]
        elif isinstance(existing, (list, tuple)):
            response_hooks = [response_time, *existing]
        else:
            # A single hook function was supplied.
            response_hooks = [response_time, existing]
        merged_hooks["response"] = response_hooks

        return super().request(method, url, *args, hooks=merged_hooks, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
def get_response(request_future, error_type, social_network):
    """Wait for a request future and describe how it ended.

    Keyword Arguments:
    request_future         -- Future whose result() yields the response.
    error_type             -- Not used by this function.
    social_network         -- Not used by this function.

    Return Value:
    Tuple of (response, error_context, exception_text):
    response               -- The response object, or None when waiting for
                              the result raised one of the handled errors.
    error_context          -- None when the response carries a truthy status
                              code, otherwise a short description of the
                              failure.
    exception_text         -- Text of the handled exception, or None when no
                              exception was raised.
    """
    # Defaults describing a failure with no further information.
    response = None
    exception_text = None
    error_context = "General Unknown Error"

    try:
        response = request_future.result()
        if response.status_code:
            # Status code exists in response object
            error_context = None
    except (requests.exceptions.RequestException, UnicodeError) as failure:
        # Ordered from most to least specific; the first match wins.
        descriptions = (
            (requests.exceptions.HTTPError, "HTTP Error"),
            (requests.exceptions.ProxyError, "Proxy Error"),
            (requests.exceptions.ConnectionError, "Error Connecting"),
            (requests.exceptions.Timeout, "Timeout Error"),
            (requests.exceptions.RequestException, "Unknown Error"),
            (UnicodeError, "Encoding Error"),
        )
        error_context = next(
            label for kind, label in descriptions if isinstance(failure, kind)
        )
        exception_text = str(failure)

    return response, error_context, exception_text
|
|
||||||
|
|
||||||
|
|
||||||
def interpolate_string(input_object, username):
    """Substitute the username for every "{}" placeholder.

    Strings have each "{}" replaced by the username.  Dictionaries and lists
    are rebuilt with the substitution applied recursively to their values and
    items (dictionary keys are kept as they are).  Any other object is
    returned unchanged.
    """
    if isinstance(input_object, dict):
        return {
            key: interpolate_string(value, username)
            for key, value in input_object.items()
        }

    if isinstance(input_object, list):
        return [interpolate_string(item, username) for item in input_object]

    if isinstance(input_object, str):
        return input_object.replace("{}", username)

    # Nothing to substitute in other kinds of objects.
    return input_object
|
|
||||||
|
|
||||||
|
|
||||||
def check_for_parameter(username):
    """Tell whether the username contains the "{?}" placeholder.

    A username containing the placeholder stands for several usernames
    rather than a single one.
    """
    placeholder = "{?}"
    return placeholder in username
|
|
||||||
|
|
||||||
|
|
||||||
# Symbols substituted, in this order, for the "{?}" placeholder.
checksymbols = ["_", "-", "."]


def multiple_usernames(username):
    """Return one username per symbol, with "{?}" replaced by that symbol."""
    return [username.replace("{?}", symbol) for symbol in checksymbols]
|
|
||||||
|
|
||||||
|
|
||||||
def sherlock(
|
|
||||||
username: str,
|
|
||||||
site_data: dict[str, dict[str, str]],
|
|
||||||
query_notify: QueryNotify,
|
|
||||||
dump_response: bool = False,
|
|
||||||
proxy: Optional[str] = None,
|
|
||||||
timeout: int = 60,
|
|
||||||
) -> dict[str, dict[str, str | QueryResult]]:
|
|
||||||
"""Run Sherlock Analysis.
|
|
||||||
|
|
||||||
Checks for existence of username on various social media sites.
|
|
||||||
|
|
||||||
Keyword Arguments:
|
|
||||||
username -- String indicating username that report
|
|
||||||
should be created against.
|
|
||||||
site_data -- Dictionary containing all of the site data.
|
|
||||||
query_notify -- Object with base type of QueryNotify().
|
|
||||||
This will be used to notify the caller about
|
|
||||||
query results.
|
|
||||||
proxy -- String indicating the proxy URL
|
|
||||||
timeout -- Time in seconds to wait before timing out request.
|
|
||||||
Default is 60 seconds.
|
|
||||||
|
|
||||||
Return Value:
|
|
||||||
Dictionary containing results from report. Key of dictionary is the name
|
|
||||||
of the social network site, and the value is another dictionary with
|
|
||||||
the following keys:
|
|
||||||
url_main: URL of main site.
|
|
||||||
url_user: URL of user on site (if account exists).
|
|
||||||
status: QueryResult() object indicating results of test for
|
|
||||||
account existence.
|
|
||||||
http_status: HTTP status code of query which checked for existence on
|
|
||||||
site.
|
|
||||||
response_text: Text that came back from request. May be None if
|
|
||||||
there was an HTTP error when checking for existence.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Notify caller that we are starting the query.
|
|
||||||
query_notify.start(username)
|
|
||||||
|
|
||||||
# Normal requests
|
|
||||||
underlying_session = requests.session()
|
|
||||||
|
|
||||||
# Limit number of workers to 20.
|
|
||||||
# This is probably vastly overkill.
|
|
||||||
if len(site_data) >= 20:
|
|
||||||
max_workers = 20
|
|
||||||
else:
|
|
||||||
max_workers = len(site_data)
|
|
||||||
|
|
||||||
# Create multi-threaded session for all requests.
|
|
||||||
session = SherlockFuturesSession(
|
|
||||||
max_workers=max_workers, session=underlying_session
|
|
||||||
)
|
|
||||||
|
|
||||||
# Results from analysis of all sites
|
|
||||||
results_total = {}
|
|
||||||
|
|
||||||
# First create futures for all requests. This allows for the requests to run in parallel
|
|
||||||
for social_network, net_info in site_data.items():
|
|
||||||
# Results from analysis of this specific site
|
|
||||||
results_site = {"url_main": net_info.get("urlMain")}
|
|
||||||
|
|
||||||
# Record URL of main site
|
|
||||||
|
|
||||||
# A user agent is needed because some sites don't return the correct
|
|
||||||
# information since they think that we are bots (Which we actually are...)
|
|
||||||
headers = {
|
|
||||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0",
|
|
||||||
}
|
|
||||||
|
|
||||||
if "headers" in net_info:
|
|
||||||
# Override/append any extra headers required by a given site.
|
|
||||||
headers.update(net_info["headers"])
|
|
||||||
|
|
||||||
# URL of user on site (if it exists)
|
|
||||||
url = interpolate_string(net_info["url"], username.replace(' ', '%20'))
|
|
||||||
|
|
||||||
# Don't make request if username is invalid for the site
|
|
||||||
regex_check = net_info.get("regexCheck")
|
|
||||||
if regex_check and re.search(regex_check, username) is None:
|
|
||||||
# No need to do the check at the site: this username is not allowed.
|
|
||||||
results_site["status"] = QueryResult(
|
|
||||||
username, social_network, url, QueryStatus.ILLEGAL
|
|
||||||
)
|
|
||||||
results_site["url_user"] = ""
|
|
||||||
results_site["http_status"] = ""
|
|
||||||
results_site["response_text"] = ""
|
|
||||||
query_notify.update(results_site["status"])
|
|
||||||
else:
|
|
||||||
# URL of user on site (if it exists)
|
|
||||||
results_site["url_user"] = url
|
|
||||||
url_probe = net_info.get("urlProbe")
|
|
||||||
request_method = net_info.get("request_method")
|
|
||||||
request_payload = net_info.get("request_payload")
|
|
||||||
request = None
|
|
||||||
|
|
||||||
if request_method is not None:
|
|
||||||
if request_method == "GET":
|
|
||||||
request = session.get
|
|
||||||
elif request_method == "HEAD":
|
|
||||||
request = session.head
|
|
||||||
elif request_method == "POST":
|
|
||||||
request = session.post
|
|
||||||
elif request_method == "PUT":
|
|
||||||
request = session.put
|
|
||||||
else:
|
|
||||||
raise RuntimeError(f"Unsupported request_method for {url}")
|
|
||||||
|
|
||||||
if request_payload is not None:
|
|
||||||
request_payload = interpolate_string(request_payload, username)
|
|
||||||
|
|
||||||
if url_probe is None:
|
|
||||||
# Probe URL is normal one seen by people out on the web.
|
|
||||||
url_probe = url
|
|
||||||
else:
|
|
||||||
# There is a special URL for probing existence separate
|
|
||||||
# from where the user profile normally can be found.
|
|
||||||
url_probe = interpolate_string(url_probe, username)
|
|
||||||
|
|
||||||
if request is None:
|
|
||||||
if net_info["errorType"] == "status_code":
|
|
||||||
# In most cases when we are detecting by status code,
|
|
||||||
# it is not necessary to get the entire body: we can
|
|
||||||
# detect fine with just the HEAD response.
|
|
||||||
request = session.head
|
|
||||||
else:
|
|
||||||
# Either this detect method needs the content associated
|
|
||||||
# with the GET response, or this specific website will
|
|
||||||
# not respond properly unless we request the whole page.
|
|
||||||
request = session.get
|
|
||||||
|
|
||||||
if net_info["errorType"] == "response_url":
|
|
||||||
# Site forwards request to a different URL if username not
|
|
||||||
# found. Disallow the redirect so we can capture the
|
|
||||||
# http status from the original URL request.
|
|
||||||
allow_redirects = False
|
|
||||||
else:
|
|
||||||
# Allow whatever redirect that the site wants to do.
|
|
||||||
# The final result of the request will be what is available.
|
|
||||||
allow_redirects = True
|
|
||||||
|
|
||||||
# This future starts running the request in a new thread, doesn't block the main thread
|
|
||||||
if proxy is not None:
|
|
||||||
proxies = {"http": proxy, "https": proxy}
|
|
||||||
future = request(
|
|
||||||
url=url_probe,
|
|
||||||
headers=headers,
|
|
||||||
proxies=proxies,
|
|
||||||
allow_redirects=allow_redirects,
|
|
||||||
timeout=timeout,
|
|
||||||
json=request_payload,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
future = request(
|
|
||||||
url=url_probe,
|
|
||||||
headers=headers,
|
|
||||||
allow_redirects=allow_redirects,
|
|
||||||
timeout=timeout,
|
|
||||||
json=request_payload,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Store future in data for access later
|
|
||||||
net_info["request_future"] = future
|
|
||||||
|
|
||||||
# Add this site's results into final dictionary with all the other results.
|
|
||||||
results_total[social_network] = results_site
|
|
||||||
|
|
||||||
# Open the file containing account links
|
|
||||||
for social_network, net_info in site_data.items():
|
|
||||||
# Retrieve results again
|
|
||||||
results_site = results_total.get(social_network)
|
|
||||||
|
|
||||||
# Retrieve other site information again
|
|
||||||
url = results_site.get("url_user")
|
|
||||||
status = results_site.get("status")
|
|
||||||
if status is not None:
|
|
||||||
# We have already determined the user doesn't exist here
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Get the expected error type
|
|
||||||
error_type = net_info["errorType"]
|
|
||||||
if isinstance(error_type, str):
|
|
||||||
error_type: list[str] = [error_type]
|
|
||||||
|
|
||||||
# Retrieve future and ensure it has finished
|
|
||||||
future = net_info["request_future"]
|
|
||||||
r, error_text, exception_text = get_response(
|
|
||||||
request_future=future, error_type=error_type, social_network=social_network
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get response time for response of our request.
|
|
||||||
try:
|
|
||||||
response_time = r.elapsed
|
|
||||||
except AttributeError:
|
|
||||||
response_time = None
|
|
||||||
|
|
||||||
# Attempt to get request information
|
|
||||||
try:
|
|
||||||
http_status = r.status_code
|
|
||||||
except Exception:
|
|
||||||
http_status = "?"
|
|
||||||
try:
|
|
||||||
response_text = r.text.encode(r.encoding or "UTF-8")
|
|
||||||
except Exception:
|
|
||||||
response_text = ""
|
|
||||||
|
|
||||||
query_status = QueryStatus.UNKNOWN
|
|
||||||
error_context = None
|
|
||||||
|
|
||||||
# As WAFs advance and evolve, they will occasionally block Sherlock and
|
|
||||||
# lead to false positives and negatives. Fingerprints should be added
|
|
||||||
# here to filter results that fail to bypass WAFs. Fingerprints should
|
|
||||||
# be highly targeted. Comment at the end of each fingerprint to
|
|
||||||
# indicate target and date fingerprinted.
|
|
||||||
WAFHitMsgs = [
|
|
||||||
r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare
|
|
||||||
r'<span id="challenge-error-text">', # 2024-11-11 Cloudflare error page
|
|
||||||
r'AwsWafIntegration.forceRefreshToken', # 2024-11-11 Cloudfront (AWS)
|
|
||||||
r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:' # 2024-04-09 PerimeterX / Human Security
|
|
||||||
]
|
|
||||||
|
|
||||||
if error_text is not None:
|
|
||||||
error_context = error_text
|
|
||||||
|
|
||||||
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
|
|
||||||
query_status = QueryStatus.WAF
|
|
||||||
|
|
||||||
else:
|
|
||||||
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
|
|
||||||
error_context = f"Unknown error type '{error_type}' for {social_network}"
|
|
||||||
query_status = QueryStatus.UNKNOWN
|
|
||||||
else:
|
|
||||||
if "message" in error_type:
|
|
||||||
# error_flag True denotes no error found in the HTML
|
|
||||||
# error_flag False denotes error found in the HTML
|
|
||||||
error_flag = True
|
|
||||||
errors = net_info.get("errorMsg")
|
|
||||||
# errors will hold the error message
|
|
||||||
# it can be string or list
|
|
||||||
# by isinstance method we can detect that
|
|
||||||
# and handle the case for strings as normal procedure
|
|
||||||
# and if its list we can iterate the errors
|
|
||||||
if isinstance(errors, str):
|
|
||||||
# Checks if the error message is in the HTML
|
|
||||||
# if error is present we will set flag to False
|
|
||||||
if errors in r.text:
|
|
||||||
error_flag = False
|
|
||||||
else:
|
|
||||||
# If it's list, it will iterate all the error message
|
|
||||||
for error in errors:
|
|
||||||
if error in r.text:
|
|
||||||
error_flag = False
|
|
||||||
break
|
|
||||||
if error_flag:
|
|
||||||
query_status = QueryStatus.CLAIMED
|
|
||||||
else:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
|
|
||||||
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
|
|
||||||
error_codes = net_info.get("errorCode")
|
|
||||||
query_status = QueryStatus.CLAIMED
|
|
||||||
|
|
||||||
# Type consistency, allowing for both singlets and lists in manifest
|
|
||||||
if isinstance(error_codes, int):
|
|
||||||
error_codes = [error_codes]
|
|
||||||
|
|
||||||
if error_codes is not None and r.status_code in error_codes:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
elif r.status_code >= 300 or r.status_code < 200:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
|
|
||||||
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
|
|
||||||
# For this detection method, we have turned off the redirect.
|
|
||||||
# So, there is no need to check the response URL: it will always
|
|
||||||
# match the request. Instead, we will ensure that the response
|
|
||||||
# code indicates that the request was successful (i.e. no 404, or
|
|
||||||
# forward to some odd redirect).
|
|
||||||
if 200 <= r.status_code < 300:
|
|
||||||
query_status = QueryStatus.CLAIMED
|
|
||||||
else:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
|
|
||||||
if dump_response:
|
|
||||||
print("+++++++++++++++++++++")
|
|
||||||
print(f"TARGET NAME : {social_network}")
|
|
||||||
print(f"USERNAME : {username}")
|
|
||||||
print(f"TARGET URL : {url}")
|
|
||||||
print(f"TEST METHOD : {error_type}")
|
|
||||||
try:
|
|
||||||
print(f"STATUS CODES : {net_info['errorCode']}")
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
print("Results...")
|
|
||||||
try:
|
|
||||||
print(f"RESPONSE CODE : {r.status_code}")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
print(f"ERROR TEXT : {net_info['errorMsg']}")
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
print(">>>>> BEGIN RESPONSE TEXT")
|
|
||||||
try:
|
|
||||||
print(r.text)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
print("<<<<< END RESPONSE TEXT")
|
|
||||||
print("VERDICT : " + str(query_status))
|
|
||||||
print("+++++++++++++++++++++")
|
|
||||||
|
|
||||||
# Notify caller about results of query.
|
|
||||||
result: QueryResult = QueryResult(
|
|
||||||
username=username,
|
|
||||||
site_name=social_network,
|
|
||||||
site_url_user=url,
|
|
||||||
status=query_status,
|
|
||||||
query_time=response_time,
|
|
||||||
context=error_context,
|
|
||||||
)
|
|
||||||
query_notify.update(result)
|
|
||||||
|
|
||||||
# Save status of request
|
|
||||||
results_site["status"] = result
|
|
||||||
|
|
||||||
# Save results from request
|
|
||||||
results_site["http_status"] = http_status
|
|
||||||
results_site["response_text"] = response_text
|
|
||||||
|
|
||||||
# Add this site's results into final dictionary with all of the other results.
|
|
||||||
results_total[social_network] = results_site
|
|
||||||
|
|
||||||
return results_total
|
|
||||||
|
|
||||||
|
|
||||||
def timeout_check(value):
    """Check Timeout Argument.

    Checks timeout for validity.

    Keyword Arguments:
    value                  -- Time in seconds to wait before timing out request.

    Return Value:
    Floating point number representing the time (in seconds) that should be
    used for the timeout.

    NOTE:  Will raise an exception if the timeout is invalid:
           ValueError (from float()) when the value is not numeric, and
           ArgumentTypeError when it is not a positive, finite number.
    """
    import math

    float_value = float(value)

    # NaN compares False against everything, so a plain "<= 0" test would let
    # it through; infinity is not a usable timeout either.
    if not math.isfinite(float_value) or float_value <= 0:
        raise ArgumentTypeError(
            f"Invalid timeout value: {value}. Timeout must be a positive number."
        )

    return float_value
|
|
||||||
|
|
||||||
|
|
||||||
def handler(signal_received, frame):
    """Terminate the process quietly with exit status 0.

    Intended as a signal handler so that an interrupt does not produce a
    traceback. Both arguments are accepted for the handler signature and
    are not used.

    Source: https://www.devdungeon.com/content/python-catch-sigint-ctrl-c
    """
    raise SystemExit(0)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Command-line entry point.

    Parses the command-line arguments, checks for a newer release, loads the
    site manifest, runs the username search for every requested username and
    writes the optional txt / csv / xlsx reports.

    Return Value:
    Nothing. Exits with status 1 on invalid option combinations, when the
    site manifest cannot be loaded, or when none of the requested sites exist.
    """
    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=f"{__longname__} (Version {__version__})",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"{__shortname__} v{__version__}",
        help="Display version information and dependencies.",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        "-d",
        "--debug",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra debugging information and metrics.",
    )
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        help="If using multiple usernames, the output of the results will be saved to this folder.",
    )
    parser.add_argument(
        "--output",
        "-o",
        dest="output",
        help="If using single username, the output of the result will be saved to this file.",
    )
    parser.add_argument(
        "--csv",
        action="store_true",
        dest="csv",
        default=False,
        help="Create Comma-Separated Values (CSV) File.",
    )
    parser.add_argument(
        "--xlsx",
        action="store_true",
        dest="xlsx",
        default=False,
        help="Create the standard file for the modern Microsoft Excel spreadsheet (xlsx).",
    )
    parser.add_argument(
        "--site",
        action="append",
        metavar="SITE_NAME",
        dest="site_list",
        default=[],
        help="Limit analysis to just the listed sites. Add multiple options to specify more than one site.",
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar="PROXY_URL",
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
    )
    parser.add_argument(
        "--dump-response",
        action="store_true",
        dest="dump_response",
        default=False,
        help="Dump the HTTP response to stdout for targeted debugging.",
    )
    parser.add_argument(
        "--json",
        "-j",
        metavar="JSON_FILE",
        dest="json_file",
        default=None,
        help="Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.",
    )
    parser.add_argument(
        "--timeout",
        action="store",
        metavar="TIMEOUT",
        dest="timeout",
        type=timeout_check,
        default=60,
        help="Time (in seconds) to wait for response to requests (Default: 60)",
    )
    parser.add_argument(
        "--print-all",
        action="store_true",
        dest="print_all",
        default=False,
        help="Output sites where the username was not found.",
    )
    # NOTE: store_true combined with default=True means print_found is always True.
    parser.add_argument(
        "--print-found",
        action="store_true",
        dest="print_found",
        default=True,
        help="Output sites where the username was found (also if exported as file).",
    )
    parser.add_argument(
        "--no-color",
        action="store_true",
        dest="no_color",
        default=False,
        help="Don't color terminal output",
    )
    parser.add_argument(
        "username",
        nargs="+",
        metavar="USERNAMES",
        action="store",
        help="One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').",
    )
    parser.add_argument(
        "--browse",
        "-b",
        action="store_true",
        dest="browse",
        default=False,
        help="Browse to all results on default browser.",
    )

    parser.add_argument(
        "--local",
        "-l",
        action="store_true",
        default=False,
        help="Force the use of the local data.json file.",
    )

    parser.add_argument(
        "--nsfw",
        action="store_true",
        default=False,
        help="Include checking of NSFW sites from default list.",
    )

    parser.add_argument(
        "--txt",
        action="store_true",
        dest="output_txt",
        default=False,
        help="Enable creation of a txt file",
    )

    parser.add_argument(
        "--ignore-exclusions",
        action="store_true",
        dest="ignore_exclusions",
        default=False,
        help="Ignore upstream exclusions (may return more false positives)",
    )

    args = parser.parse_args()

    # If the user presses CTRL-C, exit gracefully without throwing errors
    signal.signal(signal.SIGINT, handler)

    # Check for newer version of Sherlock. If it exists, let the user know about it.
    # Any failure here is reported but never aborts the run.
    try:
        latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
        latest_release_json = json_loads(latest_release_raw)
        latest_remote_tag = latest_release_json["tag_name"]

        # The remote tag carries a one-character prefix that is dropped before comparing.
        if latest_remote_tag[1:] != __version__:
            print(
                f"Update available! {__version__} --> {latest_remote_tag[1:]}"
                f"\n{latest_release_json['html_url']}"
            )

    except Exception as error:
        print(f"A problem occurred while checking for an update: {error}")

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    if args.no_color:
        # Disable color output.
        init(strip=True, convert=False)
    else:
        # Enable color output.
        init(autoreset=True)

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)

    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)

    # Create object with all information about sites we are aware of.
    try:
        if args.local:
            sites = SitesInformation(
                os.path.join(os.path.dirname(__file__), "resources/data.json"),
                honor_exclusions=False,
            )
        else:
            json_file_location = args.json_file
            if args.json_file:
                # If --json parameter is a number, interpret it as a pull request number
                if args.json_file.isnumeric():
                    pull_number = args.json_file
                    pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
                    pull_request_raw = requests.get(pull_url, timeout=10).text
                    pull_request_json = json_loads(pull_request_raw)

                    # Check if it's a valid pull request
                    if "message" in pull_request_json:
                        print(f"ERROR: Pull request #{pull_number} not found.")
                        sys.exit(1)

                    head_commit_sha = pull_request_json["head"]["sha"]
                    json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"

            sites = SitesInformation(
                data_file_path=json_file_location,
                honor_exclusions=not args.ignore_exclusions,
                do_not_exclude=args.site_list,
            )
    except Exception as error:
        print(f"ERROR: {error}")
        sys.exit(1)

    if not args.nsfw:
        sites.remove_nsfw_sites(do_not_remove=args.site_list)

    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
    site_data_all = {site.name: site.information for site in sites}
    if not args.site_list:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.
        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            counter = 0
            for existing_site in site_data_all:
                # Site names given on the command line match case-insensitively.
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
                    counter += 1
            if counter == 0:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")

        if site_missing:
            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")

        if not site_data:
            sys.exit(1)

    # Create notify object for query results.
    query_notify = QueryNotifyPrint(
        result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse
    )

    # Run report on all specified users.
    all_usernames = []
    for username in args.username:
        if check_for_parameter(username):
            all_usernames.extend(multiple_usernames(username))
        else:
            all_usernames.append(username)

    for username in all_usernames:
        results = sherlock(
            username,
            site_data,
            query_notify,
            dump_response=args.dump_response,
            proxy=args.proxy,
            timeout=args.timeout,
        )

        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The usernames results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"

        if args.output_txt:
            with open(result_file, "w", encoding="utf-8") as file:
                exists_counter = 0
                for website_name in results:
                    dictionary = results[website_name]
                    if dictionary.get("status").status == QueryStatus.CLAIMED:
                        exists_counter += 1
                        file.write(dictionary["url_user"] + "\n")
                file.write(f"Total Websites Username Detected On : {exists_counter}\n")

        if args.csv:
            result_file = f"{username}.csv"
            if args.folderoutput:
                # The usernames results should be stored in a targeted folder.
                # If the folder doesn't exist, create it first
                os.makedirs(args.folderoutput, exist_ok=True)
                result_file = os.path.join(args.folderoutput, result_file)

            with open(result_file, "w", newline="", encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow(
                    [
                        "username",
                        "name",
                        "url_main",
                        "url_user",
                        "exists",
                        "http_status",
                        "response_time_s",
                    ]
                )
                for site in results:
                    if (
                        args.print_found
                        and not args.print_all
                        and results[site]["status"].status != QueryStatus.CLAIMED
                    ):
                        continue

                    response_time_s = results[site]["status"].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow(
                        [
                            username,
                            site,
                            results[site]["url_main"],
                            results[site]["url_user"],
                            str(results[site]["status"].status),
                            results[site]["http_status"],
                            response_time_s,
                        ]
                    )
        if args.xlsx:
            usernames = []
            names = []
            url_main = []
            url_user = []
            exists = []
            http_status = []
            response_time_s = []

            for site in results:
                if (
                    args.print_found
                    and not args.print_all
                    and results[site]["status"].status != QueryStatus.CLAIMED
                ):
                    continue

                # Test the per-site query time (not the accumulator list) so a
                # missing response time becomes "" exactly as in the CSV report.
                query_time = results[site]["status"].query_time
                response_time_s.append("" if query_time is None else query_time)
                usernames.append(username)
                names.append(site)
                url_main.append(results[site]["url_main"])
                url_user.append(results[site]["url_user"])
                exists.append(str(results[site]["status"].status))
                http_status.append(results[site]["http_status"])

            data_frame = pd.DataFrame(
                {
                    "username": usernames,
                    "name": names,
                    "url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main],
                    "url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user],
                    "exists": exists,
                    "http_status": http_status,
                    "response_time_s": response_time_s,
                }
            )
            data_frame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False)

        print()
    query_notify.finish()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,260 +0,0 @@
|
|||||||
"""Sherlock Sites Information Module
|
|
||||||
|
|
||||||
This module supports storing information about websites.
|
|
||||||
This is the raw data that will be used to search for usernames.
|
|
||||||
"""
|
|
||||||
import json
|
|
||||||
import requests
|
|
||||||
import secrets
|
|
||||||
|
|
||||||
|
|
||||||
MANIFEST_URL = "https://data.sherlockproject.xyz"
|
|
||||||
EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"
|
|
||||||
|
|
||||||
class SiteInformation:
    """Information about one specific website that can be queried."""

    def __init__(self, name, url_home, url_username_format, username_claimed,
                 information, is_nsfw, username_unclaimed=None):
        """Create Site Information Object.

        Contains information about a specific website.

        Keyword Arguments:
        self                   -- This object.
        name                   -- String which identifies site.
        url_home               -- String containing URL for home of site.
        url_username_format    -- String containing URL for Username format
                                  on site.
                                  NOTE:  The string should contain the
                                         token "{}" where the username should
                                         be substituted.  For example, a string
                                         of "https://somesite.com/users/{}"
                                         indicates that the individual
                                         usernames would show up under the
                                         "https://somesite.com/users/" area of
                                         the website.
        username_claimed       -- String containing username which is known
                                  to be claimed on website.
        username_unclaimed     -- String containing username which is known
                                  to be unclaimed on website.  When omitted
                                  (None), a fresh random URL-safe token is
                                  generated for this object.
        information            -- Dictionary containing all known information
                                  about website.
                                  NOTE:  Custom information about how to
                                         actually detect the existence of the
                                         username will be included in this
                                         dictionary.  This information will
                                         be needed by the detection method,
                                         but it is only recorded in this
                                         object for future use.
        is_nsfw                -- Boolean indicating if site is Not Safe For Work.

        Return Value:
        Nothing.
        """

        self.name = name
        self.url_home = url_home
        self.url_username_format = url_username_format

        self.username_claimed = username_claimed
        # Generate the random name per instance (a default evaluated in the
        # signature would be computed only once, at definition time), and
        # honour an explicitly supplied value instead of discarding it.
        if username_unclaimed is None:
            username_unclaimed = secrets.token_urlsafe(32)
        self.username_unclaimed = username_unclaimed
        self.information = information
        self.is_nsfw = is_nsfw

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nicely formatted string to get information about this object.
        """

        return f"{self.name} ({self.url_home})"
|
|
||||||
|
|
||||||
|
|
||||||
class SitesInformation:
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
data_file_path: str|None = None,
|
|
||||||
honor_exclusions: bool = True,
|
|
||||||
do_not_exclude: list[str] = [],
|
|
||||||
):
|
|
||||||
"""Create Sites Information Object.
|
|
||||||
|
|
||||||
Contains information about all supported websites.
|
|
||||||
|
|
||||||
Keyword Arguments:
|
|
||||||
self -- This object.
|
|
||||||
data_file_path -- String which indicates path to data file.
|
|
||||||
The file name must end in ".json".
|
|
||||||
|
|
||||||
There are 3 possible formats:
|
|
||||||
* Absolute File Format
|
|
||||||
For example, "c:/stuff/data.json".
|
|
||||||
* Relative File Format
|
|
||||||
The current working directory is used
|
|
||||||
as the context.
|
|
||||||
For example, "data.json".
|
|
||||||
* URL Format
|
|
||||||
For example,
|
|
||||||
"https://example.com/data.json", or
|
|
||||||
"http://example.com/data.json".
|
|
||||||
|
|
||||||
An exception will be thrown if the path
|
|
||||||
to the data file is not in the expected
|
|
||||||
format, or if there was any problem loading
|
|
||||||
the file.
|
|
||||||
|
|
||||||
If this option is not specified, then a
|
|
||||||
default site list will be used.
|
|
||||||
|
|
||||||
Return Value:
|
|
||||||
Nothing.
|
|
||||||
"""
|
|
||||||
|
|
||||||
if not data_file_path:
|
|
||||||
# The default data file is the live data.json which is in the GitHub repo. The reason why we are using
|
|
||||||
# this instead of the local one is so that the user has the most up-to-date data. This prevents
|
|
||||||
# users from creating issue about false positives which has already been fixed or having outdated data
|
|
||||||
data_file_path = MANIFEST_URL
|
|
||||||
|
|
||||||
if data_file_path.lower().startswith("http"):
|
|
||||||
# Reference is to a URL.
|
|
||||||
try:
|
|
||||||
response = requests.get(url=data_file_path, timeout=30)
|
|
||||||
except Exception as error:
|
|
||||||
raise FileNotFoundError(
|
|
||||||
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status_code != 200:
|
|
||||||
raise FileNotFoundError(f"Bad response while accessing "
|
|
||||||
f"data file URL '{data_file_path}'."
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
site_data = response.json()
|
|
||||||
except Exception as error:
|
|
||||||
raise ValueError(
|
|
||||||
f"Problem parsing json contents at '{data_file_path}': {error}."
|
|
||||||
)
|
|
||||||
|
|
||||||
else:
|
|
||||||
# Reference is to a file.
|
|
||||||
try:
|
|
||||||
with open(data_file_path, "r", encoding="utf-8") as file:
|
|
||||||
try:
|
|
||||||
site_data = json.load(file)
|
|
||||||
except Exception as error:
|
|
||||||
raise ValueError(
|
|
||||||
f"Problem parsing json contents at '{data_file_path}': {error}."
|
|
||||||
)
|
|
||||||
|
|
||||||
except FileNotFoundError:
|
|
||||||
raise FileNotFoundError(f"Problem while attempting to access "
|
|
||||||
f"data file '{data_file_path}'."
|
|
||||||
)
|
|
||||||
|
|
||||||
site_data.pop('$schema', None)
|
|
||||||
|
|
||||||
if honor_exclusions:
|
|
||||||
try:
|
|
||||||
response = requests.get(url=EXCLUSIONS_URL, timeout=10)
|
|
||||||
if response.status_code == 200:
|
|
||||||
exclusions = response.text.splitlines()
|
|
||||||
exclusions = [exclusion.strip() for exclusion in exclusions]
|
|
||||||
|
|
||||||
for site in do_not_exclude:
|
|
||||||
if site in exclusions:
|
|
||||||
exclusions.remove(site)
|
|
||||||
|
|
||||||
for exclusion in exclusions:
|
|
||||||
try:
|
|
||||||
site_data.pop(exclusion, None)
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
except Exception:
|
|
||||||
# If there was any problem loading the exclusions, just continue without them
|
|
||||||
print("Warning: Could not load exclusions, continuing without them.")
|
|
||||||
honor_exclusions = False
|
|
||||||
|
|
||||||
self.sites = {}
|
|
||||||
|
|
||||||
# Add all site information from the json file to internal site list.
|
|
||||||
for site_name in site_data:
|
|
||||||
try:
|
|
||||||
|
|
||||||
self.sites[site_name] = \
|
|
||||||
SiteInformation(site_name,
|
|
||||||
site_data[site_name]["urlMain"],
|
|
||||||
site_data[site_name]["url"],
|
|
||||||
site_data[site_name]["username_claimed"],
|
|
||||||
site_data[site_name],
|
|
||||||
site_data[site_name].get("isNSFW",False)
|
|
||||||
|
|
||||||
)
|
|
||||||
except KeyError as error:
|
|
||||||
raise ValueError(
|
|
||||||
f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}."
|
|
||||||
)
|
|
||||||
except TypeError:
|
|
||||||
print(f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n")
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def remove_nsfw_sites(self, do_not_remove: list = []):
|
|
||||||
"""
|
|
||||||
Remove NSFW sites from the sites, if isNSFW flag is true for site
|
|
||||||
|
|
||||||
Keyword Arguments:
|
|
||||||
self -- This object.
|
|
||||||
|
|
||||||
Return Value:
|
|
||||||
None
|
|
||||||
"""
|
|
||||||
sites = {}
|
|
||||||
do_not_remove = [site.casefold() for site in do_not_remove]
|
|
||||||
for site in self.sites:
|
|
||||||
if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
|
|
||||||
continue
|
|
||||||
sites[site] = self.sites[site]
|
|
||||||
self.sites = sites
|
|
||||||
|
|
||||||
def site_name_list(self):
|
|
||||||
"""Get Site Name List.
|
|
||||||
|
|
||||||
Keyword Arguments:
|
|
||||||
self -- This object.
|
|
||||||
|
|
||||||
Return Value:
|
|
||||||
List of strings containing names of sites.
|
|
||||||
"""
|
|
||||||
|
|
||||||
return sorted([site.name for site in self], key=str.lower)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
"""Iterator For Object.
|
|
||||||
|
|
||||||
Keyword Arguments:
|
|
||||||
self -- This object.
|
|
||||||
|
|
||||||
Return Value:
|
|
||||||
Iterator for sites object.
|
|
||||||
"""
|
|
||||||
|
|
||||||
for site_name in self.sites:
|
|
||||||
yield self.sites[site_name]
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
"""Length For Object.
|
|
||||||
|
|
||||||
Keyword Arguments:
|
|
||||||
self -- This object.
|
|
||||||
|
|
||||||
Return Value:
|
|
||||||
Length of sites object.
|
|
||||||
"""
|
|
||||||
return len(self.sites)
|
|
||||||
@@ -1,51 +0,0 @@
|
|||||||
import os
|
|
||||||
import json
|
|
||||||
import urllib
|
|
||||||
import pytest
|
|
||||||
from sherlock_project.sites import SitesInformation
|
|
||||||
|
|
||||||
def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]:
    """Load the repository's bundled data.json and map each site name to its information.

    ``honor_exclusions`` is forwarded unchanged to ``SitesInformation``.
    """
    manifest_path = os.path.join(
        os.path.dirname(__file__), "../sherlock_project/resources/data.json"
    )
    manifest = SitesInformation(
        data_file_path=manifest_path, honor_exclusions=honor_exclusions
    )
    sites_iterable: dict[str, dict[str, str]] = {}
    for entry in manifest:
        sites_iterable[entry.name] = entry.information
    return sites_iterable
|
|
||||||
|
|
||||||
@pytest.fixture()
def sites_obj():
    """Yield a SitesInformation built from the repository's bundled data.json (default options)."""
    manifest_path = os.path.join(
        os.path.dirname(__file__), "../sherlock_project/resources/data.json"
    )
    yield SitesInformation(data_file_path=manifest_path)
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
def sites_info():
    """Yield the local manifest as a ``{site name: information}`` dict, once per session."""
    manifest = fetch_local_manifest()
    yield manifest
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
def remote_schema():
    """Download the data.json schema from the upstream repository and yield it parsed.

    Session-scoped, so the download happens once per test session.
    """
    # A bare "import urllib" does not load the "request" submodule; import it
    # explicitly so urlopen is always available.
    import urllib.request

    schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.schema.json'
    with urllib.request.urlopen(schema_url) as remoteschema:
        schemadat = json.load(remoteschema)
    yield schemadat
|
|
||||||
|
|
||||||
def pytest_addoption(parser):
    """Register the ``--chunked-sites`` command line option on *parser*."""
    option_settings = {
        "action": "store",
        "default": None,
        "help": "For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.",
    }
    parser.addoption("--chunked-sites", **option_settings)
|
|
||||||
|
|
||||||
def pytest_generate_tests(metafunc):
    """Parametrize ``chunked_sites`` with one single-site mapping per site.

    Does nothing unless the test requests the ``chunked_sites`` fixture.
    The manifest is loaded with honor_exclusions=False and, when
    ``--chunked-sites`` is given, narrowed to the comma-separated names.
    """
    if "chunked_sites" not in metafunc.fixturenames:
        return

    manifest = fetch_local_manifest(honor_exclusions=False)

    # Ingest and apply site selections
    requested: str | None = metafunc.config.getoption("--chunked-sites")
    if requested:
        wanted: list[str] = [token.strip() for token in requested.split(",")]
        manifest = {
            name: info for name, info in manifest.items()
            if name in wanted
        }

    # Each parameter is a one-entry dict; the site name doubles as the test id.
    chunk_params = []
    chunk_ids = []
    for name, info in manifest.items():
        chunk_params.append({name: info})
        chunk_ids.append(name)
    metafunc.parametrize("chunked_sites", chunk_params, ids=chunk_ids)
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
import sherlock_project
|
|
||||||
|
|
||||||
#from sherlock.sites import SitesInformation
|
|
||||||
#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")
|
|
||||||
|
|
||||||
def test_username_via_message():
    """Invoke ``sherlock_project.__main__`` with the ``--version`` argument."""
    # NOTE(review): what ``__main__`` resolves to is not visible here -- confirm it is callable.
    version_flag = "--version"
    sherlock_project.__main__(version_flag)
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
import os
|
|
||||||
import platform
|
|
||||||
import re
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
class Interactives:
    """Helpers that drive Sherlock and inspect its package files for tests."""

    @staticmethod
    def run_cli(args: str = "") -> str:
        """Pass arguments to Sherlock as a normal user on the command line.

        Keyword Arguments:
        args                   -- Raw argument string appended to the command.

        Return Value:
        Decoded output of the process (stderr is merged into stdout).

        Raises InteractivesSubprocessError, carrying the decoded output, when
        the process exits with a non-zero status.
        """
        # Adapt for platform differences (Windows likes to be special)
        if platform.system() == "Windows":
            command: str = f"py -m sherlock_project {args}"
        else:
            command = f"sherlock {args}"

        try:
            # NOTE(review): shell=True with an interpolated string; only pass
            # trusted, test-authored arguments here.
            proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            # Chain the original error so the exit status stays reachable.
            raise InteractivesSubprocessError(e.output.decode()) from e
        return proc_out.decode()

    @staticmethod
    def walk_sherlock_for_files_with(pattern: str) -> list[str]:
        """Check all files within the Sherlock package for matching patterns.

        Keyword Arguments:
        pattern                -- Regular expression searched in each file.

        Return Value:
        Paths (relative to the current working directory) of the files under
        ``sherlock_project`` whose text contains a match.
        """
        compiled: re.Pattern = re.compile(pattern)
        matching_files: list[str] = []
        for root, _dirs, files in os.walk("sherlock_project"):
            for file in files:
                file_path = os.path.join(root, file)
                if "__pycache__" in file_path:
                    continue
                # Fixed encoding keeps results independent of the platform
                # locale; undecodable bytes are dropped rather than fatal.
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    if compiled.search(f.read()):
                        matching_files.append(file_path)
        return matching_files
|
|
||||||
|
|
||||||
class InteractivesSubprocessError(Exception):
    """Raised by Interactives.run_cli when the Sherlock process fails."""
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
import os
|
|
||||||
import json
|
|
||||||
import pytest
|
|
||||||
from jsonschema import validate
|
|
||||||
|
|
||||||
def test_validate_manifest_against_local_schema():
    """Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
    json_relative: str = '../sherlock_project/resources/data.json'
    schema_relative: str = '../sherlock_project/resources/data.schema.json'

    json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
    schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)

    # JSON is UTF-8 by specification; name the encoding so decoding does not
    # depend on the platform's locale default.
    with open(json_path, 'r', encoding='utf-8') as f:
        jsondat = json.load(f)
    with open(schema_path, 'r', encoding='utf-8') as f:
        schemadat = json.load(f)

    validate(instance=jsondat, schema=schemadat)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
def test_validate_manifest_against_remote_schema(remote_schema):
    """Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients."""
    json_relative: str = '../sherlock_project/resources/data.json'
    json_path: str = os.path.join(os.path.dirname(__file__), json_relative)

    # JSON is UTF-8 by specification; name the encoding so decoding does not
    # depend on the platform's locale default.
    with open(json_path, 'r', encoding='utf-8') as f:
        jsondat = json.load(f)

    validate(instance=jsondat, schema=remote_schema)
|
|
||||||
|
|
||||||
# Ensure that the expected values are being returned by the site list
|
|
||||||
@pytest.mark.parametrize(
    "target_name,target_expected_err_type",
    [
        ('GitHub', 'status_code'),
        ('GitLab', 'message'),
    ],
)
def test_site_list_iterability(sites_info, target_name, target_expected_err_type):
    """Check the ``errorType`` recorded for a few well-known sites."""
    actual_err_type = sites_info[target_name]['errorType']
    assert actual_err_type == target_expected_err_type
|
|
||||||
@@ -1,105 +0,0 @@
|
|||||||
import pytest
|
|
||||||
import random
|
|
||||||
import string
|
|
||||||
import re
|
|
||||||
from sherlock_project.sherlock import sherlock
|
|
||||||
from sherlock_project.notify import QueryNotify
|
|
||||||
from sherlock_project.result import QueryStatus
|
|
||||||
#from sherlock_interactives import Interactives
|
|
||||||
|
|
||||||
|
|
||||||
def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus:
    """Query a single site for *username* and return the resulting status.

    Only the entry for *site* is handed to sherlock(); the value returned is
    the ``status`` attribute of that site's ``'status'`` result.
    """
    notifier = QueryNotify()
    single_site: dict = {site: sites_info[site]}
    results = sherlock(
        username=username,
        site_data=single_site,
        query_notify=notifier,
    )
    return results[site]['status'].status
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
class TestLiveTargets:
    """Actively test probes against live and trusted targets"""

    @staticmethod
    def _assert_random_username_available(sites_info, site, random_len, num_attempts=3):
        """Assert that some random alphanumeric username on *site* is AVAILABLE.

        Up to *num_attempts* usernames of length *random_len* are generated
        and queried; the loop stops at the first AVAILABLE result. Several
        attempts are made just in case a real username is generated.
        """
        # Simple alnum only, for simplicity and high compatibility.
        acceptable_types = string.ascii_letters + string.digits
        attempted_usernames: list[str] = []
        status: QueryStatus = QueryStatus.CLAIMED
        for _ in range(num_attempts):
            random_handle = ''.join(random.choice(acceptable_types) for _ in range(random_len))
            attempted_usernames.append(random_handle)
            status = simple_query(sites_info=sites_info, site=site, username=random_handle)
            if status is QueryStatus.AVAILABLE:
                break
        assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}."

    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username', [
        ('GitLab', 'ppfeister'),
        ('AllMyLinks', 'blue'),
    ])
    def test_known_positives_via_message(self, sites_info, site, username):
        """A known-claimed username on a message-based site reports CLAIMED."""
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED

    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username', [
        ('GitHub', 'ppfeister'),
        ('GitHub', 'sherlock-project'),
        ('Docker Hub', 'ppfeister'),
        ('Docker Hub', 'sherlock'),
    ])
    def test_known_positives_via_status_code(self, sites_info, site, username):
        """A known-claimed username on a status-code site reports CLAIMED."""
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED

    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username', [
        ('Keybase', 'blue'),
        ('devRant', 'blue'),
    ])
    def test_known_positives_via_response_url(self, sites_info, site, username):
        """A known-claimed username on a response-url site reports CLAIMED."""
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED

    # Randomly generate usernames of high length and test for positive availability
    @pytest.mark.parametrize('site,random_len', [
        ('GitLab', 255),
        ('Codecademy', 30),
    ])
    def test_likely_negatives_via_message(self, sites_info, site, random_len):
        """A long random username on a message-based site reports AVAILABLE."""
        self._assert_random_username_available(sites_info, site, random_len)

    # Randomly generate usernames of high length and test for positive availability
    @pytest.mark.parametrize('site,random_len', [
        ('GitHub', 39),
        ('Docker Hub', 30),
    ])
    def test_likely_negatives_via_status_code(self, sites_info, site, random_len):
        """A long random username on a status-code site reports AVAILABLE."""
        self._assert_random_username_available(sites_info, site, random_len)
|
|
||||||
|
|
||||||
|
|
||||||
def test_username_illegal_regex(sites_info):
    """A username rejected by the site's ``regexCheck`` is reported as ILLEGAL."""
    site: str = 'BitBucket'
    invalid_handle: str = '*#$Y&*JRE'
    site_regex = re.compile(sites_info[site]['regexCheck'])

    # Ensure that the username actually fails regex before testing sherlock
    regex_hit = site_regex.match(invalid_handle)
    assert regex_hit is None

    outcome = simple_query(sites_info=sites_info, site=site, username=invalid_handle)
    assert outcome is QueryStatus.ILLEGAL
|
|
||||||
|
|
||||||
@@ -1,47 +0,0 @@
|
|||||||
"""Tests for handling usernames with special/unicode characters."""
|
|
||||||
|
|
||||||
from concurrent.futures import Future
|
|
||||||
|
|
||||||
from sherlock_project.sherlock import get_response
|
|
||||||
|
|
||||||
|
|
||||||
def _make_future_with_exception(exc):
    """Return a Future that already holds *exc* as its exception."""
    failed = Future()
    failed.set_exception(exc)
    return failed
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_response_handles_unicode_decode_error():
    """Regression test for issue #2730.

    Usernames with special characters (e.g. 'Émile') can trigger a
    UnicodeDecodeError inside the requests library during redirect
    handling. This must not crash the program.
    """
    decode_failure = UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte")
    outcome = get_response(
        request_future=_make_future_with_exception(decode_failure),
        error_type=["status_code"],
        social_network="TestSite",
    )
    response, error_context, exception_text = outcome

    assert response is None
    assert error_context == "Encoding Error"
    assert "utf-8" in exception_text
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_response_handles_unicode_encode_error():
    """UnicodeEncodeError should also be caught (subclass of UnicodeError)."""
    encode_failure = UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)")
    outcome = get_response(
        request_future=_make_future_with_exception(encode_failure),
        error_type=["status_code"],
        social_network="TestSite",
    )
    response, error_context, exception_text = outcome

    assert response is None
    assert error_context == "Encoding Error"
    assert "ascii" in exception_text
|
|
||||||
@@ -1,43 +0,0 @@
|
|||||||
import pytest
|
|
||||||
from sherlock_project import sherlock
|
|
||||||
from sherlock_interactives import Interactives
|
|
||||||
from sherlock_interactives import InteractivesSubprocessError
|
|
||||||
|
|
||||||
def test_remove_nsfw(sites_obj):
|
|
||||||
nsfw_target: str = 'Xvideos'
|
|
||||||
assert nsfw_target in {site.name: site.information for site in sites_obj}
|
|
||||||
sites_obj.remove_nsfw_sites()
|
|
||||||
assert nsfw_target not in {site.name: site.information for site in sites_obj}
|
|
||||||
|
|
||||||
|
|
||||||
# Parametrized sites should *not* include Motherless, which is acting as the control
@pytest.mark.parametrize('nsfwsites', [
    ['Xvideos'],
    ['Xvideos', 'Erome'],
])
def test_nsfw_explicit_selection(sites_obj, nsfwsites):
    """Sites named in ``do_not_remove`` survive remove_nsfw_sites(); the control does not."""
    # Build each snapshot once instead of once per checked name, and use a
    # distinct comprehension variable so the loop variable is not shadowed.
    before = {entry.name: entry.information for entry in sites_obj}
    for site in nsfwsites:
        assert site in before

    sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites)

    after = {entry.name: entry.information for entry in sites_obj}
    for site in nsfwsites:
        assert site in after
    assert 'Motherless' not in after
|
|
||||||
|
|
||||||
def test_wildcard_username_expansion():
    """Only a complete ``{?}`` marker counts as a parameter, and it expands to three separators."""
    parameter_cases = (
        ('test{?}test', True),
        ('test{.}test', False),
        ('test{}test', False),
        ('testtest', False),
        ('test{?test', False),
        ('test?}test', False),
    )
    for candidate, expected in parameter_cases:
        assert sherlock.check_for_parameter(candidate) is expected

    expanded = sherlock.multiple_usernames('test{?}test')
    assert expanded == ["test_test", "test-test", "test.test"]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
    'cliargs',
    [
        '',
        '--site urghrtuight --egiotr',
        '--',
    ],
)
def test_no_usernames_provided(cliargs):
    """Running the CLI without a username fails with the missing-argument error."""
    expected_error = r"error: the following arguments are required: USERNAMES"
    with pytest.raises(InteractivesSubprocessError, match=expected_error):
        Interactives.run_cli(cliargs)
|
|
||||||
@@ -1,100 +0,0 @@
|
|||||||
import pytest
|
|
||||||
import re
|
|
||||||
import rstr
|
|
||||||
|
|
||||||
from sherlock_project.sherlock import sherlock
|
|
||||||
from sherlock_project.notify import QueryNotify
|
|
||||||
from sherlock_project.result import QueryResult, QueryStatus
|
|
||||||
|
|
||||||
|
|
||||||
# Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit
FALSE_POSITIVE_ATTEMPTS: int = 2
# If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable)
FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15
# Used in absence of a regexCheck entry
FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$'


def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str:
    """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`.

    Keyword Arguments:
    pattern                -- Regular expression source text to rewrite.
    upper_bound            -- Maximum repetition count substituted for each
                              open-ended quantifier.

    Return Value:
    The pattern with every unescaped `{n,}` rewritten to `{n,m}`, `+` to
    `{1,upper_bound}` and `*` to `{0,upper_bound}`. For `{n,}` the emitted
    maximum is `max(n, upper_bound)` so the range is never inverted.

    The rewrite is purely textual: a `+` or `*` preceded by a backslash is
    left alone, but one inside a character class is still rewritten.
    """
    def replace_upper_bound(match: re.Match) -> str:
        # The group is `\d+`, so it always holds at least one digit.
        lower_bound = int(match.group(1))
        # Widen the cap for THIS quantifier only; the caller's upper_bound
        # must stay untouched for the quantifiers that follow.
        return f'{{{lower_bound},{max(lower_bound, upper_bound)}}}'

    pattern = re.sub(r'(?<!\\)\{(\d+),\}', replace_upper_bound, pattern)  # {n,}
    pattern = re.sub(r'(?<!\\)\+', f'{{1,{upper_bound}}}', pattern)  # +
    pattern = re.sub(r'(?<!\\)\*', f'{{0,{upper_bound}}}', pattern)  # *

    return pattern
|
|
||||||
|
|
||||||
def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus:
    """Check if a site is likely to produce false positives.

    Up to FALSE_POSITIVE_ATTEMPTS usernames are generated from *pattern* and
    queried. The first AVAILABLE or WAF status ends the loop; otherwise the
    status of the last attempt is returned (UNKNOWN if no attempt ran).

    Raises TypeError when the result for *site* lacks a ``status`` attribute
    or that attribute is not a QueryStatus.
    """
    outcome: QueryStatus = QueryStatus.UNKNOWN

    for _ in range(FALSE_POSITIVE_ATTEMPTS):
        notifier: QueryNotify = QueryNotify()
        candidate: str = rstr.xeger(pattern)

        all_results = sherlock(
            username=candidate,
            site_data=sites_info,
            query_notify=notifier,
        )
        probe: QueryResult | str = all_results[site]['status']

        if not hasattr(probe, 'status'):
            raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {probe}")
        if type(probe.status) is not QueryStatus:  # type: ignore
            raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(probe.status)}")  # type: ignore
        outcome = probe.status  # type: ignore

        # A conclusive answer needs no further attempts.
        if outcome in (QueryStatus.AVAILABLE, QueryStatus.WAF):
            break

    return outcome
|
|
||||||
|
|
||||||
|
|
||||||
def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus:
    """Check if a site is likely to produce false negatives.

    Queries the site's own ``username_claimed`` entry once and returns the
    resulting status.

    Raises TypeError when the result for *site* lacks a ``status`` attribute
    or that attribute is not a QueryStatus.
    """
    notifier: QueryNotify = QueryNotify()
    claimed_username = sites_info[site]['username_claimed']

    all_results = sherlock(
        username=claimed_username,
        site_data=sites_info,
        query_notify=notifier,
    )
    probe: QueryResult | str = all_results[site]['status']

    if not hasattr(probe, 'status'):
        raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {probe}")
    if type(probe.status) is not QueryStatus:  # type: ignore
        raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(probe.status)}")  # type: ignore

    return probe.status  # type: ignore
|
|
||||||
|
|
||||||
@pytest.mark.validate_targets
@pytest.mark.online
class Test_All_Targets:
    """Probe every site supplied through ``chunked_sites`` for misdetections."""

    @pytest.mark.validate_targets_fp
    def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]):
        """Iterate through all sites in the manifest to discover possible false-positive inducting targets."""
        for site, site_info in chunked_sites.items():
            # Fall back to the default pattern when the site defines no regexCheck.
            if 'regexCheck' in site_info:
                pattern: str = site_info['regexCheck']
            else:
                pattern = FALSE_POSITIVE_DEFAULT_PATTERN

            if FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0:
                pattern = set_pattern_upper_bound(pattern)

            result: QueryStatus = false_positive_check(chunked_sites, site, pattern)
            assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}"

    @pytest.mark.validate_targets_fn
    def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]):
        """Iterate through all sites in the manifest to discover possible false-negative inducting targets."""
        for site in chunked_sites.keys():
            result: QueryStatus = false_negative_check(chunked_sites, site)
            assert result is QueryStatus.CLAIMED, f"{site} produced false negative, result was {result}"
|
|
||||||
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
import os
|
|
||||||
from sherlock_interactives import Interactives
|
|
||||||
import sherlock_project
|
|
||||||
|
|
||||||
def test_versioning() -> None:
    """The CLI reports ``__version__`` and only ``__init__.py`` assigns it."""
    # Ensure __version__ matches version presented to the user
    package_version = sherlock_project.__version__
    assert package_version in Interactives.run_cli("--version")

    # Ensure __init__ is single source of truth for __version__ in package
    # Temporarily allows sherlock.py so as to not trigger early upgrades
    # Normalization is REQUIRED for Windows ( / vs \ )
    init_path = os.path.normpath("sherlock_project/__init__.py")
    expected: list = [init_path]
    found: list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *')

    # Sorting is REQUIRED for Mac
    assert sorted(found) == sorted(expected)
|
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
[tox]
|
|
||||||
requires =
|
|
||||||
tox >= 3
|
|
||||||
envlist =
|
|
||||||
lint
|
|
||||||
py313
|
|
||||||
py312
|
|
||||||
py311
|
|
||||||
py310
|
|
||||||
|
|
||||||
[testenv]
|
|
||||||
description = Attempt to build and install the package
|
|
||||||
deps =
|
|
||||||
coverage
|
|
||||||
jsonschema
|
|
||||||
pytest
|
|
||||||
rstr
|
|
||||||
allowlist_externals = coverage
|
|
||||||
commands =
|
|
||||||
coverage run --source=sherlock_project --module pytest -v
|
|
||||||
coverage report --show-missing
|
|
||||||
|
|
||||||
[testenv:offline]
|
|
||||||
deps =
|
|
||||||
jsonschema
|
|
||||||
pytest
|
|
||||||
commands =
|
|
||||||
pytest -v -m "not online"
|
|
||||||
|
|
||||||
[testenv:lint]
|
|
||||||
description = Lint with Ruff
|
|
||||||
deps =
|
|
||||||
ruff
|
|
||||||
commands =
|
|
||||||
ruff check
|
|
||||||
|
|
||||||
[gh-actions]
|
|
||||||
python =
|
|
||||||
3.13: py313
|
|
||||||
3.12: py312
|
|
||||||
3.11: py311
|
|
||||||
3.10: py310
|
|
||||||
Reference in New Issue
Block a user