Compare commits
232 Commits
master
..
exclusions
| Author | SHA1 | Date | |
|---|---|---|---|
| d178b14f76 | |||
| f73c79ae51 | |||
| 4f26fa0c38 | |||
| 52b00e77a5 | |||
| 07e9c8d7bd | |||
| ad7230d789 | |||
| 411bac3b76 | |||
| 1803922129 | |||
| a360a731f7 | |||
| 1e2710b35b | |||
| 6c9ef74a52 | |||
| fd3e646616 | |||
| 0061bfa1ed | |||
| bc32cffd19 | |||
| 243f436110 | |||
| a4a7f0d92a | |||
| 5a6913d586 | |||
| 554cbb1f79 | |||
| 690e6c9baa | |||
| 9851385fb1 | |||
| 1891d5fbce | |||
| 445d20c91c | |||
| db11f8337b | |||
| 815da25d0c | |||
| 2112e4da7e | |||
| c1ef192368 | |||
| f3e21e0ba3 | |||
| d39fa14972 | |||
| e624bc6b59 | |||
| f9cc83928f | |||
| 28072f5832 | |||
| 57b58eaf3a | |||
| ec850a4439 | |||
| 91a587832a | |||
| 7abf381d43 | |||
| 25ca9339e1 | |||
| 9435d0fe30 | |||
| 2e628ec9be | |||
| 8b4d93924e | |||
| 08e7859a8c | |||
| 773d415195 | |||
| a549a55800 | |||
| ae26f9f46e | |||
| 5f39d67109 | |||
| 62778f04c8 | |||
| 8c6358dea5 | |||
| f22f0a7bf5 | |||
| 7ce524a358 | |||
| 8e267fa570 | |||
| 4331465ee5 | |||
| ad23921880 | |||
| 806f5600f5 | |||
| 0dd3e67d89 | |||
| 09132e94a4 | |||
| b659f335de | |||
| 7a74ab1b9b | |||
| 63d7c59d2e | |||
| 0dfcdb701c | |||
| d437ed62b8 | |||
| 9614537284 | |||
| 5fe4d28905 | |||
| 930c69ae70 | |||
| 2319283279 | |||
| 3bbe8f2560 | |||
| 46d274b4f5 | |||
| e3ff86abbc | |||
| ad00327441 | |||
| 7a981b7caf | |||
| a5d2eed21e | |||
| 011153e991 | |||
| 78a7205f77 | |||
| 82f8875695 | |||
| a2de53a616 | |||
| b0a6084589 | |||
| 3113932b5c | |||
| 8d18a2b3c6 | |||
| 3a2d5c2408 | |||
| c0af8cfd65 | |||
| aceabecb0c | |||
| 25e9407ee8 | |||
| 2cf08b451f | |||
| 357bc89796 | |||
| 0d431d416a | |||
| 5da41f835c | |||
| 4db1d29e9c | |||
| da06a8a3c1 | |||
| 2d5b988144 | |||
| ad3192c812 | |||
| 9b248c9c04 | |||
| fee2688f35 | |||
| 07fb021b6a | |||
| ed84304353 | |||
| 471a65c22b | |||
| ab57315e63 | |||
| 148cead04a | |||
| 51d365959f | |||
| 24f97abd24 | |||
| 8f8f680058 | |||
| a8d247cf49 | |||
| f76954ce2a | |||
| 15c80681d2 | |||
| d2b4fd012c | |||
| 7fdd2a2878 | |||
| dc8b7ffe7e | |||
| b9cc7e1d1c | |||
| e642bd8594 | |||
| 375a615d5d | |||
| d0990d160d | |||
| d2307904a1 | |||
| e78190329b | |||
| c333b79bf1 | |||
| ddc4a5d8c1 | |||
| 3222eb2aee | |||
| 0801ab1367 | |||
| baebf80279 | |||
| 6e05910e7f | |||
| 37e86bfe80 | |||
| 47069ae0cf | |||
| c833609c39 | |||
| 2005620034 | |||
| a6d09d4864 | |||
| 0a53716348 | |||
| 7a8b3c55be | |||
| a4f62d6b6f | |||
| ca78ff0c56 | |||
| b74b1e0b4f | |||
| 55d9b406dd | |||
| a381afe8ea | |||
| 777be62db7 | |||
| b1a1f97abf | |||
| 7f5f0b3d71 | |||
| 93102fbc09 | |||
| e353855b1a | |||
| c7b8771111 | |||
| c778b2a3cd | |||
| 42dbd0e0a5 | |||
| 6ab87f17c1 | |||
| a8f508f9e8 | |||
| c8659eb8e5 | |||
| 6359672dbd | |||
| 9b0524889c | |||
| eddc6f564d | |||
| d4c1153b9d | |||
| 03ffa8387c | |||
| 762ebf85a0 | |||
| 6647670ef6 | |||
| 8ab5519bcf | |||
| 7499f7ad17 | |||
| d26948c90a | |||
| 612c634e9f | |||
| 4aa4fad787 | |||
| 0941f1d03c | |||
| b8f54089a4 | |||
| da10787bbc | |||
| 80d5c781aa | |||
| 62ad4e4b97 | |||
| cd2ffdc75c | |||
| bd9fb1dee1 | |||
| 7b7ca503c8 | |||
| 2bce7c6de3 | |||
| 48a0d42de7 | |||
| 6d71df7ac6 | |||
| e1b422defd | |||
| 3d11b44eb3 | |||
| 6484b098f5 | |||
| eb3d88c70c | |||
| 365bf808ba | |||
| 15dcfe92da | |||
| 2c3ce45d15 | |||
| c462fc0fe0 | |||
| d90f2a1dee | |||
| 0d7daf5284 | |||
| ef19804a97 | |||
| abb690fe79 | |||
| 185fefb0e5 | |||
| d6495e6f0b | |||
| e05f8e8f08 | |||
| ac9f1c031d | |||
| 5d39a4aca4 | |||
| e3e4d81d98 | |||
| 34987d6c69 | |||
| 70e926e70a | |||
| 9e2a4f970e | |||
| 4a751104cc | |||
| adfbda1106 | |||
| 17a91d607d | |||
| 07ebe12917 | |||
| b27f070668 | |||
| e708568758 | |||
| b54b11274b | |||
| 4a5229e6da | |||
| 7e7a973a50 | |||
| d45fa905f5 | |||
| f16fd54ba5 | |||
| becc5d2ea4 | |||
| 978ad5b3ba | |||
| 2b992e6f6e | |||
| 92728aac2b | |||
| dd0d4364cf | |||
| 963c444743 | |||
| 7b491f0365 | |||
| 9c1f71c647 | |||
| 8ffa0b85eb | |||
| aeb8910e03 | |||
| 04a8ec494e | |||
| 159397053d | |||
| 4f20c7152b | |||
| d5b2cd4e3f | |||
| b3590c60a1 | |||
| 4405bbc7f2 | |||
| 93be16067b | |||
| 5dcae85cb2 | |||
| 4bdb361ae2 | |||
| e5b3849612 | |||
| 8461279514 | |||
| 8426e8355d | |||
| 52952a7ed0 | |||
| 93cb3d6664 | |||
| 9e8a687b18 | |||
| d233e2a9e3 | |||
| 0785fa9298 | |||
| b0bd0c6729 | |||
| f6968f0407 | |||
| 89785e4269 | |||
| 1be937ad7e | |||
| 30cde63805 | |||
| e4c274a5bf | |||
| 2c128e0e5e | |||
| d83b6ae2b0 | |||
| a1ad9cd03a | |||
| a83c3957c6 | |||
| 11fa6720a0 |
@@ -1,19 +0,0 @@
|
||||
FROM sherlock/sherlock as sherlock
|
||||
|
||||
# Install Node.js
|
||||
RUN apt-get update; apt-get install curl gpg -y
|
||||
RUN mkdir -p /etc/apt/keyrings
|
||||
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
|
||||
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
|
||||
RUN apt-get update && apt-get install -y curl bash git jq jo xz-utils nodejs
|
||||
|
||||
# Install Apify CLI (node.js) for the Actor Runtime
|
||||
RUN npm -g install apify-cli
|
||||
|
||||
# Install Dependencies for the Actor Shell Script
|
||||
RUN apt-get update && apt-get install -y bash jq jo xz-utils nodejs
|
||||
|
||||
# Copy Actor dir with the actorization shell script
|
||||
COPY .actor/ .actor
|
||||
|
||||
ENTRYPOINT [".actor/actor.sh"]
|
||||
@@ -1,93 +0,0 @@
|
||||
# Sherlock Actor on Apify
|
||||
|
||||
[Sherlock Actor on Apify](https://apify.com/netmilk/sherlock?fpr=sherlock)
|
||||
|
||||
This Actor wraps the [Sherlock Project](https://sherlockproject.xyz/) to provide serverless username reconnaissance across social networks in the cloud. It helps you find usernames across multiple social media platforms without installing and running the tool locally.
|
||||
|
||||
## What are Actors?
|
||||
[Actors](https://docs.apify.com/platform/actors?fpr=sherlock) are serverless microservices running on the [Apify Platform](https://apify.com/?fpr=sherlock). They are based on the [Actor SDK](https://docs.apify.com/sdk/js?fpr=sherlock) and can be found in the [Apify Store](https://apify.com/store?fpr=sherlock). Learn more about Actors in the [Apify Whitepaper](https://whitepaper.actor?fpr=sherlock).
|
||||
|
||||
## Usage
|
||||
|
||||
### Apify Console
|
||||
|
||||
1. Go to the Apify Actor page
|
||||
2. Click "Run"
|
||||
3. In the input form, fill in **Username(s)** to search for
|
||||
4. The Actor will run and produce its outputs in the default datastore
|
||||
|
||||
|
||||
### Apify CLI
|
||||
|
||||
```bash
|
||||
apify call YOUR_USERNAME/sherlock --input='{
|
||||
"usernames": ["johndoe", "janedoe"]
|
||||
}'
|
||||
```
|
||||
|
||||
### Using Apify API
|
||||
|
||||
```bash
|
||||
curl --request POST \
|
||||
--url "https://api.apify.com/v2/acts/YOUR_USERNAME~sherlock/run" \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer YOUR_API_TOKEN' \
|
||||
--data '{
|
||||
"usernames": ["johndoe", "janedoe"]
|
||||
}'
|
||||
```
|
||||
|
||||
## Input Parameters
|
||||
|
||||
The Actor accepts a JSON schema with the following structure:
|
||||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `usernames` | array | Yes | - | List of usernames to search for |
|
||||
| `usernames[]` | string | Yes | "json" | Username to search for |
|
||||
|
||||
|
||||
### Example Input
|
||||
|
||||
```json
|
||||
{
|
||||
"usernames": ["techuser", "designuser"]
|
||||
}
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
The Actor provides three types of outputs:
|
||||
|
||||
### Dataset Record
|
||||
|
||||
| Field | Type | Required | Description |
|
||||
|-------|------|----------|-------------|
|
||||
| `username` | string | Yes | Username the search was conducted for |
|
||||
| `links` | array | Yes | Array with found links to the social media |
|
||||
| `links[]` | string | No | URL to the account |
|
||||
|
||||
### Example Dataset Item (JSON)
|
||||
|
||||
```json
|
||||
{
|
||||
"username": "johndoe",
|
||||
"links": [
|
||||
"https://github.com/johndoe"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Performance & Resources
|
||||
|
||||
- **Memory Requirements**:
|
||||
- Minimum: 512 MB RAM
|
||||
- Recommended: 1 GB RAM for multiple usernames
|
||||
- **Processing Time**:
|
||||
- Single username: ~1-2 minutes
|
||||
- Multiple usernames: 2-5 minutes
|
||||
- Varies based on number of sites checked and response times
|
||||
|
||||
|
||||
For more help, check the [Sherlock Project documentation](https://github.com/sherlock-project/sherlock) or raise an issue in the Actor's repository.
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"actorSpecification": 1,
|
||||
"name": "sherlock",
|
||||
"version": "0.0",
|
||||
"buildTag": "latest",
|
||||
"environmentVariables": {},
|
||||
"dockerFile": "./Dockerfile",
|
||||
"dockerContext": "../",
|
||||
"input": "./input_schema.json",
|
||||
"storages": {
|
||||
"dataset": "./dataset_schema.json"
|
||||
}
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
INPUT=`apify actor:get-input | jq -r .usernames[] | xargs echo`
|
||||
echo "INPUT: $INPUT"
|
||||
|
||||
sherlock $INPUT
|
||||
|
||||
for username in $INPUT; do
|
||||
# escape the special meaning leading characters
|
||||
# https://github.com/jpmens/jo/blob/master/jo.md#description
|
||||
safe_username=$(echo $username | sed 's/^@/\\@/' | sed 's/^:/\\:/' | sed 's/%/\\%/')
|
||||
echo "pushing results for username: $username, content:"
|
||||
cat $username.txt
|
||||
sed '$d' $username.txt | jo -a | jo username=$safe_username links:=- | apify actor:push-data
|
||||
done
|
||||
@@ -1,45 +0,0 @@
|
||||
{
|
||||
"actorSpecification": 1,
|
||||
"fields":{
|
||||
"title": "Sherlock actor input",
|
||||
"description": "This is actor input schema",
|
||||
"type": "object",
|
||||
"schemaVersion": 1,
|
||||
"properties": {
|
||||
"links": {
|
||||
"title": "Links to accounts",
|
||||
"type": "array",
|
||||
"description": "A list of social media accounts found for the username"
|
||||
},
|
||||
"username": {
|
||||
"title": "Lookup username",
|
||||
"type": "string",
|
||||
"description": "Username the lookup was performed for"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"username",
|
||||
"links"
|
||||
]
|
||||
},
|
||||
"views": {
|
||||
"overview": {
|
||||
"title": "Overview",
|
||||
"transformation": {
|
||||
"fields": [
|
||||
"username",
|
||||
"links"
|
||||
],
|
||||
},
|
||||
"display": {
|
||||
"component": "table",
|
||||
"links": {
|
||||
"label": "Links"
|
||||
},
|
||||
"username":{
|
||||
"label": "Username"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"title": "Sherlock actor input",
|
||||
"description": "This is actor input schema",
|
||||
"type": "object",
|
||||
"schemaVersion": 1,
|
||||
"properties": {
|
||||
"usernames": {
|
||||
"title": "Usernames to hunt down",
|
||||
"type": "array",
|
||||
"description": "A list of usernames to be checked for existence across social media",
|
||||
"editor": "stringList",
|
||||
"prefill": ["johndoe"]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"usernames"
|
||||
]
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
.git/
|
||||
.vscode/
|
||||
screenshot/
|
||||
tests/
|
||||
*.txt
|
||||
!/requirements.txt
|
||||
venv/
|
||||
devel/
|
||||
@@ -1,18 +0,0 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
curly_bracket_next_line = false
|
||||
spaces_around_operators = true
|
||||
|
||||
[*.{markdown,md}]
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
[*.py]
|
||||
indent_size = 4
|
||||
quote_type = double
|
||||
@@ -1,15 +0,0 @@
|
||||
### REPOSITORY
|
||||
/.github/CODEOWNERS @sdushantha @ppfeister
|
||||
/.github/FUNDING.yml @sdushantha
|
||||
/LICENSE @sdushantha
|
||||
|
||||
### PACKAGING
|
||||
# Changes made to these items without code owner approval may negatively
|
||||
# impact packaging pipelines.
|
||||
/pyproject.toml @ppfeister @sdushantha
|
||||
|
||||
### REGRESSION
|
||||
/.github/workflows/regression.yml @ppfeister
|
||||
/tox.ini @ppfeister
|
||||
/pytest.ini @ppfeister
|
||||
/tests/ @ppfeister
|
||||
@@ -1 +0,0 @@
|
||||
github: [ sdushantha, ppfeister, matheusfelipeog ]
|
||||
@@ -1,71 +0,0 @@
|
||||
name: Bug report
|
||||
description: File a bug report
|
||||
labels: ["bug"]
|
||||
body:
|
||||
- type: dropdown
|
||||
id: package
|
||||
attributes:
|
||||
label: Installation method
|
||||
description: |
|
||||
Some packages are maintained by the community, rather than by the Sherlock Project.
|
||||
Knowing which packages are affected helps us diagnose package-specific bugs.
|
||||
options:
|
||||
- Select one
|
||||
- PyPI (via pip)
|
||||
- Homebrew
|
||||
- Docker
|
||||
- Kali repository (via apt)
|
||||
- Built from source
|
||||
- Other (indicate below)
|
||||
validations:
|
||||
required: true
|
||||
- type: input
|
||||
id: package-version
|
||||
attributes:
|
||||
label: Package version
|
||||
description: |
|
||||
Knowing the version of the package you are using can help us diagnose your issue more quickly.
|
||||
You can find the version by running `sherlock --version`.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Description
|
||||
description: |
|
||||
Detailed descriptions that help contributors understand and reproduce your bug are much more likely to lead to a fix.
|
||||
Please include the following information:
|
||||
- What you were trying to do
|
||||
- What you expected to happen
|
||||
- What actually happened
|
||||
placeholder: |
|
||||
When doing {action}, the expected result should be {expected result}.
|
||||
When doing {action}, however, the actual result was {actual result}.
|
||||
This is undesirable because {reason}.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: steps-to-reproduce
|
||||
attributes:
|
||||
label: Steps to reproduce
|
||||
description: Write a step by step list that will allow us to reproduce this bug.
|
||||
placeholder: |
|
||||
1. Do something
|
||||
2. Then do something else
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Additional information
|
||||
description: If you have some additional information, please write it here.
|
||||
validations:
|
||||
required: false
|
||||
- type: checkboxes
|
||||
id: terms
|
||||
attributes:
|
||||
label: Code of Conduct
|
||||
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md).
|
||||
options:
|
||||
- label: I agree to follow this project's Code of Conduct
|
||||
required: true
|
||||
@@ -1 +0,0 @@
|
||||
blank_issues_enabled: false
|
||||
@@ -1,27 +0,0 @@
|
||||
name: False negative
|
||||
description: Report a site that is returning false negative results
|
||||
title: "False negative for: "
|
||||
labels: ["false negative"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Please include the site name in the title of your issue.
|
||||
Submit **one site per report** for faster resolution. If you have multiple sites in the same report, it often takes longer to fix.
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Additional info
|
||||
description: If you know why the site is returning false negatives, or noticed any patterns, please explain.
|
||||
placeholder: |
|
||||
Reddit is returning false negatives because...
|
||||
validations:
|
||||
required: false
|
||||
- type: checkboxes
|
||||
id: terms
|
||||
attributes:
|
||||
label: Code of Conduct
|
||||
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md).
|
||||
options:
|
||||
- label: I agree to follow this project's Code of Conduct
|
||||
required: true
|
||||
@@ -1,28 +0,0 @@
|
||||
name: False positive
|
||||
description: Report a site that is returning false positive results
|
||||
title: "False positive for: "
|
||||
labels: ["false positive"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Please include the site name in the title of your issue.
|
||||
Submit **one site per report** for faster resolution. If you have multiple sites in the same report, it often takes longer to fix.
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Additional info
|
||||
description: If you know why the site is returning false positives, or noticed any patterns, please explain.
|
||||
placeholder: |
|
||||
Reddit is returning false positives because...
|
||||
False positives only occur after x searches...
|
||||
validations:
|
||||
required: false
|
||||
- type: checkboxes
|
||||
id: terms
|
||||
attributes:
|
||||
label: Code of Conduct
|
||||
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md).
|
||||
options:
|
||||
- label: I agree to follow this project's Code of Conduct
|
||||
required: true
|
||||
@@ -1,24 +0,0 @@
|
||||
name: Feature request
|
||||
description: Request a feature or enhancement
|
||||
labels: ["enhancement"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Concise and thoughtful titles help other contributors find and add your requested feature.
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Description
|
||||
description: Describe the feature you are requesting
|
||||
placeholder: I'd like Sherlock to be able to do xyz
|
||||
validations:
|
||||
required: true
|
||||
- type: checkboxes
|
||||
id: terms
|
||||
attributes:
|
||||
label: Code of Conduct
|
||||
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md).
|
||||
options:
|
||||
- label: I agree to follow this project's Code of Conduct
|
||||
required: true
|
||||
@@ -1,35 +0,0 @@
|
||||
name: Request a new website
|
||||
description: Request that Sherlock add support for a new website
|
||||
title: "Requesting support for: "
|
||||
labels: ["site support request"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Ensure that the site name is in the title of your request. Requests without this information will be **closed**.
|
||||
- type: input
|
||||
id: site-url
|
||||
attributes:
|
||||
label: Site URL
|
||||
description: |
|
||||
What is the URL of the website indicated in your title?
|
||||
Websites sometimes have similar names. This helps contributors find the correct site.
|
||||
placeholder: https://reddit.com
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Additional info
|
||||
description: If you have suggestions on how Sherlock should detect usernames, please explain below
|
||||
placeholder: Sherlock can detect if a username exists on Reddit by checking for...
|
||||
validations:
|
||||
required: false
|
||||
- type: checkboxes
|
||||
id: terms
|
||||
attributes:
|
||||
label: Code of Conduct
|
||||
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/sherlock-project/sherlock/blob/master/docs/CODE_OF_CONDUCT.md).
|
||||
options:
|
||||
- label: I agree to follow this project's Code of Conduct
|
||||
required: true
|
||||
@@ -1,11 +0,0 @@
|
||||
## Security Policy
|
||||
|
||||
### Supported Versions
|
||||
|
||||
Sherlock is a forward-looking project. Only the latest and most current version is supported.
|
||||
|
||||
### Reporting a Vulnerability
|
||||
|
||||
Security concerns can be submitted [__here__][report-url] without risk of exposing sensitive information. For issues that are low severity or unlikely to see exploitation, public issues are often acceptable.
|
||||
|
||||
[report-url]: https://github.com/sherlock-project/sherlock/security/advisories/new
|
||||
@@ -1,89 +0,0 @@
|
||||
name: Exclusions Updater
|
||||
|
||||
on:
|
||||
schedule:
|
||||
#- cron: '0 5 * * 0' # Runs at 05:00 every Sunday
|
||||
- cron: '0 5 * * *' # Runs at 05:00 every day
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
update-exclusions:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.13'
|
||||
|
||||
- name: Install Poetry
|
||||
uses: abatilo/actions-poetry@v4
|
||||
with:
|
||||
poetry-version: 'latest'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install --no-interaction --with dev
|
||||
|
||||
- name: Run false positive tests
|
||||
run: |
|
||||
$(poetry env activate)
|
||||
pytest -q --tb no -m validate_targets_fp -n 20 | tee fp_test_results.txt
|
||||
deactivate
|
||||
|
||||
- name: Parse false positive detections by desired categories
|
||||
run: |
|
||||
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \
|
||||
| sort -u > false_positive_exclusions.txt
|
||||
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \
|
||||
| sort -u > waf_hits.txt
|
||||
|
||||
- name: Detect if exclusions list changed
|
||||
id: detect_changes
|
||||
run: |
|
||||
git fetch origin exclusions || true
|
||||
|
||||
if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
|
||||
# If the exclusions branch and file exist, compare
|
||||
if git diff --quiet origin/exclusions -- false_positive_exclusions.txt; then
|
||||
echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
else
|
||||
# If the exclusions branch or file does not exist, treat as changed
|
||||
echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Quantify and display results
|
||||
run: |
|
||||
FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs)
|
||||
WAF_COUNT=$(wc -l < waf_hits.txt | xargs)
|
||||
echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits."
|
||||
echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt
|
||||
echo ">>> WAF hits:" && cat waf_hits.txt
|
||||
|
||||
- name: Commit and push exclusions list
|
||||
if: steps.detect_changes.outputs.exclusions_changed == 'true'
|
||||
run: |
|
||||
git config user.name "Paul Pfeister (automation)"
|
||||
git config user.email "code@pfeister.dev"
|
||||
|
||||
mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp
|
||||
|
||||
git add -f false_positive_exclusions.txt.tmp # -f required to override .gitignore
|
||||
git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp
|
||||
|
||||
git fetch origin exclusions || true # Allows creation of branch if deleted
|
||||
git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)
|
||||
|
||||
git stash pop || true
|
||||
|
||||
mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt
|
||||
|
||||
git rm -f false_positive_exclusions.txt.tmp || true
|
||||
git add false_positive_exclusions.txt
|
||||
git commit -m "auto: update exclusions list" || echo "No changes to commit"
|
||||
git push origin exclusions
|
||||
@@ -1,94 +0,0 @@
|
||||
name: Regression Testing
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
- release/**
|
||||
paths:
|
||||
- '.github/workflows/regression.yml'
|
||||
- '**/*.json'
|
||||
- '**/*.py'
|
||||
- '**/*.ini'
|
||||
- '**/*.toml'
|
||||
- 'Dockerfile'
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- release/**
|
||||
paths:
|
||||
- '.github/workflows/regression.yml'
|
||||
- '**/*.json'
|
||||
- '**/*.py'
|
||||
- '**/*.ini'
|
||||
- '**/*.toml'
|
||||
- 'Dockerfile'
|
||||
|
||||
jobs:
|
||||
tox-lint:
|
||||
runs-on: ubuntu-latest
|
||||
# Linting is run through tox to ensure that the same linter
|
||||
# is used by local runners
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up linting environment
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Install tox and related dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install tox
|
||||
- name: Run tox linting environment
|
||||
run: tox -e lint
|
||||
tox-matrix:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
# We want to know what specific versions it fails on
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [
|
||||
ubuntu-latest,
|
||||
windows-latest,
|
||||
macos-latest,
|
||||
]
|
||||
python-version: [
|
||||
'3.10',
|
||||
'3.11',
|
||||
'3.12',
|
||||
'3.13',
|
||||
'3.14',
|
||||
'3.14t',
|
||||
]
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up environment ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install tox and related dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install tox
|
||||
pip install tox-gh-actions
|
||||
- name: Run tox
|
||||
run: tox
|
||||
docker-build-test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Get version from pyproject.toml
|
||||
id: get-version
|
||||
run: |
|
||||
VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
- name: Build Docker image
|
||||
run: |
|
||||
docker build \
|
||||
--build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
|
||||
-t sherlock-test:latest .
|
||||
- name: Test Docker image runs
|
||||
run: docker run --rm sherlock-test:latest --version
|
||||
@@ -1,46 +0,0 @@
|
||||
name: Update Site List
|
||||
|
||||
# Trigger the workflow when changes are pushed to the master branch
|
||||
# and the changes include the sherlock_project/resources/data.json file
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- sherlock_project/resources/data.json
|
||||
|
||||
jobs:
|
||||
sync-json-data:
|
||||
# Use the latest version of Ubuntu as the runner environment
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
# Check out the code at the specified pull request head commit
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
fetch-depth: 0
|
||||
|
||||
# Install Python 3
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.x'
|
||||
|
||||
# Execute the site-list.py Python script
|
||||
- name: Execute site-list.py
|
||||
run: python devel/site-list.py
|
||||
|
||||
- name: Pushes to another repository
|
||||
uses: sdushantha/github-action-push-to-another-repository@main
|
||||
env:
|
||||
SSH_DEPLOY_KEY: ${{ secrets.SSH_DEPLOY_KEY }}
|
||||
API_TOKEN_GITHUB: ${{ secrets.API_TOKEN_GITHUB }}
|
||||
with:
|
||||
source-directory: 'output'
|
||||
destination-github-username: 'sherlock-project'
|
||||
commit-message: 'Updated site list'
|
||||
destination-repository-name: 'sherlockproject.xyz'
|
||||
user-email: siddharth.dushantha@gmail.com
|
||||
target-branch: master
|
||||
@@ -1,127 +0,0 @@
|
||||
name: Modified Target Validation
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- "sherlock_project/resources/data.json"
|
||||
|
||||
jobs:
|
||||
validate-modified-targets:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
# Checkout the base branch but fetch all history to avoid a second fetch call
|
||||
ref: ${{ github.base_ref }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.13"
|
||||
|
||||
- name: Install Poetry
|
||||
uses: abatilo/actions-poetry@v4
|
||||
with:
|
||||
poetry-version: "latest"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install --no-interaction --with dev
|
||||
|
||||
- name: Prepare JSON versions for comparison
|
||||
run: |
|
||||
# Fetch only the PR's branch head (single network call in this step)
|
||||
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
|
||||
|
||||
# Find the merge-base commit between the target branch and the PR branch
|
||||
MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
|
||||
echo "Comparing PR head against merge-base commit: $MERGE_BASE"
|
||||
|
||||
# Safely extract the file from the PR's head and the merge-base commit
|
||||
git show pr:sherlock_project/resources/data.json > data.json.head
|
||||
git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
|
||||
|
||||
# CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
|
||||
# This ensures that pytest runs against the new, updated file.
|
||||
cp data.json.head sherlock_project/resources/data.json
|
||||
|
||||
- name: Discover modified targets
|
||||
id: discover-modified
|
||||
run: |
|
||||
CHANGED=$(
|
||||
python - <<'EOF'
|
||||
import json
|
||||
import sys
|
||||
try:
|
||||
with open("data.json.base") as f: base = json.load(f)
|
||||
with open("data.json.head") as f: head = json.load(f)
|
||||
except FileNotFoundError as e:
|
||||
print(f"Error: Could not find {e.filename}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
changed = []
|
||||
for k, v in head.items():
|
||||
if k not in base or base[k] != v:
|
||||
changed.append(k)
|
||||
|
||||
print(",".join(sorted(changed)))
|
||||
EOF
|
||||
)
|
||||
|
||||
# Preserve changelist
|
||||
echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
|
||||
echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Validate remote manifest against local schema
|
||||
if: steps.discover-modified.outputs.changed_targets != ''
|
||||
run: |
|
||||
poetry run pytest tests/test_manifest.py::test_validate_manifest_against_local_schema
|
||||
|
||||
# --- The rest of the steps below are unchanged ---
|
||||
|
||||
- name: Validate modified targets
|
||||
env:
|
||||
CHANGED_TARGETS: ${{ steps.discover-modified.outputs.changed_targets }}
|
||||
run: |
|
||||
poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
|
||||
--chunked-sites "$CHANGED_TARGETS" \
|
||||
--junitxml=validation_results.xml
|
||||
|
||||
- name: Prepare validation summary
|
||||
if: steps.discover-modified.outputs.changed_targets != ''
|
||||
id: prepare-summary
|
||||
run: |
|
||||
summary=$(
|
||||
poetry run python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
|
||||
)
|
||||
echo "$summary" > validation_summary.md
|
||||
|
||||
- name: Announce validation results
|
||||
if: steps.discover-modified.outputs.changed_targets != ''
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const body = fs.readFileSync('validation_summary.md', 'utf8');
|
||||
await github.rest.issues.createComment({
|
||||
issue_number: context.payload.pull_request.number,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body: body,
|
||||
});
|
||||
|
||||
- name: This step shows as ran when no modifications are found
|
||||
if: steps.discover-modified.outputs.changed_targets == ''
|
||||
run: |
|
||||
echo "No modified targets found"
|
||||
-47
@@ -1,47 +0,0 @@
|
||||
# Virtual Environments
|
||||
venv/
|
||||
bin/
|
||||
lib/
|
||||
pyvenv.cfg
|
||||
poetry.lock
|
||||
|
||||
# Regression Testing
|
||||
.coverage
|
||||
.tox/
|
||||
|
||||
# Editor Configurations
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
|
||||
# Pip
|
||||
src/
|
||||
|
||||
# Devel, Build, and Installation
|
||||
*.egg-info/
|
||||
dist/**
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
*.ipynb
|
||||
|
||||
# Output files, except requirements.txt
|
||||
*.txt
|
||||
!requirements.txt
|
||||
|
||||
# Comma-Separated Values (CSV) Reports
|
||||
*.csv
|
||||
|
||||
# XLSX Reports
|
||||
*.xlsx
|
||||
|
||||
# Excluded sites list
|
||||
tests/.excluded_sites
|
||||
|
||||
# MacOS Folder Metadata File
|
||||
.DS_Store
|
||||
|
||||
# Vim swap files
|
||||
*.swp
|
||||
-31
@@ -1,31 +0,0 @@
|
||||
# Release instructions:
|
||||
# 1. Update the version tag in the Dockerfile to match the version in sherlock/__init__.py
|
||||
# 2. Update the VCS_REF tag to match the tagged version's FULL commit hash
|
||||
# 3. Build image with BOTH latest and version tags
|
||||
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
|
||||
|
||||
FROM python:3.12-slim-bullseye AS build
|
||||
WORKDIR /sherlock
|
||||
|
||||
RUN pip3 install --no-cache-dir --upgrade pip
|
||||
|
||||
FROM python:3.12-slim-bullseye
|
||||
WORKDIR /sherlock
|
||||
|
||||
ARG VCS_REF= # CHANGE ME ON UPDATE
|
||||
ARG VCS_URL="https://github.com/sherlock-project/sherlock"
|
||||
ARG VERSION_TAG= # CHANGE ME ON UPDATE
|
||||
|
||||
ENV SHERLOCK_ENV=docker
|
||||
|
||||
LABEL org.label-schema.vcs-ref=$VCS_REF \
|
||||
org.label-schema.vcs-url=$VCS_URL \
|
||||
org.label-schema.name="Sherlock" \
|
||||
org.label-schema.version=$VERSION_TAG \
|
||||
website="https://sherlockproject.xyz"
|
||||
|
||||
RUN pip3 install --no-cache-dir sherlock-project==$VERSION_TAG
|
||||
|
||||
WORKDIR /sherlock
|
||||
|
||||
ENTRYPOINT ["sherlock"]
|
||||
@@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Sherlock Project
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -1,45 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# This module generates the listing of supported sites which can be found in
|
||||
# sites.mdx. It also organizes all the sites in alphanumeric order
|
||||
import json
|
||||
import os
|
||||
|
||||
DATA_REL_URI: str = "sherlock_project/resources/data.json"
|
||||
|
||||
DEFAULT_ENCODING = "utf-8"
|
||||
|
||||
# Read the data.json file
with open(DATA_REL_URI, "r", encoding=DEFAULT_ENCODING) as data_file:
    data: dict = json.load(data_file)

# Removes schema-specific keywords for proper processing
social_networks = data.copy()
social_networks.pop('$schema', None)

# Sort the social networks in alphanumeric order
social_networks = sorted(social_networks.items())

# Make output dir where the site list will be written.
# exist_ok=True keeps the script re-runnable: an "output" directory left over
# from a previous run must not abort the script with FileExistsError.
os.makedirs("output", exist_ok=True)

# Write the list of supported sites to sites.mdx
with open("output/sites.mdx", "w", encoding=DEFAULT_ENCODING) as site_file:
    # MDX front matter
    site_file.write("---\n")
    site_file.write("title: 'List of supported sites'\n")
    site_file.write("sidebarTitle: 'Supported sites'\n")
    site_file.write("icon: 'globe'\n")
    site_file.write("description: 'Sherlock currently supports **400+** sites'\n")
    site_file.write("---\n\n")

    # One numbered Markdown entry per site, flagged when marked NSFW
    for social_network, info in social_networks:
        url_main = info["urlMain"]
        is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
        site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")

# Overwrite the data.json file with sorted data
with open(DATA_REL_URI, "w", encoding=DEFAULT_ENCODING) as data_file:
    sorted_data = json.dumps(data, indent=2, sort_keys=True)
    data_file.write(sorted_data)
    data_file.write("\n")  # Keep the newline after writing data

print("Finished updating supported site listing!")
|
||||
@@ -1,72 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# This module summarizes the results of site validation tests queued by
|
||||
# workflow validate_modified_targets for presentation in Issue comments.
|
||||
|
||||
from defusedxml import ElementTree as ET
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
def summarize_junit_xml(xml_path: Path) -> str:
    """Summarize a JUnit XML report as a Markdown table of per-target results.

    Every ``testcase`` whose name has the form ``test_name[site]`` contributes
    to the row for ``site``: ``test_false_pos`` fills the "F+ Check" column and
    ``test_false_neg`` fills the "F- Check" column. A column for which no test
    case was seen is rendered as ``Error!``. Test cases without a ``[site]``
    suffix produce no row, but an ``<error>`` on them is still reported.

    Args:
        xml_path: Path of the JUnit XML file to read.

    Returns:
        The Markdown summary: a heading, the results table and, when
        applicable, trailing notices about failures and errors.

    Raises:
        ValueError: If the document contains no ``testsuite`` element.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()
    # Accept both layouts: a wrapper element containing the suite, or a bare
    # <testsuite> as the document root.
    suite = root if root.tag == 'testsuite' else root.find('testsuite')

    pass_message: str = ":heavy_check_mark: Pass"
    fail_message: str = ":x: Fail"

    if suite is None:
        raise ValueError("Invalid JUnit XML: No testsuite found")

    summary_lines: list[str] = []
    summary_lines.append("#### Automatic validation of changes\n")
    summary_lines.append("| Target | F+ Check | F- Check |")
    summary_lines.append("|---|---|---|")

    failures = int(suite.get('failures', 0))
    errors_detected: bool = False

    # site name -> {column title -> rendered outcome}
    results: dict[str, dict[str, str]] = {}

    for testcase in suite.findall('testcase'):
        failure = testcase.find('failure')
        error = testcase.find('error')

        if error is not None:
            errors_detected = True

        # Split "test_name[site]" at the first "[". Entries without a
        # parameter suffix carry no site and must not abort the whole
        # summary, so they are skipped after recording any error above.
        test_name, bracket, remainder = (testcase.get('name') or '').partition('[')
        if not bracket:
            continue
        # Drop only the single closing bracket of the parameter id.
        site_name = remainder[:-1] if remainder.endswith(']') else remainder

        outcome = pass_message if failure is None and error is None else fail_message
        checks = results.setdefault(site_name, {})

        if test_name == "test_false_neg":
            checks['F- Check'] = outcome
        elif test_name == "test_false_pos":
            checks['F+ Check'] = outcome

    for site_name, checks in results.items():
        summary_lines.append(
            f"| {site_name} | {checks.get('F+ Check', 'Error!')} | {checks.get('F- Check', 'Error!')} |"
        )

    if failures > 0:
        summary_lines.append("\n___\n" +
            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")

    if errors_detected:
        summary_lines.append("\n___\n" +
            "\n**Errors were detected during validation. Please review the workflow logs.**")

    return "\n".join(summary_lines)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the path of the
    # JUnit XML file. Diagnostics are written to stderr so that stdout only
    # ever carries the Markdown summary, which callers may capture verbatim.
    if len(sys.argv) != 2:
        print("Usage: summarize_site_validation.py <junit-xml-file>", file=sys.stderr)
        sys.exit(1)

    xml_path: Path = Path(sys.argv[1])
    if not xml_path.is_file():
        print(f"Error: File '{xml_path}' does not exist.", file=sys.stderr)
        sys.exit(1)

    summary: str = summarize_junit_xml(xml_path)
    print(summary)
|
||||
@@ -1,130 +0,0 @@
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
We as members, contributors, and leaders pledge to make participation in our
|
||||
community a harassment-free experience for everyone, regardless of age, body
|
||||
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||
identity and expression, level of experience, education, socio-economic status,
|
||||
nationality, personal appearance, race, caste, color, religion, or sexual
|
||||
identity and orientation.
|
||||
|
||||
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||
diverse, inclusive, and healthy community.
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
* Demonstrating empathy and kindness toward other people
|
||||
* Being respectful of differing opinions, viewpoints, and experiences
|
||||
* Giving and gracefully accepting constructive feedback
|
||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience
|
||||
* Focusing on what is best not just for us as individuals, but for the overall
|
||||
community
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
||||
any kind
|
||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or email address,
|
||||
without their explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Enforcement Responsibilities
|
||||
|
||||
Community leaders are responsible for clarifying and enforcing our standards of
|
||||
acceptable behavior and will take appropriate and fair corrective action in
|
||||
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||
or harmful.
|
||||
|
||||
Community leaders have the right and responsibility to remove, edit, or reject
|
||||
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||
decisions when appropriate.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies within all community spaces, and also applies when
|
||||
an individual is officially representing the community in public spaces.
|
||||
Examples of representing our community include using an official e-mail address,
|
||||
posting via an official social media account, or acting as an appointed
|
||||
representative at an online or offline event.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||
reported to the community leaders responsible for enforcement at yahya.arbabi@gmail.com.
|
||||
All complaints will be reviewed and investigated promptly and fairly.
|
||||
|
||||
All community leaders are obligated to respect the privacy and security of the
|
||||
reporter of any incident.
|
||||
|
||||
## Enforcement Guidelines
|
||||
|
||||
Community leaders will follow these Community Impact Guidelines in determining
|
||||
the consequences for any action they deem in violation of this Code of Conduct:
|
||||
|
||||
### 1. Correction
|
||||
|
||||
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||
unprofessional or unwelcome in the community.
|
||||
|
||||
**Consequence**: A private, written warning from community leaders, providing
|
||||
clarity around the nature of the violation and an explanation of why the
|
||||
behavior was inappropriate. A public apology may be requested.
|
||||
|
||||
### 2. Warning
|
||||
|
||||
**Community Impact**: A violation through a single incident or series of
|
||||
actions.
|
||||
|
||||
**Consequence**: A warning with consequences for continued behavior. No
|
||||
interaction with the people involved, including unsolicited interaction with
|
||||
those enforcing the Code of Conduct, for a specified period of time. This
|
||||
includes avoiding interactions in community spaces as well as external channels
|
||||
like social media. Violating these terms may lead to a temporary or permanent
|
||||
ban.
|
||||
|
||||
### 3. Temporary Ban
|
||||
|
||||
**Community Impact**: A serious violation of community standards, including
|
||||
sustained inappropriate behavior.
|
||||
|
||||
**Consequence**: A temporary ban from any sort of interaction or public
|
||||
communication with the community for a specified period of time. No public or
|
||||
private interaction with the people involved, including unsolicited interaction
|
||||
with those enforcing the Code of Conduct, is allowed during this period.
|
||||
Violating these terms may lead to a permanent ban.
|
||||
|
||||
### 4. Permanent Ban
|
||||
|
||||
**Community Impact**: Demonstrating a pattern of violation of community
|
||||
standards, including sustained inappropriate behavior, harassment of an
|
||||
individual, or aggression toward or disparagement of classes of individuals.
|
||||
|
||||
**Consequence**: A permanent ban from any sort of public interaction within the
|
||||
community.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||
version 2.1, available at
|
||||
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
||||
|
||||
Community Impact Guidelines were inspired by
|
||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
||||
|
||||
For answers to common questions about this code of conduct, see the FAQ at
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
||||
[https://www.contributor-covenant.org/translations][translations].
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
-115
@@ -1,115 +0,0 @@
|
||||
<p align="center">
|
||||
<br>
|
||||
<a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png" alt="sherlock"/></a>
|
||||
<br>
|
||||
<span>Hunt down social media accounts by username across <a href="https://sherlockproject.xyz/sites">400+ social networks</a></span>
|
||||
<br>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://sherlockproject.xyz/installation">Installation</a>
|
||||
•
|
||||
<a href="https://sherlockproject.xyz/usage">Usage</a>
|
||||
•
|
||||
<a href="https://sherlockproject.xyz/contribute">Contributing</a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" height="70%" src="images/demo.png" alt="demo"/>
|
||||
</p>
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
> [!WARNING]
|
||||
> Packages for ParrotOS and Ubuntu 24.04, maintained by a third party, appear to be __broken__.
|
||||
> Users of these systems should defer to [`uv`](https://docs.astral.sh/uv/)/`pipx`/`pip` or Docker.
|
||||
|
||||
| Method | Notes |
|
||||
| - | - |
|
||||
| `pipx install sherlock-project` | `pip` or [`uv`](https://docs.astral.sh/uv/) may be used in place of `pipx` |
|
||||
| `docker run -it --rm sherlock/sherlock` | |
|
||||
| `dnf install sherlock-project` | |
|
||||
|
||||
Community-maintained packages are available for Debian (>= 13), Ubuntu (>= 22.10), Homebrew, Kali, and BlackArch. These packages are not directly supported or maintained by the Sherlock Project.
|
||||
|
||||
See all alternative installation methods [here](https://sherlockproject.xyz/installation).
|
||||
|
||||
## General usage
|
||||
|
||||
To search for only one user:
|
||||
```bash
|
||||
sherlock user123
|
||||
```
|
||||
|
||||
To search for more than one user:
|
||||
```bash
|
||||
sherlock user1 user2 user3
|
||||
```
|
||||
|
||||
Accounts found will be stored in an individual text file with the corresponding username (e.g. ```user123.txt```).
|
||||
|
||||
```console
|
||||
$ sherlock --help
|
||||
usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT] [--output OUTPUT] [--csv] [--xlsx] [--site SITE_NAME] [--proxy PROXY_URL] [--dump-response]
|
||||
[--json JSON_FILE] [--timeout TIMEOUT] [--print-all] [--print-found] [--no-color] [--browse] [--local] [--nsfw] [--txt] [--ignore-exclusions]
|
||||
USERNAMES [USERNAMES ...]
|
||||
|
||||
Sherlock: Find Usernames Across Social Networks (Version 0.16.0)
|
||||
|
||||
positional arguments:
|
||||
USERNAMES One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
--version Display version information and dependencies.
|
||||
--verbose, -v, -d, --debug
|
||||
Display extra debugging information and metrics.
|
||||
--folderoutput FOLDEROUTPUT, -fo FOLDEROUTPUT
|
||||
If using multiple usernames, the output of the results will be saved to this folder.
|
||||
--output OUTPUT, -o OUTPUT
|
||||
If using single username, the output of the result will be saved to this file.
|
||||
--csv Create Comma-Separated Values (CSV) File.
|
||||
--xlsx Create the standard file for the modern Microsoft Excel spreadsheet (xlsx).
|
||||
--site SITE_NAME Limit analysis to just the listed sites. Add multiple options to specify more than one site.
|
||||
--proxy PROXY_URL, -p PROXY_URL
|
||||
Make requests over a proxy. e.g. socks5://127.0.0.1:1080
|
||||
--dump-response Dump the HTTP response to stdout for targeted debugging.
|
||||
--json JSON_FILE, -j JSON_FILE
|
||||
Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.
|
||||
--timeout TIMEOUT Time (in seconds) to wait for response to requests (Default: 60)
|
||||
--print-all Output sites where the username was not found.
|
||||
--print-found Output sites where the username was found (also if exported as file).
|
||||
--no-color Don't color terminal output
|
||||
--browse, -b Browse to all results on default browser.
|
||||
--local, -l Force the use of the local data.json file.
|
||||
--nsfw Include checking of NSFW sites from default list.
|
||||
--txt Enable creation of a txt file
|
||||
--ignore-exclusions Ignore upstream exclusions (may return more false positives)
|
||||
```
|
||||
|
||||
## Credits
|
||||
|
||||
Thank you to everyone who has contributed to Sherlock! ❤️
|
||||
|
||||
<a href="https://github.com/sherlock-project/sherlock/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
|
||||
</a>
|
||||
|
||||
## Star History
|
||||
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" />
|
||||
<img alt="Sherlock Project Star History Chart" src="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" />
|
||||
</picture>
|
||||
|
||||
## License
|
||||
|
||||
MIT © Sherlock Project<br/>
|
||||
Creator - [Siddharth Dushantha](https://github.com/sdushantha)
|
||||
|
||||
<!-- Reference Links -->
|
||||
|
||||
[ext_pypi]: https://pypi.org/project/sherlock-project/
|
||||
[ext_brew]: https://formulae.brew.sh/formula/sherlock
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 440 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 70 KiB |
@@ -1,42 +0,0 @@
|
||||
<!-- This README should be a mini version at all times for use on pypi -->
|
||||
|
||||
<p align=center>
|
||||
<br>
|
||||
<a href="https://sherlock-project.github.io/" target="_blank"><img src="https://www.kali.org/tools/sherlock/images/sherlock-logo.svg" width="25%"/></a>
|
||||
<br>
|
||||
<strong><span>Hunt down social media accounts by username across <a href="https://github.com/sherlock-project/sherlock/blob/master/sites.md">400+ social networks</a></span></strong>
|
||||
<br><br>
|
||||
<span>Additional documentation can be found at our <a href="https://github.com/sherlock-project/sherlock/">GitHub repository</a></span>
|
||||
<br>
|
||||
</p>
|
||||
|
||||
## Usage
|
||||
|
||||
```console
|
||||
$ sherlock --help
|
||||
usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT]
|
||||
[--output OUTPUT] [--tor] [--unique-tor] [--csv] [--xlsx]
|
||||
[--site SITE_NAME] [--proxy PROXY_URL] [--json JSON_FILE]
|
||||
[--timeout TIMEOUT] [--print-all] [--print-found] [--no-color]
|
||||
[--browse] [--local] [--nsfw]
|
||||
USERNAMES [USERNAMES ...]
|
||||
```
|
||||
|
||||
To search for only one user:
|
||||
```bash
|
||||
$ sherlock user123
|
||||
```
|
||||
|
||||
To search for more than one user:
|
||||
```bash
|
||||
$ sherlock user1 user2 user3
|
||||
```
|
||||
<br>
|
||||
|
||||
___
|
||||
|
||||
<br>
|
||||
<p align="center">
|
||||
<img width="70%" height="70%" src="https://user-images.githubusercontent.com/27065646/219638267-a5e11090-aa6e-4e77-87f7-0e95f6ad5978.png"/>
|
||||
</a>
|
||||
</p>
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,40 @@
|
||||
7Cups
|
||||
APClips
|
||||
Airliners
|
||||
Apple Discussions
|
||||
Archive.org
|
||||
Bandcamp
|
||||
BitBucket
|
||||
Codolio
|
||||
Discord.bio
|
||||
Envato Forum
|
||||
Giphy
|
||||
Hashnode
|
||||
Hubski
|
||||
LessWrong
|
||||
Motherless
|
||||
Patched
|
||||
Pornhub
|
||||
Rarible
|
||||
Realmeye
|
||||
Reddit
|
||||
RocketTube
|
||||
RuneScape
|
||||
Scribd
|
||||
Shelf
|
||||
SlideShare
|
||||
Smule
|
||||
Splice
|
||||
Spotify
|
||||
TryHackMe
|
||||
Velomania
|
||||
Weblate
|
||||
YandexMusic
|
||||
dailykos
|
||||
igromania
|
||||
interpals
|
||||
mercadolivre
|
||||
opennet
|
||||
phpRU
|
||||
svidbook
|
||||
xHamster
|
||||
@@ -1,68 +0,0 @@
|
||||
[build-system]
|
||||
requires = [ "poetry-core>=1.2.0" ]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
# poetry-core 1.8 not available in .fc39. Can upgrade to 1.8.0 at .fc39 EOL
|
||||
|
||||
[tool.poetry-version-plugin]
|
||||
source = "init"
|
||||
|
||||
[tool.poetry]
|
||||
name = "sherlock-project"
|
||||
version = "0.16.1"
|
||||
description = "Hunt down social media accounts by username across social networks"
|
||||
license = "MIT"
|
||||
authors = [
|
||||
"Siddharth Dushantha <siddharth.dushantha@gmail.com>"
|
||||
]
|
||||
maintainers = [
|
||||
"Paul Pfeister <code@pfeister.dev>",
|
||||
"Matheus Felipe <matheusfelipeog@protonmail.com>",
|
||||
"Sondre Karlsen Dyrnes <sondre@villdyr.no>"
|
||||
]
|
||||
readme = "docs/pyproject/README.md"
|
||||
packages = [ { include = "sherlock_project"} ]
|
||||
keywords = [ "osint", "reconnaissance", "information gathering" ]
|
||||
classifiers = [
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Information Technology",
|
||||
"Natural Language :: English",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Topic :: Security"
|
||||
]
|
||||
homepage = "https://sherlockproject.xyz/"
|
||||
repository = "https://github.com/sherlock-project/sherlock"
|
||||
|
||||
|
||||
[tool.poetry.urls]
|
||||
"Bug Tracker" = "https://github.com/sherlock-project/sherlock/issues"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
certifi = ">=2019.6.16"
|
||||
colorama = "^0.4.1"
|
||||
PySocks = "^1.7.0"
|
||||
requests = "^2.22.0"
|
||||
requests-futures = "^1.0.0"
|
||||
stem = "^1.8.0"
|
||||
pandas = "^2.2.1"
|
||||
openpyxl = "^3.0.10"
|
||||
tomli = "^2.2.1"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
jsonschema = "^4.0.0"
|
||||
rstr = "^3.2.2"
|
||||
pytest = "^8.4.2"
|
||||
pytest-xdist = "^3.8.0"
|
||||
|
||||
|
||||
[tool.poetry.group.ci.dependencies]
|
||||
defusedxml = "^0.7.1"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
sherlock = 'sherlock_project.sherlock:main'
|
||||
@@ -1,7 +0,0 @@
|
||||
[pytest]
|
||||
addopts = --strict-markers -m "not validate_targets"
|
||||
markers =
|
||||
online: mark tests as requiring internet access.
|
||||
validate_targets: mark tests for sweeping manifest validation (sends many requests).
|
||||
validate_targets_fp: validate_targets, false positive tests only.
|
||||
validate_targets_fn: validate_targets, false negative tests only.
|
||||
@@ -1,30 +0,0 @@
|
||||
""" Sherlock Module
|
||||
|
||||
This module contains the main logic to search for usernames at social
|
||||
networks.
|
||||
|
||||
"""
|
||||
|
||||
from importlib.metadata import version as pkg_version, PackageNotFoundError
|
||||
import pathlib
|
||||
import tomli
|
||||
|
||||
|
||||
def get_version() -> str:
    """Return the version string of Sherlock.

    The metadata of the installed ``sherlock_project`` distribution is
    consulted first. If no such distribution is installed, the version is
    read from the ``[tool.poetry]`` table of the ``pyproject.toml`` located
    one directory above this package.
    """
    try:
        return pkg_version("sherlock_project")
    except PackageNotFoundError:
        # Not installed as a distribution: fall back to the project manifest
        # that sits next to the package directory.
        package_dir: pathlib.Path = pathlib.Path(__file__).resolve().parent
        manifest_path: pathlib.Path = package_dir.parent / "pyproject.toml"
        with manifest_path.open("rb") as manifest_file:
            manifest = tomli.load(manifest_file)
        poetry_table = manifest["tool"]["poetry"]
        return poetry_table["version"]
|
||||
|
||||
# This variable is only used to check for ImportErrors induced by users running as script rather than as module or package
|
||||
import_error_test_var = None
|
||||
|
||||
__shortname__ = "Sherlock"
|
||||
__longname__ = "Sherlock: Find Usernames Across Social Networks"
|
||||
__version__ = get_version()
|
||||
|
||||
forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest"
|
||||
@@ -1,22 +0,0 @@
|
||||
#! /usr/bin/env python3
|
||||
|
||||
"""
|
||||
Sherlock: Find Usernames Across Social Networks Module
|
||||
|
||||
This module contains the main logic to search for usernames at social
|
||||
networks.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Refuse to start on interpreters older than the minimum supported
    # release, telling the user which version they are running.
    running_version = sys.version.split()[0]
    minimum_supported = (3, 9)

    if not sys.version_info >= minimum_supported:
        print(f"Sherlock requires Python 3.9+\nYou are using Python {running_version}, which is not supported by Sherlock.")
        sys.exit(1)

    # Imported only after the version check has passed.
    from sherlock_project import sherlock
    sherlock.main()
|
||||
@@ -1,279 +0,0 @@
|
||||
"""Sherlock Notify Module
|
||||
|
||||
This module defines the objects for notifying the caller about the
|
||||
results of queries.
|
||||
"""
|
||||
from sherlock_project.result import QueryStatus
|
||||
from colorama import Fore, Style
|
||||
import webbrowser
|
||||
|
||||
# Global variable to count the number of results.
|
||||
globvar = 0
|
||||
|
||||
|
||||
class QueryNotify:
    """Query Notify Object.

    Base class that describes methods available to notify the results of
    a query.
    It is intended that other classes inherit from this base class and
    override the methods to implement specific functionality.
    """

    def __init__(self, result=None):
        """Create Query Notify Object.

        Contains information about a specific method of notifying the results
        of a query.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.

        Return Value:
        Nothing.
        """

        self.result = result

    def start(self, message=None):
        """Notify Start.

        Notify method for start of query.  This method will be called before
        any queries are performed.  This method will typically be
        overridden by higher level classes that will inherit from it.

        Keyword Arguments:
        self                   -- This object.
        message                -- Object that is used to give context to start
                                  of query.
                                  Default is None.

        Return Value:
        Nothing.
        """

    def update(self, result):
        """Notify Update.

        Notify method for query result.  This method will typically be
        overridden by higher level classes that will inherit from it.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.

        Return Value:
        Nothing.
        """

        # Remember the most recent result; __str__ reports it.
        self.result = result

    def finish(self, message=None):
        """Notify Finish.

        Notify method for finish of query.  This method will be called after
        all queries have been performed.  This method will typically be
        overridden by higher level classes that will inherit from it.

        Keyword Arguments:
        self                   -- This object.
        message                -- Object that is used to give context to finish
                                  of query.
                                  Default is None.

        Return Value:
        Nothing.
        """

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nicely formatted string to get information about this object.
        """
        return str(self.result)
|
||||
|
||||
|
||||
class QueryNotifyPrint(QueryNotify):
    """Query Notify Print Object.

    Query notify class that prints results to the standard output.
    """

    def __init__(self, result=None, verbose=False, print_all=False, browse=False):
        """Create Query Notify Print Object.

        Contains information about a specific method of notifying the results
        of a query.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.
        verbose                -- Boolean indicating whether to give verbose output.
        print_all              -- Boolean indicating whether to print every site,
                                  including those where the username was not found.
        browse                 -- Boolean indicating whether to open found sites
                                  in a web browser.

        Return Value:
        Nothing.
        """

        super().__init__(result)
        self.verbose = verbose
        self.print_all = print_all
        self.browse = browse

    def start(self, message):
        """Notify Start.

        Will print the title to the standard output.

        Keyword Arguments:
        self                   -- This object.
        message                -- String containing username that the series
                                  of queries are about.

        Return Value:
        Nothing.
        """

        title = "Checking username"

        print(Style.BRIGHT + Fore.GREEN + "[" +
              Fore.YELLOW + "*" +
              Fore.GREEN + f"] {title}" +
              Fore.WHITE + f" {message}" +
              Fore.GREEN + " on:")
        # An empty line between the title and the results, for clearer output.
        print('\r')

    def countResults(self):
        """Increment and return the number of results.

        Every call increases the module-level counter by one.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        The number of results counted so far, including this call.
        """
        global globvar
        globvar += 1
        return globvar

    def update(self, result):
        """Notify Update.

        Will print the query result to the standard output.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.

        Return Value:
        Nothing.

        Raises:
        ValueError             -- The result carries a status that is none of
                                  the QueryStatus members handled here.
        """
        self.result = result

        # The response time is only shown in verbose mode.
        response_time_text = ""
        if self.result.query_time is not None and self.verbose is True:
            response_time_text = f" [{round(self.result.query_time * 1000)}ms]"

        # Output to the terminal is desired.
        if result.status == QueryStatus.CLAIMED:
            # Only claimed usernames count towards the final total.
            self.countResults()
            print(Style.BRIGHT + Fore.WHITE + "[" +
                  Fore.GREEN + "+" +
                  Fore.WHITE + "]" +
                  response_time_text +
                  Fore.GREEN +
                  f" {self.result.site_name}: " +
                  Style.RESET_ALL +
                  f"{self.result.site_url_user}")
            if self.browse:
                # new=2 asks the browser for a new tab where possible.
                webbrowser.open(self.result.site_url_user, 2)

        elif result.status == QueryStatus.AVAILABLE:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      response_time_text +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.YELLOW + " Not Found!")

        elif result.status == QueryStatus.UNKNOWN:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.RED + f" {self.result.context}" +
                      Fore.YELLOW + " ")

        elif result.status == QueryStatus.ILLEGAL:
            if self.print_all:
                msg = "Illegal Username Format For This Site!"
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.YELLOW + f" {msg}")

        elif result.status == QueryStatus.WAF:
            if self.print_all:
                print(Style.BRIGHT + Fore.WHITE + "[" +
                      Fore.RED + "-" +
                      Fore.WHITE + "]" +
                      Fore.GREEN + f" {self.result.site_name}:" +
                      Fore.RED + " Blocked by bot detection" +
                      Fore.YELLOW + " (proxy may help)")

        else:
            # It should be impossible to ever get here...
            raise ValueError(
                f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'"
            )

    def finish(self, message="The processing has been finished."):
        """Notify Finish.

        Will print the last line to the standard output.

        Keyword Arguments:
        self                   -- This object.
        message                -- Accepted for interface compatibility; it is
                                  not used when printing the summary line.

        Return Value:
        Nothing.
        """
        # Read the counter directly. Going through countResults() would
        # increment it as a side effect, so a later finish() or
        # countResults() call would over-report by one.
        number_of_results = globvar

        print(Style.BRIGHT + Fore.GREEN + "[" +
              Fore.YELLOW + "*" +
              Fore.GREEN + "] Search completed with" +
              Fore.WHITE + f" {number_of_results} " +
              Fore.GREEN + "results" + Style.RESET_ALL
              )

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        String form of the most recent query result held by this object.
        """
        return str(self.result)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,149 +0,0 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "Sherlock Target Manifest",
|
||||
"description": "Social media targets to probe for the existence of known usernames",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"$schema": { "type": "string" }
|
||||
},
|
||||
"patternProperties": {
|
||||
"^(?!\\$).*?$": {
|
||||
"type": "object",
|
||||
"description": "Target name and associated information (key should be human readable name)",
|
||||
"required": ["url", "urlMain", "errorType", "username_claimed"],
|
||||
"properties": {
|
||||
"url": { "type": "string" },
|
||||
"urlMain": { "type": "string" },
|
||||
"urlProbe": { "type": "string" },
|
||||
"username_claimed": { "type": "string" },
|
||||
"regexCheck": { "type": "string" },
|
||||
"isNSFW": { "type": "boolean" },
|
||||
"headers": { "type": "object" },
|
||||
"request_payload": { "type": "object" },
|
||||
"__comment__": {
|
||||
"type": "string",
|
||||
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
|
||||
},
|
||||
"tags": {
|
||||
"oneOf": [
|
||||
{ "$ref": "#/$defs/tag" },
|
||||
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
|
||||
]
|
||||
},
|
||||
"request_method": {
|
||||
"type": "string",
|
||||
"enum": ["GET", "POST", "HEAD", "PUT"]
|
||||
},
|
||||
"errorType": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["message", "response_url", "status_code"]
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": ["message", "response_url", "status_code"]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"errorMsg": {
|
||||
"oneOf": [
|
||||
{ "type": "string" },
|
||||
{ "type": "array", "items": { "type": "string" } }
|
||||
]
|
||||
},
|
||||
"errorCode": {
|
||||
"oneOf": [
|
||||
{ "type": "integer" },
|
||||
{ "type": "array", "items": { "type": "integer" } }
|
||||
]
|
||||
},
|
||||
"errorUrl": { "type": "string" },
|
||||
"response_url": { "type": "string" }
|
||||
},
|
||||
"dependencies": {
|
||||
"errorMsg": {
|
||||
"oneOf": [
|
||||
{ "properties": { "errorType": { "const": "message" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "message" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"errorUrl": {
|
||||
"oneOf": [
|
||||
{ "properties": { "errorType": { "const": "response_url" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "response_url" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"errorCode": {
|
||||
"oneOf": [
|
||||
{ "properties": { "errorType": { "const": "status_code" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "status_code" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"allOf": [
|
||||
{
|
||||
"if": {
|
||||
"anyOf": [
|
||||
{ "properties": { "errorType": { "const": "message" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "message" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"then": { "required": ["errorMsg"] }
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"anyOf": [
|
||||
{ "properties": { "errorType": { "const": "response_url" } } },
|
||||
{
|
||||
"properties": {
|
||||
"errorType": {
|
||||
"type": "array",
|
||||
"contains": { "const": "response_url" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"then": { "required": ["errorUrl"] }
|
||||
}
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"$defs": {
|
||||
"tag": { "type": "string", "enum": ["adult", "gaming"] }
|
||||
}
|
||||
}
|
||||
@@ -1,89 +0,0 @@
|
||||
"""Sherlock Result Module
|
||||
|
||||
This module defines various objects for recording the results of queries.
|
||||
"""
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class QueryStatus(Enum):
    """Query Status Enumeration.

    Enumerates the possible outcomes of querying a site for a username.
    """

    # Username detected.
    CLAIMED = "Claimed"
    # Username not detected.
    AVAILABLE = "Available"
    # An error occurred while trying to detect the username.
    UNKNOWN = "Unknown"
    # Username is not allowable for this site.
    ILLEGAL = "Illegal"
    # Request was blocked by a WAF (e.g. Cloudflare).
    WAF = "WAF"

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        The member's value, e.g. "Claimed".
        """
        label = self.value
        return label
|
||||
|
||||
class QueryResult:
    """Query Result Object.

    Holds the outcome of one query about a given username on one site.
    """

    def __init__(self, username, site_name, site_url_user, status,
                 query_time=None, context=None):
        """Create Query Result Object.

        Stores every argument unchanged as an attribute of the same name.

        Keyword Arguments:
        self                   -- This object.
        username               -- String indicating username that query result
                                  was about.
        site_name              -- String which identifies site.
        site_url_user          -- String containing URL for username on site.
                                  NOTE:  The account may or may not exist: this
                                         just indicates what the URL would
                                         be, if it existed.
        status                 -- Enumeration of type QueryStatus() indicating
                                  the status of the query.
        query_time             -- Time (in seconds) required to perform query.
                                  Default of None.
        context                -- String indicating any additional context
                                  about the query.  For example, if there was
                                  an error, this might indicate the type of
                                  error that occurred.
                                  Default of None.

        Return Value:
        Nothing.
        """
        self.username = username
        self.site_name = site_name
        self.site_url_user = site_url_user
        self.status = status
        self.query_time = query_time
        self.context = context

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        The status as text, followed by the context in parentheses whenever
        a context is set.
        """
        text = str(self.status)
        if self.context is None:
            return text

        # Extra context is available: append it to the normal status text.
        return text + f" ({self.context})"
|
||||
@@ -1,935 +0,0 @@
|
||||
#! /usr/bin/env python3
|
||||
|
||||
"""
|
||||
Sherlock: Find Usernames Across Social Networks Module
|
||||
|
||||
This module contains the main logic to search for usernames at social
|
||||
networks.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
try:
|
||||
from sherlock_project.__init__ import import_error_test_var # noqa: F401
|
||||
except ImportError:
|
||||
print("Did you run Sherlock with `python3 sherlock/sherlock.py ...`?")
|
||||
print("This is an outdated method. Please see https://sherlockproject.xyz/installation for up to date instructions.")
|
||||
sys.exit(1)
|
||||
|
||||
import csv
|
||||
import signal
|
||||
import pandas as pd
|
||||
import os
|
||||
import re
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
from json import loads as json_loads
|
||||
from time import monotonic
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
from requests_futures.sessions import FuturesSession
|
||||
|
||||
from sherlock_project.__init__ import (
|
||||
__longname__,
|
||||
__shortname__,
|
||||
__version__,
|
||||
forge_api_latest_release,
|
||||
)
|
||||
|
||||
from sherlock_project.result import QueryStatus
|
||||
from sherlock_project.result import QueryResult
|
||||
from sherlock_project.notify import QueryNotify
|
||||
from sherlock_project.notify import QueryNotifyPrint
|
||||
from sherlock_project.sites import SitesInformation
|
||||
from colorama import init
|
||||
from argparse import ArgumentTypeError
|
||||
|
||||
|
||||
class SherlockFuturesSession(FuturesSession):
    """FuturesSession that records how long each request took."""

    def request(self, method, url, hooks=None, *args, **kwargs):
        """Request URL.

        This extends the FuturesSession request method to calculate a response
        time metric to each request.

        It is taken (almost) directly from the requests-futures README:
        https://github.com/ross/requests-futures#working-in-the-background

        Keyword Arguments:
        self                   -- This object.
        method                 -- String containing method desired for request.
        url                    -- String containing URL for request.
        hooks                  -- Dictionary containing hooks to execute after
                                  request finishes.  It is copied, never
                                  modified in place.
        args                   -- Arguments.
        kwargs                 -- Keyword arguments.

        Return Value:
        Request object.
        """
        # Work on a shallow copy so that the caller's mapping (and any list
        # stored under "response") is left untouched.  Mutating it in place
        # would stack one more timing hook on every reuse of the same dict.
        hooks = dict(hooks) if hooks else {}

        # Record the start time for the request.
        start = monotonic()

        def response_time(resp, *args, **kwargs):
            """Response Time Hook.

            Keyword Arguments:
            resp                   -- Response object.
            args                   -- Arguments.
            kwargs                 -- Keyword arguments.

            Return Value:
            Nothing.
            """
            # Overwrite the elapsed attribute with seconds (float) measured
            # from the moment the request was submitted.
            resp.elapsed = monotonic() - start

        # Install hook to execute when response completes.
        # Make sure that the time measurement hook is first, so we will not
        # track any later hook's execution time.
        existing = hooks.get("response")
        if existing is None:
            # No response hook was already defined, so install it ourselves.
            hooks["response"] = [response_time]
        elif isinstance(existing, (list, tuple)):
            # Build a new list rather than inserting into the caller's one.
            hooks["response"] = [response_time, *existing]
        else:
            # Must have previously contained a single hook function,
            # so convert to list.
            hooks["response"] = [response_time, existing]

        return super().request(method, url, *args, hooks=hooks, **kwargs)
|
||||
|
||||
|
||||
def get_response(request_future, error_type, social_network):
    """Wait for a request future and classify any failure.

    Keyword Arguments:
    request_future         -- Future whose result() yields the response.
    error_type             -- Not used by this function.
    social_network         -- Not used by this function.

    Return Value:
    Tuple of (response, error_context, exception_text):
    response               -- The response object, or None if result() raised
                              one of the handled exceptions.
    error_context          -- None when the response has a truthy status
                              code, otherwise a short label for the failure.
    exception_text         -- String form of the handled exception, or None
                              if no exception was handled.
    """
    # Defaults describe a failure that none of the handlers recognised.
    response = None
    error_context = "General Unknown Error"
    exception_text = None

    try:
        response = request_future.result()
        if response.status_code:
            # Status code exists in response object
            error_context = None
    except (requests.exceptions.RequestException, UnicodeError) as caught:
        # Ordered from most to least specific; the first matching entry
        # wins.  ProxyError must come before ConnectionError, and
        # ConnectionError before Timeout, to keep their precedence.
        labelled_kinds = (
            (requests.exceptions.HTTPError, "HTTP Error"),
            (requests.exceptions.ProxyError, "Proxy Error"),
            (requests.exceptions.ConnectionError, "Error Connecting"),
            (requests.exceptions.Timeout, "Timeout Error"),
            (requests.exceptions.RequestException, "Unknown Error"),
            (UnicodeError, "Encoding Error"),
        )
        error_context = next(
            label for kind, label in labelled_kinds if isinstance(caught, kind)
        )
        exception_text = str(caught)

    return response, error_context, exception_text
|
||||
|
||||
|
||||
def interpolate_string(input_object, username):
    """Substitute the username for every "{}" placeholder.

    Strings get each "{}" replaced by username.  Dictionaries (values only)
    and lists are rebuilt with the substitution applied recursively.  Any
    other object is returned unchanged.
    """
    if isinstance(input_object, str):
        return input_object.replace("{}", username)

    if isinstance(input_object, dict):
        filled = {}
        for key, value in input_object.items():
            filled[key] = interpolate_string(value, username)
        return filled

    if isinstance(input_object, list):
        return [interpolate_string(item, username) for item in input_object]

    return input_object
|
||||
|
||||
|
||||
def check_for_parameter(username):
    """Tell whether the username contains the "{?}" placeholder.

    If it does, Sherlock is being asked to look for several variants of
    the username.
    """
    placeholder = "{?}"
    return placeholder in username
|
||||
|
||||
|
||||
# Symbols substituted, in this order, for the "{?}" placeholder.
checksymbols = ["_", "-", "."]


def multiple_usernames(username):
    """Return one username per check symbol, replacing every "{?}" with it."""
    return [username.replace("{?}", symbol) for symbol in checksymbols]
|
||||
|
||||
|
||||
def sherlock(
    username: str,
    site_data: dict[str, dict[str, str]],
    query_notify: QueryNotify,
    dump_response: bool = False,
    proxy: Optional[str] = None,
    timeout: int = 60,
) -> dict[str, dict[str, str | QueryResult]]:
    """Run Sherlock Analysis.

    Checks for existence of username on various social media sites.

    Keyword Arguments:
    username               -- String indicating username that report
                              should be created against.
    site_data              -- Dictionary containing all of the site data.
                              NOTE:  Each queried site's entry gets a
                                     "request_future" key added to it.
    query_notify           -- Object with base type of QueryNotify().
                              This will be used to notify the caller about
                              query results.
    dump_response          -- Boolean indicating whether to print the details
                              of every response to the standard output.
                              Default is False.
    proxy                  -- String indicating the proxy URL
    timeout                -- Time in seconds to wait before timing out request.
                              Default is 60 seconds.

    Return Value:
    Dictionary containing results from report. Key of dictionary is the name
    of the social network site, and the value is another dictionary with
    the following keys:
        url_main:      URL of main site.
        url_user:      URL of user on site (if account exists).
        status:        QueryResult() object indicating results of test for
                       account existence.
        http_status:   HTTP status code of query which checked for existence on
                       site.
        response_text: Text that came back from request.  May be None if
                       there was an HTTP error when checking for existence.

    Raises:
    RuntimeError           -- A site declares a request_method other than
                              GET, HEAD, POST or PUT.
    """

    # Notify caller that we are starting the query.
    query_notify.start(username)

    # Normal requests
    underlying_session = requests.session()

    # Limit number of workers to 20.
    # This is probably vastly overkill.
    # At least one worker is always requested: a thread pool cannot be
    # created with zero workers, which is what an empty site_data would
    # otherwise ask for.
    max_workers = max(1, min(len(site_data), 20))

    # Create multi-threaded session for all requests.
    session = SherlockFuturesSession(
        max_workers=max_workers, session=underlying_session
    )

    # Results from analysis of all sites
    results_total = {}

    # First create futures for all requests. This allows for the requests to run in parallel
    for social_network, net_info in site_data.items():
        # Results from analysis of this specific site.
        # Record URL of main site.
        results_site = {"url_main": net_info.get("urlMain")}

        # A user agent is needed because some sites don't return the correct
        # information since they think that we are bots (Which we actually are...)
        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0",
        }

        if "headers" in net_info:
            # Override/append any extra headers required by a given site.
            headers.update(net_info["headers"])

        # URL of user on site (if it exists)
        url = interpolate_string(net_info["url"], username.replace(' ', '%20'))

        # Don't make request if username is invalid for the site
        regex_check = net_info.get("regexCheck")
        if regex_check and re.search(regex_check, username) is None:
            # No need to do the check at the site: this username is not allowed.
            results_site["status"] = QueryResult(
                username, social_network, url, QueryStatus.ILLEGAL
            )
            results_site["url_user"] = ""
            results_site["http_status"] = ""
            results_site["response_text"] = ""
            query_notify.update(results_site["status"])
        else:
            # URL of user on site (if it exists)
            results_site["url_user"] = url
            url_probe = net_info.get("urlProbe")
            request_method = net_info.get("request_method")
            request_payload = net_info.get("request_payload")
            request = None

            if request_method is not None:
                if request_method == "GET":
                    request = session.get
                elif request_method == "HEAD":
                    request = session.head
                elif request_method == "POST":
                    request = session.post
                elif request_method == "PUT":
                    request = session.put
                else:
                    raise RuntimeError(f"Unsupported request_method for {url}")

            if request_payload is not None:
                request_payload = interpolate_string(request_payload, username)

            if url_probe is None:
                # Probe URL is normal one seen by people out on the web.
                url_probe = url
            else:
                # There is a special URL for probing existence separate
                # from where the user profile normally can be found.
                url_probe = interpolate_string(url_probe, username)

            if request is None:
                if net_info["errorType"] == "status_code":
                    # In most cases when we are detecting by status code,
                    # it is not necessary to get the entire body:  we can
                    # detect fine with just the HEAD response.
                    request = session.head
                else:
                    # Either this detect method needs the content associated
                    # with the GET response, or this specific website will
                    # not respond properly unless we request the whole page.
                    request = session.get

            # NOTE(review): both comparisons on errorType here only match the
            # plain-string form.  A list-valued errorType always takes the
            # GET / allow-redirects path, even if it contains "response_url".
            # Confirm that is intended.
            if net_info["errorType"] == "response_url":
                # Site forwards request to a different URL if username not
                # found.  Disallow the redirect so we can capture the
                # http status from the original URL request.
                allow_redirects = False
            else:
                # Allow whatever redirect that the site wants to do.
                # The final result of the request will be what is available.
                allow_redirects = True

            # Arguments shared by every request; the proxy mapping is only
            # passed when a proxy was asked for.
            request_kwargs = {
                "url": url_probe,
                "headers": headers,
                "allow_redirects": allow_redirects,
                "timeout": timeout,
                "json": request_payload,
            }
            if proxy is not None:
                request_kwargs["proxies"] = {"http": proxy, "https": proxy}

            # This future starts running the request in a new thread, doesn't block the main thread
            future = request(**request_kwargs)

            # Store future in data for access later
            net_info["request_future"] = future

        # Add this site's results into final dictionary with all the other results.
        results_total[social_network] = results_site

    # As WAFs advance and evolve, they will occasionally block Sherlock and
    # lead to false positives and negatives. Fingerprints should be added
    # here to filter results that fail to bypass WAFs. Fingerprints should
    # be highly targeted. Comment at the end of each fingerprint to
    # indicate target and date fingerprinted.
    # Built once, outside the loop: the list is the same for every site.
    waf_hit_msgs = (
        r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark',  # 2024-05-13 Cloudflare
        r'<span id="challenge-error-text">',  # 2024-11-11 Cloudflare error page
        r'AwsWafIntegration.forceRefreshToken',  # 2024-11-11 Cloudfront (AWS)
        r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:',  # 2024-04-09 PerimeterX / Human Security
    )

    # Second pass: wait for each future and turn its response into a verdict.
    for social_network, net_info in site_data.items():
        # Retrieve results again
        results_site = results_total.get(social_network)

        # Retrieve other site information again
        url = results_site.get("url_user")
        status = results_site.get("status")
        if status is not None:
            # We have already determined the user doesn't exist here
            continue

        # Get the expected error type, always as a list
        error_type = net_info["errorType"]
        if isinstance(error_type, str):
            error_type = [error_type]

        # Retrieve future and ensure it has finished
        future = net_info["request_future"]
        r, error_text, exception_text = get_response(
            request_future=future, error_type=error_type, social_network=social_network
        )

        # Get response time for response of our request.
        try:
            response_time = r.elapsed
        except AttributeError:
            response_time = None

        # Attempt to get request information
        try:
            http_status = r.status_code
        except Exception:
            http_status = "?"
        try:
            response_text = r.text.encode(r.encoding or "UTF-8")
        except Exception:
            response_text = ""

        query_status = QueryStatus.UNKNOWN
        error_context = None

        if error_text is not None:
            error_context = error_text

        elif any(hit_msg in r.text for hit_msg in waf_hit_msgs):
            query_status = QueryStatus.WAF

        else:
            if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
                error_context = f"Unknown error type '{error_type}' for {social_network}"
                query_status = QueryStatus.UNKNOWN
            else:
                if "message" in error_type:
                    # errorMsg can be a single string or a list of strings;
                    # treat the single string as a one-element list.
                    errors = net_info.get("errorMsg")
                    if isinstance(errors, str):
                        errors = [errors]

                    # The username is claimed only if none of the error
                    # messages appears in the HTML.
                    if any(error in r.text for error in errors):
                        query_status = QueryStatus.AVAILABLE
                    else:
                        query_status = QueryStatus.CLAIMED

                if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
                    error_codes = net_info.get("errorCode")
                    query_status = QueryStatus.CLAIMED

                    # Type consistency, allowing for both singlets and lists in manifest
                    if isinstance(error_codes, int):
                        error_codes = [error_codes]

                    if error_codes is not None and r.status_code in error_codes:
                        query_status = QueryStatus.AVAILABLE
                    elif r.status_code >= 300 or r.status_code < 200:
                        query_status = QueryStatus.AVAILABLE

                if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
                    # For this detection method, we have turned off the redirect.
                    # So, there is no need to check the response URL: it will always
                    # match the request.  Instead, we will ensure that the response
                    # code indicates that the request was successful (i.e. no 404, or
                    # forward to some odd redirect).
                    if 200 <= r.status_code < 300:
                        query_status = QueryStatus.CLAIMED
                    else:
                        query_status = QueryStatus.AVAILABLE

        if dump_response:
            print("+++++++++++++++++++++")
            print(f"TARGET NAME : {social_network}")
            print(f"USERNAME : {username}")
            print(f"TARGET URL : {url}")
            print(f"TEST METHOD : {error_type}")
            if "errorCode" in net_info:
                print(f"STATUS CODES : {net_info['errorCode']}")
            print("Results...")
            try:
                print(f"RESPONSE CODE : {r.status_code}")
            except Exception:
                # No response object is available when the request failed.
                pass
            if "errorMsg" in net_info:
                print(f"ERROR TEXT : {net_info['errorMsg']}")
            print(">>>>> BEGIN RESPONSE TEXT")
            try:
                print(r.text)
            except Exception:
                # No response object is available when the request failed.
                pass
            print("<<<<< END RESPONSE TEXT")
            print("VERDICT : " + str(query_status))
            print("+++++++++++++++++++++")

        # Notify caller about results of query.
        result: QueryResult = QueryResult(
            username=username,
            site_name=social_network,
            site_url_user=url,
            status=query_status,
            query_time=response_time,
            context=error_context,
        )
        query_notify.update(result)

        # Save status of request
        results_site["status"] = result

        # Save results from request
        results_site["http_status"] = http_status
        results_site["response_text"] = response_text

        # Add this site's results into final dictionary with all of the other results.
        results_total[social_network] = results_site

    return results_total
|
||||
|
||||
|
||||
def timeout_check(value):
    """Check Timeout Argument.

    Checks timeout for validity.

    Keyword Arguments:
    value                  -- Time in seconds to wait before timing out request.

    Return Value:
    Floating point number representing the time (in seconds) that should be
    used for the timeout.

    NOTE:  Will raise an exception if the timeout is invalid:
           ArgumentTypeError if it is zero, negative, NaN or infinite, and
           whatever float() raises if it is not a number at all.
    """
    # Imported here so the function needs nothing beyond what the module
    # already imports.
    import math

    float_value = float(value)

    # NaN and infinity both pass a plain "<= 0" test, but neither is a
    # usable timeout, so they are rejected explicitly.
    if not math.isfinite(float_value) or float_value <= 0:
        raise ArgumentTypeError(
            f"Invalid timeout value: {value}. Timeout must be a positive number."
        )

    return float_value
|
||||
|
||||
|
||||
def handler(signal_received, frame):
    """Signal handler that ends the program with exit status 0.

    Neither argument is used; they are accepted because signal handlers are
    called with the signal number and the current stack frame.

    Source: https://www.devdungeon.com/content/python-catch-sigint-ctrl-c
    """
    raise SystemExit(0)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the command line, prints an update notice when the latest release
    tag differs from the running version, loads the site manifest (local file,
    URL, or the head commit of an upstream pull request), optionally narrows it
    to the sites requested with --site, runs ``sherlock()`` for every username
    and writes the requested txt / csv / xlsx reports.

    Exits with status 1 when the arguments are inconsistent, the manifest
    cannot be loaded, or none of the requested sites exist.
    """
    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=f"{__longname__} (Version {__version__})",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"{__shortname__} v{__version__}",
        help="Display version information and dependencies.",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        "-d",
        "--debug",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra debugging information and metrics.",
    )
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        help="If using multiple usernames, the output of the results will be saved to this folder.",
    )
    parser.add_argument(
        "--output",
        "-o",
        dest="output",
        help="If using single username, the output of the result will be saved to this file.",
    )
    parser.add_argument(
        "--csv",
        action="store_true",
        dest="csv",
        default=False,
        help="Create Comma-Separated Values (CSV) File.",
    )
    parser.add_argument(
        "--xlsx",
        action="store_true",
        dest="xlsx",
        default=False,
        help="Create the standard file for the modern Microsoft Excel spreadsheet (xlsx).",
    )
    parser.add_argument(
        "--site",
        action="append",
        metavar="SITE_NAME",
        dest="site_list",
        default=[],
        help="Limit analysis to just the listed sites. Add multiple options to specify more than one site.",
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar="PROXY_URL",
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
    )
    parser.add_argument(
        "--dump-response",
        action="store_true",
        dest="dump_response",
        default=False,
        help="Dump the HTTP response to stdout for targeted debugging.",
    )
    parser.add_argument(
        "--json",
        "-j",
        metavar="JSON_FILE",
        dest="json_file",
        default=None,
        help="Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.",
    )
    parser.add_argument(
        "--timeout",
        action="store",
        metavar="TIMEOUT",
        dest="timeout",
        type=timeout_check,
        default=60,
        help="Time (in seconds) to wait for response to requests (Default: 60)",
    )
    parser.add_argument(
        "--print-all",
        action="store_true",
        dest="print_all",
        default=False,
        help="Output sites where the username was not found.",
    )
    parser.add_argument(
        "--print-found",
        action="store_true",
        dest="print_found",
        default=True,
        help="Output sites where the username was found (also if exported as file).",
    )
    parser.add_argument(
        "--no-color",
        action="store_true",
        dest="no_color",
        default=False,
        help="Don't color terminal output",
    )
    parser.add_argument(
        "username",
        nargs="+",
        metavar="USERNAMES",
        action="store",
        help="One or more usernames to check with social networks. Check similar usernames using {?} (replace to '_', '-', '.').",
    )
    parser.add_argument(
        "--browse",
        "-b",
        action="store_true",
        dest="browse",
        default=False,
        help="Browse to all results on default browser.",
    )

    parser.add_argument(
        "--local",
        "-l",
        action="store_true",
        default=False,
        help="Force the use of the local data.json file.",
    )

    parser.add_argument(
        "--nsfw",
        action="store_true",
        default=False,
        help="Include checking of NSFW sites from default list.",
    )

    parser.add_argument(
        "--txt",
        action="store_true",
        dest="output_txt",
        default=False,
        help="Enable creation of a txt file",
    )

    parser.add_argument(
        "--ignore-exclusions",
        action="store_true",
        dest="ignore_exclusions",
        default=False,
        help="Ignore upstream exclusions (may return more false positives)",
    )

    args = parser.parse_args()

    # If the user presses CTRL-C, exit gracefully without throwing errors
    signal.signal(signal.SIGINT, handler)

    # Check for newer version of Sherlock. If it exists, let the user know about it
    try:
        latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
        latest_release_json = json_loads(latest_release_raw)
        latest_remote_tag = latest_release_json["tag_name"]

        # The first character of the tag is dropped before comparing
        # (presumably a leading "v" -- the tag format is not visible here).
        if latest_remote_tag[1:] != __version__:
            print(
                f"Update available! {__version__} --> {latest_remote_tag[1:]}"
                f"\n{latest_release_json['html_url']}"
            )

    except Exception as error:
        # The update check is best-effort; report the problem and carry on.
        print(f"A problem occurred while checking for an update: {error}")

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    if args.no_color:
        # Disable color output.
        init(strip=True, convert=False)
    else:
        # Enable color output.
        init(autoreset=True)

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)

    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)

    # Create object with all information about sites we are aware of.
    try:
        if args.local:
            sites = SitesInformation(
                os.path.join(os.path.dirname(__file__), "resources/data.json"),
                honor_exclusions=False,
            )
        else:
            json_file_location = args.json_file
            if args.json_file:
                # If --json parameter is a number, interpret it as a pull request number
                if args.json_file.isnumeric():
                    pull_number = args.json_file
                    pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
                    pull_request_raw = requests.get(pull_url, timeout=10).text
                    pull_request_json = json_loads(pull_request_raw)

                    # Check if it's a valid pull request
                    if "message" in pull_request_json:
                        print(f"ERROR: Pull request #{pull_number} not found.")
                        sys.exit(1)

                    head_commit_sha = pull_request_json["head"]["sha"]
                    json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"

            sites = SitesInformation(
                data_file_path=json_file_location,
                honor_exclusions=not args.ignore_exclusions,
                do_not_exclude=args.site_list,
            )
    except Exception as error:
        print(f"ERROR: {error}")
        sys.exit(1)

    if not args.nsfw:
        sites.remove_nsfw_sites(do_not_remove=args.site_list)

    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
    site_data_all = {site.name: site.information for site in sites}
    if not args.site_list:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.
        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            # Site names are matched case-insensitively.
            wanted = site.lower()
            matches = [
                existing_site
                for existing_site in site_data_all
                if existing_site.lower() == wanted
            ]
            for existing_site in matches:
                site_data[existing_site] = site_data_all[existing_site]

            if not matches:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")

        if site_missing:
            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")

        if not site_data:
            sys.exit(1)

    # Create notify object for query results.
    query_notify = QueryNotifyPrint(
        result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse
    )

    # Run report on all specified users.
    all_usernames = []
    for username in args.username:
        if check_for_parameter(username):
            all_usernames.extend(multiple_usernames(username))
        else:
            all_usernames.append(username)

    for username in all_usernames:
        results = sherlock(
            username,
            site_data,
            query_notify,
            dump_response=args.dump_response,
            proxy=args.proxy,
            timeout=args.timeout,
        )

        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The usernames results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"

        if args.output_txt:
            with open(result_file, "w", encoding="utf-8") as file:
                exists_counter = 0
                for website_name in results:
                    dictionary = results[website_name]
                    if dictionary.get("status").status == QueryStatus.CLAIMED:
                        exists_counter += 1
                        file.write(dictionary["url_user"] + "\n")
                file.write(f"Total Websites Username Detected On : {exists_counter}\n")

        if args.csv:
            result_file = f"{username}.csv"
            if args.folderoutput:
                # The usernames results should be stored in a targeted folder.
                # If the folder doesn't exist, create it first
                os.makedirs(args.folderoutput, exist_ok=True)
                result_file = os.path.join(args.folderoutput, result_file)

            with open(result_file, "w", newline="", encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow(
                    [
                        "username",
                        "name",
                        "url_main",
                        "url_user",
                        "exists",
                        "http_status",
                        "response_time_s",
                    ]
                )
                for site in results:
                    # Unless --print-all was given, only claimed sites are exported.
                    if (
                        args.print_found
                        and not args.print_all
                        and results[site]["status"].status != QueryStatus.CLAIMED
                    ):
                        continue

                    response_time_s = results[site]["status"].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow(
                        [
                            username,
                            site,
                            results[site]["url_main"],
                            results[site]["url_user"],
                            str(results[site]["status"].status),
                            results[site]["http_status"],
                            response_time_s,
                        ]
                    )

        if args.xlsx:
            usernames = []
            names = []
            url_main = []
            url_user = []
            exists = []
            http_status = []
            response_time_s = []

            for site in results:
                # Same filter as the CSV export.
                if (
                    args.print_found
                    and not args.print_all
                    and results[site]["status"].status != QueryStatus.CLAIMED
                ):
                    continue

                # Test the site's own query time, not the accumulator list
                # (which is never None), so missing timings become "" exactly
                # as they do in the CSV export.
                query_time = results[site]["status"].query_time
                response_time_s.append("" if query_time is None else query_time)
                usernames.append(username)
                names.append(site)
                url_main.append(results[site]["url_main"])
                url_user.append(results[site]["url_user"])
                exists.append(str(results[site]["status"].status))
                http_status.append(results[site]["http_status"])

            report_frame = pd.DataFrame(
                {
                    "username": usernames,
                    "name": names,
                    "url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main],
                    "url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user],
                    "exists": exists,
                    "http_status": http_status,
                    "response_time_s": response_time_s,
                }
            )

            # Honour --folderoutput the same way the CSV export does.
            xlsx_file = f"{username}.xlsx"
            if args.folderoutput:
                os.makedirs(args.folderoutput, exist_ok=True)
                xlsx_file = os.path.join(args.folderoutput, xlsx_file)
            report_frame.to_excel(xlsx_file, sheet_name="sheet1", index=False)

        print()
    query_notify.finish()
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
@@ -1,260 +0,0 @@
|
||||
"""Sherlock Sites Information Module
|
||||
|
||||
This module supports storing information about websites.
|
||||
This is the raw data that will be used to search for usernames.
|
||||
"""
|
||||
import json
|
||||
import requests
|
||||
import secrets
|
||||
|
||||
|
||||
MANIFEST_URL = "https://data.sherlockproject.xyz"
|
||||
EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"
|
||||
|
||||
class SiteInformation:
    """Information about one website that can be probed for usernames."""

    def __init__(self, name, url_home, url_username_format, username_claimed,
                 information, is_nsfw, username_unclaimed=None):
        """Create Site Information Object.

        Contains information about a specific website.

        Keyword Arguments:
        self                   -- This object.
        name                   -- String which identifies site.
        url_home               -- String containing URL for home of site.
        url_username_format    -- String containing URL for Username format
                                  on site.
                                  NOTE:  The string should contain the
                                         token "{}" where the username should
                                         be substituted.  For example, a string
                                         of "https://somesite.com/users/{}"
                                         indicates that the individual
                                         usernames would show up under the
                                         "https://somesite.com/users/" area of
                                         the website.
        username_claimed       -- String containing username which is known
                                  to be claimed on website.
        username_unclaimed     -- String containing username which is known
                                  to be unclaimed on website.  When omitted
                                  (None), a fresh random URL-safe token is
                                  generated for this object.
        information            -- Dictionary containing all known information
                                  about website.
                                  NOTE:  Custom information about how to
                                         actually detect the existence of the
                                         username will be included in this
                                         dictionary.  This information will
                                         be needed by the detection method,
                                         but it is only recorded in this
                                         object for future use.
        is_nsfw                -- Boolean indicating if site is Not Safe For Work.

        Return Value:
        Nothing.
        """

        self.name = name
        self.url_home = url_home
        self.url_username_format = url_username_format

        self.username_claimed = username_claimed
        # Generate the token per instance.  A default written in the signature
        # would be evaluated only once, and an explicitly supplied value must
        # not be thrown away.
        if username_unclaimed is None:
            username_unclaimed = secrets.token_urlsafe(32)
        self.username_unclaimed = username_unclaimed
        self.information = information
        self.is_nsfw = is_nsfw

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nicely formatted string to get information about this object.
        """

        return f"{self.name} ({self.url_home})"
|
||||
|
||||
|
||||
class SitesInformation:
|
||||
def __init__(
|
||||
self,
|
||||
data_file_path: str|None = None,
|
||||
honor_exclusions: bool = True,
|
||||
do_not_exclude: list[str] = [],
|
||||
):
|
||||
"""Create Sites Information Object.
|
||||
|
||||
Contains information about all supported websites.
|
||||
|
||||
Keyword Arguments:
|
||||
self -- This object.
|
||||
data_file_path -- String which indicates path to data file.
|
||||
The file name must end in ".json".
|
||||
|
||||
There are 3 possible formats:
|
||||
* Absolute File Format
|
||||
For example, "c:/stuff/data.json".
|
||||
* Relative File Format
|
||||
The current working directory is used
|
||||
as the context.
|
||||
For example, "data.json".
|
||||
* URL Format
|
||||
For example,
|
||||
"https://example.com/data.json", or
|
||||
"http://example.com/data.json".
|
||||
|
||||
An exception will be thrown if the path
|
||||
to the data file is not in the expected
|
||||
format, or if there was any problem loading
|
||||
the file.
|
||||
|
||||
If this option is not specified, then a
|
||||
default site list will be used.
|
||||
|
||||
Return Value:
|
||||
Nothing.
|
||||
"""
|
||||
|
||||
if not data_file_path:
|
||||
# The default data file is the live data.json which is in the GitHub repo. The reason why we are using
|
||||
# this instead of the local one is so that the user has the most up-to-date data. This prevents
|
||||
# users from creating issue about false positives which has already been fixed or having outdated data
|
||||
data_file_path = MANIFEST_URL
|
||||
|
||||
if data_file_path.lower().startswith("http"):
|
||||
# Reference is to a URL.
|
||||
try:
|
||||
response = requests.get(url=data_file_path, timeout=30)
|
||||
except Exception as error:
|
||||
raise FileNotFoundError(
|
||||
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
raise FileNotFoundError(f"Bad response while accessing "
|
||||
f"data file URL '{data_file_path}'."
|
||||
)
|
||||
try:
|
||||
site_data = response.json()
|
||||
except Exception as error:
|
||||
raise ValueError(
|
||||
f"Problem parsing json contents at '{data_file_path}': {error}."
|
||||
)
|
||||
|
||||
else:
|
||||
# Reference is to a file.
|
||||
try:
|
||||
with open(data_file_path, "r", encoding="utf-8") as file:
|
||||
try:
|
||||
site_data = json.load(file)
|
||||
except Exception as error:
|
||||
raise ValueError(
|
||||
f"Problem parsing json contents at '{data_file_path}': {error}."
|
||||
)
|
||||
|
||||
except FileNotFoundError:
|
||||
raise FileNotFoundError(f"Problem while attempting to access "
|
||||
f"data file '{data_file_path}'."
|
||||
)
|
||||
|
||||
site_data.pop('$schema', None)
|
||||
|
||||
if honor_exclusions:
|
||||
try:
|
||||
response = requests.get(url=EXCLUSIONS_URL, timeout=10)
|
||||
if response.status_code == 200:
|
||||
exclusions = response.text.splitlines()
|
||||
exclusions = [exclusion.strip() for exclusion in exclusions]
|
||||
|
||||
for site in do_not_exclude:
|
||||
if site in exclusions:
|
||||
exclusions.remove(site)
|
||||
|
||||
for exclusion in exclusions:
|
||||
try:
|
||||
site_data.pop(exclusion, None)
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
except Exception:
|
||||
# If there was any problem loading the exclusions, just continue without them
|
||||
print("Warning: Could not load exclusions, continuing without them.")
|
||||
honor_exclusions = False
|
||||
|
||||
self.sites = {}
|
||||
|
||||
# Add all site information from the json file to internal site list.
|
||||
for site_name in site_data:
|
||||
try:
|
||||
|
||||
self.sites[site_name] = \
|
||||
SiteInformation(site_name,
|
||||
site_data[site_name]["urlMain"],
|
||||
site_data[site_name]["url"],
|
||||
site_data[site_name]["username_claimed"],
|
||||
site_data[site_name],
|
||||
site_data[site_name].get("isNSFW",False)
|
||||
|
||||
)
|
||||
except KeyError as error:
|
||||
raise ValueError(
|
||||
f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}."
|
||||
)
|
||||
except TypeError:
|
||||
print(f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n")
|
||||
|
||||
return
|
||||
|
||||
def remove_nsfw_sites(self, do_not_remove: list = []):
|
||||
"""
|
||||
Remove NSFW sites from the sites, if isNSFW flag is true for site
|
||||
|
||||
Keyword Arguments:
|
||||
self -- This object.
|
||||
|
||||
Return Value:
|
||||
None
|
||||
"""
|
||||
sites = {}
|
||||
do_not_remove = [site.casefold() for site in do_not_remove]
|
||||
for site in self.sites:
|
||||
if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
|
||||
continue
|
||||
sites[site] = self.sites[site]
|
||||
self.sites = sites
|
||||
|
||||
def site_name_list(self):
|
||||
"""Get Site Name List.
|
||||
|
||||
Keyword Arguments:
|
||||
self -- This object.
|
||||
|
||||
Return Value:
|
||||
List of strings containing names of sites.
|
||||
"""
|
||||
|
||||
return sorted([site.name for site in self], key=str.lower)
|
||||
|
||||
def __iter__(self):
|
||||
"""Iterator For Object.
|
||||
|
||||
Keyword Arguments:
|
||||
self -- This object.
|
||||
|
||||
Return Value:
|
||||
Iterator for sites object.
|
||||
"""
|
||||
|
||||
for site_name in self.sites:
|
||||
yield self.sites[site_name]
|
||||
|
||||
def __len__(self):
|
||||
"""Length For Object.
|
||||
|
||||
Keyword Arguments:
|
||||
self -- This object.
|
||||
|
||||
Return Value:
|
||||
Length of sites object.
|
||||
"""
|
||||
return len(self.sites)
|
||||
@@ -1,51 +0,0 @@
|
||||
import os
|
||||
import json
|
||||
import urllib
|
||||
import pytest
|
||||
from sherlock_project.sites import SitesInformation
|
||||
|
||||
def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]:
    """Load the repository's data.json and return it as a name -> information dict.

    ``honor_exclusions`` is forwarded unchanged to ``SitesInformation``.
    """
    manifest_path = os.path.join(
        os.path.dirname(__file__),
        "../sherlock_project/resources/data.json",
    )
    loaded_sites = SitesInformation(
        data_file_path=manifest_path,
        honor_exclusions=honor_exclusions,
    )
    manifest: dict[str, dict[str, str]] = {}
    for entry in loaded_sites:
        manifest[entry.name] = entry.information
    return manifest
|
||||
|
||||
@pytest.fixture()
def sites_obj():
    """Function-scoped fixture yielding a SitesInformation built from the local data.json."""
    tests_dir = os.path.dirname(__file__)
    manifest_path = os.path.join(tests_dir, "../sherlock_project/resources/data.json")
    yield SitesInformation(data_file_path=manifest_path)
|
||||
|
||||
@pytest.fixture(scope="session")
def sites_info():
    """Session-scoped fixture yielding the local manifest as a name -> information dict."""
    local_manifest = fetch_local_manifest()
    yield local_manifest
|
||||
|
||||
@pytest.fixture(scope="session")
def remote_schema():
    """Session-scoped fixture yielding the JSON schema published on the master branch.

    Requires network access; any error raised by ``urlopen`` or by the JSON
    parser propagates to the tests that request this fixture.
    """
    # "import urllib" alone does not guarantee that the "request" submodule is
    # loaded, so import it explicitly before using urllib.request.urlopen.
    import urllib.request

    schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.schema.json'
    with urllib.request.urlopen(schema_url) as remoteschema:
        schemadat = json.load(remoteschema)
    yield schemadat
|
||||
|
||||
def pytest_addoption(parser):
    """Register the ``--chunked-sites`` command-line option on ``parser``."""
    option_settings = {
        "action": "store",
        "default": None,
        "help": "For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.",
    }
    parser.addoption("--chunked-sites", **option_settings)
|
||||
|
||||
def pytest_generate_tests(metafunc):
    """Parametrize ``chunked_sites`` with one single-site dict per manifest entry.

    Does nothing for tests that do not request ``chunked_sites``.  When
    ``--chunked-sites`` is given, only the comma-separated site names listed
    there are kept.  The manifest is loaded without applying exclusions.
    """
    if "chunked_sites" not in metafunc.fixturenames:
        return

    manifest = fetch_local_manifest(honor_exclusions=False)

    # Ingest and apply site selections
    requested: str | None = metafunc.config.getoption("--chunked-sites")
    if requested:
        wanted: list[str] = []
        for raw_name in requested.split(","):
            wanted.append(raw_name.strip())
        manifest = {
            name: info for name, info in manifest.items()
            if name in wanted
        }

    single_site_params = []
    test_ids = []
    for name, info in manifest.items():
        single_site_params.append({name: info})
        test_ids.append(name)
    metafunc.parametrize("chunked_sites", single_site_params, ids=test_ids)
|
||||
@@ -1,7 +0,0 @@
|
||||
import sherlock_project
|
||||
|
||||
#from sherlock.sites import SitesInformation
|
||||
#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")
|
||||
|
||||
def test_username_via_message():
    """Invoke ``sherlock_project.__main__`` with the ``--version`` argument."""
    # NOTE(review): what `sherlock_project.__main__` resolves to is not visible
    # from this file; if it is the `__main__` submodule this call would raise
    # TypeError -- confirm.  The test name also mentions usernames while the
    # body only passes "--version".
    sherlock_project.__main__("--version")
|
||||
@@ -1,38 +0,0 @@
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
class Interactives:
    """Test helpers that exercise Sherlock from the outside (CLI and source tree)."""

    @staticmethod
    def run_cli(args: str = "") -> str:
        """Pass arguments to Sherlock as a normal user on the command line

        Returns the decoded combined stdout/stderr of the process.  Raises
        InteractivesSubprocessError carrying the decoded output when the
        process exits with a non-zero status.
        """
        # Adapt for platform differences (Windows likes to be special)
        if platform.system() == "Windows":
            command: str = f"py -m sherlock_project {args}"
        else:
            command: str = f"sherlock {args}"

        # NOTE: the command line is interpreted by the shell (shell=True), so
        # `args` must only ever come from trusted test code.
        try:
            proc_out: bytes = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            raise InteractivesSubprocessError(e.output.decode()) from e
        return proc_out.decode()

    @staticmethod
    def walk_sherlock_for_files_with(pattern: str) -> list[str]:
        """Check all files within the Sherlock package for matching patterns

        Walks the "sherlock_project" directory relative to the current working
        directory and returns the paths of all files whose text contains a
        match for the regular expression `pattern`.  Paths containing
        "__pycache__" are skipped.
        """
        compiled: re.Pattern = re.compile(pattern)
        matching_files: list[str] = []
        for root, _dirs, files in os.walk("sherlock_project"):
            for file in files:
                file_path = os.path.join(root, file)
                if "__pycache__" in file_path:
                    continue
                # Explicit encoding keeps the scan independent of the platform
                # default; undecodable bytes are dropped rather than raising.
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    if compiled.search(f.read()):
                        matching_files.append(file_path)
        return matching_files
|
||||
|
||||
class InteractivesSubprocessError(Exception):
    """Raised by Interactives.run_cli when the Sherlock subprocess exits with an error."""
|
||||
@@ -1,39 +0,0 @@
|
||||
import os
|
||||
import json
|
||||
import pytest
|
||||
from jsonschema import validate
|
||||
|
||||
def test_validate_manifest_against_local_schema():
    """Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
    json_relative: str = '../sherlock_project/resources/data.json'
    schema_relative: str = '../sherlock_project/resources/data.schema.json'

    json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
    schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)

    # Read as UTF-8 explicitly (the same encoding SitesInformation uses for
    # the manifest) instead of relying on the platform default encoding.
    with open(json_path, 'r', encoding='utf-8') as f:
        jsondat = json.load(f)
    with open(schema_path, 'r', encoding='utf-8') as f:
        schemadat = json.load(f)

    # validate() raises when the instance does not conform to the schema.
    validate(instance=jsondat, schema=schemadat)
|
||||
|
||||
|
||||
@pytest.mark.online
def test_validate_manifest_against_remote_schema(remote_schema):
    """Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients."""
    json_relative: str = '../sherlock_project/resources/data.json'
    json_path: str = os.path.join(os.path.dirname(__file__), json_relative)

    # Read as UTF-8 explicitly (the same encoding SitesInformation uses for
    # the manifest) instead of relying on the platform default encoding.
    with open(json_path, 'r', encoding='utf-8') as f:
        jsondat = json.load(f)

    # validate() raises when the instance does not conform to the schema.
    validate(instance=jsondat, schema=remote_schema)
|
||||
|
||||
# Ensure that the expected values are being returned by the site list
|
||||
@pytest.mark.parametrize("target_name,target_expected_err_type", [
    ('GitHub', 'status_code'),
    ('GitLab', 'message'),
])
def test_site_list_iterability(sites_info, target_name, target_expected_err_type):
    """Check that the named site's ``errorType`` in the loaded manifest has the expected value."""
    target_entry = sites_info[target_name]
    actual_err_type = target_entry['errorType']
    assert actual_err_type == target_expected_err_type
|
||||
@@ -1,105 +0,0 @@
|
||||
import pytest
|
||||
import random
|
||||
import string
|
||||
import re
|
||||
from sherlock_project.sherlock import sherlock
|
||||
from sherlock_project.notify import QueryNotify
|
||||
from sherlock_project.result import QueryStatus
|
||||
#from sherlock_interactives import Interactives
|
||||
|
||||
|
||||
def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus:
    """Run ``sherlock`` for ``username`` against the single ``site`` and return its status.

    The site's entry is taken from ``sites_info``; the returned value is the
    ``status`` attribute of that site's result.
    """
    notifier = QueryNotify()
    single_site: dict = {site: sites_info[site]}
    outcome = sherlock(
        username=username,
        site_data=single_site,
        query_notify=notifier,
    )
    site_result = outcome[site]['status']
    return site_result.status
|
||||
|
||||
|
||||
@pytest.mark.online
class TestLiveTargets:
    """Actively test probes against live and trusted targets"""

    @staticmethod
    def _assert_likely_available(sites_info, site, random_len, num_attempts: int = 3):
        """Assert that a random alphanumeric username of ``random_len`` is reported AVAILABLE.

        Up to ``num_attempts`` freshly generated usernames are tried, stopping
        at the first one reported as available, ~just in case~ a real username
        is generated.
        """
        acceptable_types = string.ascii_letters + string.digits
        attempted_usernames: list[str] = []
        status: QueryStatus = QueryStatus.CLAIMED
        for _ in range(num_attempts):
            random_handle = ''.join(random.choice(acceptable_types) for _ in range(random_len))
            attempted_usernames.append(random_handle)
            status = simple_query(sites_info=sites_info, site=site, username=random_handle)
            if status is QueryStatus.AVAILABLE:
                break
        assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}."

    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username',[
        ('GitLab', 'ppfeister'),
        ('AllMyLinks', 'blue'),
    ])
    def test_known_positives_via_message(self, sites_info, site, username):
        """The given username must be reported as CLAIMED on the given site."""
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED

    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username',[
        ('GitHub', 'ppfeister'),
        ('GitHub', 'sherlock-project'),
        ('Docker Hub', 'ppfeister'),
        ('Docker Hub', 'sherlock'),
    ])
    def test_known_positives_via_status_code(self, sites_info, site, username):
        """The given username must be reported as CLAIMED on the given site."""
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED

    # Known positives should only use sites trusted to be reliable and unchanging
    @pytest.mark.parametrize('site,username',[
        ('Keybase', 'blue'),
        ('devRant', 'blue'),
    ])
    def test_known_positives_via_response_url(self, sites_info, site, username):
        """The given username must be reported as CLAIMED on the given site."""
        assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED

    # Randomly generate usernames of high length and test for positive availability
    # Randomly generated usernames should be simple alnum for simplicity and high
    # compatibility. Several attempts may be made ~just in case~ a real username is
    # generated.
    @pytest.mark.parametrize('site,random_len',[
        ('GitLab', 255),
        ('Codecademy', 30)
    ])
    def test_likely_negatives_via_message(self, sites_info, site, random_len):
        """A random username of the given length must be reported as AVAILABLE."""
        self._assert_likely_available(sites_info, site, random_len)

    # Randomly generate usernames of high length and test for positive availability
    # Randomly generated usernames should be simple alnum for simplicity and high
    # compatibility. Several attempts may be made ~just in case~ a real username is
    # generated.
    @pytest.mark.parametrize('site,random_len',[
        ('GitHub', 39),
        ('Docker Hub', 30)
    ])
    def test_likely_negatives_via_status_code(self, sites_info, site, random_len):
        """A random username of the given length must be reported as AVAILABLE."""
        self._assert_likely_available(sites_info, site, random_len)
|
||||
|
||||
|
||||
def test_username_illegal_regex(sites_info):
    """A username rejected by the site's ``regexCheck`` must yield QueryStatus.ILLEGAL."""
    target_site: str = 'BitBucket'
    bad_username: str = '*#$Y&*JRE'
    site_regex = re.compile(sites_info[target_site]['regexCheck'])
    # Ensure that the username actually fails regex before testing sherlock
    regex_match = site_regex.match(bad_username)
    assert regex_match is None
    verdict = simple_query(sites_info=sites_info, site=target_site, username=bad_username)
    assert verdict is QueryStatus.ILLEGAL
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
"""Tests for handling usernames with special/unicode characters."""
|
||||
|
||||
from concurrent.futures import Future
|
||||
|
||||
from sherlock_project.sherlock import get_response
|
||||
|
||||
|
||||
def _make_future_with_exception(exc):
    """Return a Future that has already been completed with ``exc`` as its exception."""
    failed = Future()
    failed.set_exception(exc)
    return failed
|
||||
|
||||
|
||||
def test_get_response_handles_unicode_decode_error():
    """Regression test for issue #2730.

    Usernames with special characters (e.g. 'Émile') can trigger a
    UnicodeDecodeError inside the requests library during redirect
    handling. This must not crash the program.
    """
    decode_error = UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte")

    outcome = get_response(
        request_future=_make_future_with_exception(decode_error),
        error_type=["status_code"],
        social_network="TestSite",
    )
    response, error_context, exception_text = outcome

    assert response is None
    assert error_context == "Encoding Error"
    assert "utf-8" in exception_text
|
||||
|
||||
|
||||
def test_get_response_handles_unicode_encode_error():
    """UnicodeEncodeError should also be caught (subclass of UnicodeError)."""
    encode_error = UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)")

    outcome = get_response(
        request_future=_make_future_with_exception(encode_error),
        error_type=["status_code"],
        social_network="TestSite",
    )
    response, error_context, exception_text = outcome

    assert response is None
    assert error_context == "Encoding Error"
    assert "ascii" in exception_text
|
||||
@@ -1,43 +0,0 @@
|
||||
import pytest
|
||||
from sherlock_project import sherlock
|
||||
from sherlock_interactives import Interactives
|
||||
from sherlock_interactives import InteractivesSubprocessError
|
||||
|
||||
def test_remove_nsfw(sites_obj):
    """Check that remove_nsfw_sites() drops the 'Xvideos' entry from the site list."""
    nsfw_target: str = 'Xvideos'

    # Snapshot of the sites before filtering: the target must be present.
    before = {entry.name: entry.information for entry in sites_obj}
    assert nsfw_target in before

    sites_obj.remove_nsfw_sites()

    # Snapshot after filtering: the target must be gone.
    after = {entry.name: entry.information for entry in sites_obj}
    assert nsfw_target not in after
|
||||
|
||||
|
||||
# Parametrized sites should *not* include Motherless, which is acting as the control
@pytest.mark.parametrize('nsfwsites', [
    ['Xvideos'],
    ['Xvideos', 'Erome'],
])
def test_nsfw_explicit_selection(sites_obj, nsfwsites):
    """Check that sites passed via ``do_not_remove`` survive NSFW filtering.

    Every site in *nsfwsites* must be present before and after
    ``remove_nsfw_sites(do_not_remove=nsfwsites)``, while the control site
    'Motherless' must be absent afterwards.
    """
    # Build the name set once per phase instead of once per membership test.
    names_before = {entry.name for entry in sites_obj}
    for wanted in nsfwsites:
        assert wanted in names_before

    sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites)

    names_after = {entry.name for entry in sites_obj}
    for wanted in nsfwsites:
        assert wanted in names_after
    assert 'Motherless' not in names_after
|
||||
|
||||
def test_wildcard_username_expansion():
    """Check detection of the ``{?}`` wildcard and its expansion into usernames."""
    # (candidate username, whether check_for_parameter should flag it)
    detection_cases = (
        ('test{?}test', True),
        ('test{.}test', False),
        ('test{}test', False),
        ('testtest', False),
        ('test{?test', False),
        ('test?}test', False),
    )
    for candidate, expected in detection_cases:
        assert sherlock.check_for_parameter(candidate) is expected

    expanded = sherlock.multiple_usernames('test{?}test')
    assert expanded == ["test_test", "test-test", "test.test"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('cliargs', [
    '',
    '--site urghrtuight --egiotr',
    '--',
])
def test_no_usernames_provided(cliargs):
    """Check that the CLI reports the missing USERNAMES argument for each *cliargs*."""
    missing_usernames = r"error: the following arguments are required: USERNAMES"
    with pytest.raises(InteractivesSubprocessError, match=missing_usernames):
        Interactives.run_cli(cliargs)
|
||||
@@ -1,100 +0,0 @@
|
||||
import pytest
|
||||
import re
|
||||
import rstr
|
||||
|
||||
from sherlock_project.sherlock import sherlock
|
||||
from sherlock_project.notify import QueryNotify
|
||||
from sherlock_project.result import QueryResult, QueryStatus
|
||||
|
||||
|
||||
# Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit
FALSE_POSITIVE_ATTEMPTS: int = 2
# If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable)
FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15
# Used in absence of a regexCheck entry
FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$'

# Matches an open-ended repeat such as `{3,}` at a given position.
_OPEN_ENDED_REPEAT = re.compile(r'\{(\d+),\}')


def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str:
    """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`.

    The pattern is scanned once, left to right:

    * ``+`` becomes ``{1,upper_bound}`` and ``*`` becomes ``{0,upper_bound}``;
    * ``{n,}`` becomes ``{n,max(n, upper_bound)}`` so the range stays valid
      when ``n`` exceeds the cap (this does not affect later quantifiers);
    * escaped characters (``\\+``, ``\\*``, ``\\{``) and anything inside a
      character class (``[...]``) are copied through untouched, because there
      they are literals rather than quantifiers.

    Args:
        pattern: Regular expression source to rewrite.
        upper_bound: Maximum repetition count for open-ended quantifiers.
            A value of 0 or less disables the rewrite.

    Returns:
        The rewritten pattern, or *pattern* unchanged when the cap is disabled.
    """
    if upper_bound <= 0:
        return pattern

    pieces: list[str] = []
    in_class = False
    pos = 0
    while pos < len(pattern):
        char = pattern[pos]

        # An escape always consumes the following character verbatim.
        if char == '\\':
            pieces.append(pattern[pos:pos + 2])
            pos += 2
            continue

        # Inside `[...]` every character is a literal until the closing `]`.
        if in_class:
            in_class = char != ']'
            pieces.append(char)
            pos += 1
            continue

        if char == '[':
            # A `]` directly after `[` or `[^` is a literal member, not the end.
            end = pos + 1
            if pattern.startswith('^', end):
                end += 1
            if pattern.startswith(']', end):
                end += 1
            pieces.append(pattern[pos:end])
            pos = end
            in_class = True
            continue

        if char == '{':
            repeat = _OPEN_ENDED_REPEAT.match(pattern, pos)
            if repeat:
                lower = int(repeat.group(1))
                pieces.append(f'{{{lower},{max(lower, upper_bound)}}}')
                pos = repeat.end()
                continue

        if char == '+':
            pieces.append(f'{{1,{upper_bound}}}')
        elif char == '*':
            pieces.append(f'{{0,{upper_bound}}}')
        else:
            pieces.append(char)
        pos += 1

    return ''.join(pieces)
|
||||
|
||||
def _query_site_status(sites_info: dict[str, dict[str, str]], site: str, username: str) -> QueryStatus:
    """Query a single site for *username* and return its validated status.

    Only the entry for *site* is handed to ``sherlock`` so that probing one
    target does not also send requests to every other site in *sites_info*.

    Raises:
        TypeError: If the returned result has no ``status`` attribute, or the
            attribute is not a ``QueryStatus``.
    """
    result: QueryResult | str = sherlock(
        username=username,
        site_data={site: sites_info[site]},
        query_notify=QueryNotify(),
    )[site]['status']

    if not hasattr(result, 'status'):
        raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
    if not isinstance(result.status, QueryStatus):  # type: ignore
        raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}")  # type: ignore
    return result.status  # type: ignore


def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus:
    """Check if a site is likely to produce false positives.

    A username is generated from *pattern* and queried against *site*, up to
    ``FALSE_POSITIVE_ATTEMPTS`` times. The loop stops early once the status is
    ``AVAILABLE`` or ``WAF``; otherwise the status of the last attempt is
    returned.
    """
    status: QueryStatus = QueryStatus.UNKNOWN

    for _ in range(FALSE_POSITIVE_ATTEMPTS):
        status = _query_site_status(sites_info, site, rstr.xeger(pattern))
        if status in (QueryStatus.AVAILABLE, QueryStatus.WAF):
            break

    return status


def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus:
    """Check if a site is likely to produce false negatives.

    Queries *site* once with its own ``username_claimed`` entry and returns
    the resulting status.
    """
    return _query_site_status(sites_info, site, sites_info[site]['username_claimed'])
|
||||
|
||||
@pytest.mark.validate_targets
@pytest.mark.online
class Test_All_Targets:
    """Validation of every manifest target for false positives and false negatives."""

    @pytest.mark.validate_targets_fp
    def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]):
        """Iterate through all sites in the manifest to discover possible false-positive inducing targets."""
        for site, info in chunked_sites.items():
            # Fall back to the default pattern when the site has no regexCheck.
            pattern: str = info.get('regexCheck', FALSE_POSITIVE_DEFAULT_PATTERN)

            if FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0:
                pattern = set_pattern_upper_bound(pattern)

            outcome: QueryStatus = false_positive_check(chunked_sites, site, pattern)
            assert outcome is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {outcome}"

    @pytest.mark.validate_targets_fn
    def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]):
        """Iterate through all sites in the manifest to discover possible false-negative inducing targets."""
        for site in chunked_sites:
            outcome: QueryStatus = false_negative_check(chunked_sites, site)
            assert outcome is QueryStatus.CLAIMED, f"{site} produced false negative, result was {outcome}"
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
import os
|
||||
from sherlock_interactives import Interactives
|
||||
import sherlock_project
|
||||
|
||||
def test_versioning() -> None:
    """Check the reported version and where ``__version__`` is assigned."""
    # Ensure __version__ matches version presented to the user
    cli_output = Interactives.run_cli("--version")
    assert sherlock_project.__version__ in cli_output

    # Ensure __init__ is single source of truth for __version__ in package
    declaring_files: list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *')
    # Normalization is REQUIRED for Windows ( / vs \ )
    allowed_files: list = [os.path.normpath("sherlock_project/__init__.py")]

    # Sorting is REQUIRED for Mac
    assert sorted(declaring_files) == sorted(allowed_files)
|
||||
@@ -1,42 +0,0 @@
|
||||
[tox]
|
||||
requires =
|
||||
tox >= 3
|
||||
envlist =
|
||||
lint
|
||||
py313
|
||||
py312
|
||||
py311
|
||||
py310
|
||||
|
||||
[testenv]
|
||||
description = Attempt to build and install the package
|
||||
deps =
|
||||
coverage
|
||||
jsonschema
|
||||
pytest
|
||||
rstr
|
||||
allowlist_externals = coverage
|
||||
commands =
|
||||
coverage run --source=sherlock_project --module pytest -v
|
||||
coverage report --show-missing
|
||||
|
||||
[testenv:offline]
|
||||
deps =
|
||||
jsonschema
|
||||
pytest
|
||||
commands =
|
||||
pytest -v -m "not online"
|
||||
|
||||
[testenv:lint]
|
||||
description = Lint with Ruff
|
||||
deps =
|
||||
ruff
|
||||
commands =
|
||||
ruff check
|
||||
|
||||
[gh-actions]
|
||||
python =
|
||||
3.13: py313
|
||||
3.12: py312
|
||||
3.11: py311
|
||||
3.10: py310
|
||||
Reference in New Issue
Block a user