Compare commits
496 Commits
remove-tor
...
clean-up
| Author | SHA1 | Date | |
|---|---|---|---|
| b5e891550c | |||
| 190c2af514 | |||
| 8175af39ae | |||
| 574aeb4ac5 | |||
| 382bc3210a | |||
| 17c443af19 | |||
| 9d6c47fdb4 | |||
| 10bed20e70 | |||
| fd3833b744 | |||
| 8f8ebf3c15 | |||
| 4253014085 | |||
| 725c68907a | |||
| c66d10bfed | |||
| e0002779b4 | |||
| 8f1308b90d | |||
| e856b05c2c | |||
| fe9e750dab | |||
| 842ae1f754 | |||
| 339634f7bc | |||
| c1632693bb | |||
| e19cb32009 | |||
| b69c8ef940 | |||
| 2724711060 | |||
| 0a68ab7f4c | |||
| 8675178be1 | |||
| 9bafb8a280 | |||
| 8e5549862a | |||
| 8797fcd517 | |||
| 0995d4d669 | |||
| 6c0c273a0b | |||
| 3eeba790fd | |||
| 61a29ec373 | |||
| 9fbbbf7c73 | |||
| 331b68d909 | |||
| 8c3e093561 | |||
| e35e5e3af1 | |||
| 906287b305 | |||
| 0dbb6abcc5 | |||
| 03e097cc82 | |||
| 91c1964918 | |||
| 373f3d389a | |||
| 828c47109d | |||
| 94245b25df | |||
| 734542f0af | |||
| 1f8166ba9f | |||
| 6f1ddaa615 | |||
| 7ee2891517 | |||
| b893e4aa20 | |||
| eff869906a | |||
| 2a0107e189 | |||
| 5d8c4de212 | |||
| 1f9d7e8373 | |||
| 184470f871 | |||
| 342dbc85cc | |||
| 457e16e84f | |||
| 43b3736b75 | |||
| 64a49ffe17 | |||
| 0afd2006c6 | |||
| 3c270173a7 | |||
| 8d73f9ef4c | |||
| 472c086805 | |||
| 400c277f24 | |||
| e759564550 | |||
| deebe7137c | |||
| cb14ccbaaf | |||
| eb892795e9 | |||
| 09de90066b | |||
| cd1f27c12b | |||
| b837de8358 | |||
| 7a70f35883 | |||
| 4b17dae385 | |||
| efefe3f54a | |||
| 4b70a1fc25 | |||
| a7893f399e | |||
| 1cb6c12851 | |||
| c4f7485ecf | |||
| 228f50413e | |||
| d1867b1b51 | |||
| 6d2497582e | |||
| 885c43b8af | |||
| 8ad47b0b23 | |||
| e93af99424 | |||
| 5862ab4f92 | |||
| 4110cac45c | |||
| d66b18e8ae | |||
| b532fc6a38 | |||
| 99cf073835 | |||
| ec7e1b8b81 | |||
| a4aab38901 | |||
| 5202900618 | |||
| 26444a98ad | |||
| bced3242f3 | |||
| 08aabdad76 | |||
| 170ee0b928 | |||
| 2c9a54438a | |||
| 84f4886809 | |||
| e26fd6b643 | |||
| ce5de20f80 | |||
| 3ff2d135b5 | |||
| 1e65b4a209 | |||
| db3545b7b0 | |||
| 1898a0c4a9 | |||
| 0d32357b10 | |||
| 1be2abb056 | |||
| fb392534ef | |||
| bd49aac9d1 | |||
| 94838863fd | |||
| 79973a58ea | |||
| b9a72b55ca | |||
| ef55f7ddd3 | |||
| 28b78e7ddd | |||
| d2072e2cac | |||
| 3edb73cb23 | |||
| 6d1280ee9d | |||
| 0c457e590a | |||
| dc307fc0fd | |||
| d6256e9fc6 | |||
| 1645828527 | |||
| e774b08dc5 | |||
| 99067b2e59 | |||
| f039b50c4e | |||
| 7d5bd97142 | |||
| 70b5055631 | |||
| 1be25e70df | |||
| 9000575f7c | |||
| 220ebf935c | |||
| 959c4a2b26 | |||
| 443d43df21 | |||
| 80080cd57c | |||
| 80922a93fa | |||
| 45494fc74b | |||
| d92e2339a1 | |||
| 659bf92d99 | |||
| 3e4d9bcd85 | |||
| d3076cdfe0 | |||
| 51436cefe8 | |||
| 08a8177286 | |||
| e6d5fd64e0 | |||
| ac9f3a7fd5 | |||
| 289ab28b98 | |||
| 46ad6c9a5e | |||
| d20dcbe8db | |||
| 70c3c84196 | |||
| 53840c6a98 | |||
| 068fff8711 | |||
| 5735d01804 | |||
| f60de0d8f8 | |||
| cb3ab91492 | |||
| 4eea79ed6a | |||
| 03c051a525 | |||
| eccdf80b95 | |||
| eb51bf9b1a | |||
| 5d7b438fd6 | |||
| ef0b97fb57 | |||
| c6c3522159 | |||
| 2908c8eaa8 | |||
| f05b8e0ed6 | |||
| 01bca6b39f | |||
| d2835e56a4 | |||
| 0cf110e69e | |||
| a88adb0488 | |||
| 4010a58dde | |||
| b9e28b9b23 | |||
| d0e005da23 | |||
| 7a4f19e6b3 | |||
| f958e7b96f | |||
| 4c99bf3b75 | |||
| e3066a1d7a | |||
| f0510a169a | |||
| 738df6c362 | |||
| 83a38db110 | |||
| 9e3448d992 | |||
| 70e3c0ddd8 | |||
| 017c08a45d | |||
| f32f4ffaee | |||
| 7379ba7b19 | |||
| 3aeb6d6356 | |||
| 4246a7b16f | |||
| e44fe49c8f | |||
| 52cd5fdfc1 | |||
| 947f1ad2b6 | |||
| 4d00884d8c | |||
| cfcc82aaca | |||
| 0794e02b52 | |||
| 975965abed | |||
| a678bed154 | |||
| 4ec6f1eec0 | |||
| d1527376e7 | |||
| b99719ce60 | |||
| dc869852bc | |||
| 3079e7a218 | |||
| 5cd769c2f4 | |||
| 977ad5c1a4 | |||
| 57a0ccef38 | |||
| 94c013886a | |||
| c5e209d78e | |||
| 3e653c46b0 | |||
| 91f3b16993 | |||
| 0f3df0f4da | |||
| 0e7219b191 | |||
| 1d2c4b134f | |||
| b245c462c9 | |||
| 876e58b159 | |||
| 66d9733da7 | |||
| c55deab3a2 | |||
| edcb697793 | |||
| d314d75db1 | |||
| c89a52caf7 | |||
| 9c18cfe273 | |||
| 779d4c33f4 | |||
| 072c24687b | |||
| b811b2bd47 | |||
| 355bfbd328 | |||
| 7b3632bdad | |||
| 4fe41f09ff | |||
| cd7c52e4fa | |||
| 86140af50e | |||
| e5cd5e5bfe | |||
| dc89f1cd27 | |||
| 388a1e06d4 | |||
| 61eeeb7876 | |||
| df7da4288c | |||
| 70896f1da4 | |||
| 0a38cad926 | |||
| 1e38fb6f7b | |||
| 9b3dc3e581 | |||
| 37b30602fd | |||
| 7afdee4c58 | |||
| d4d8e01e31 | |||
| e5e0da00fe | |||
| dc61cdc7a4 | |||
| 0fa2e1afc7 | |||
| 7ca90ba728 | |||
| cd6fa5bb30 | |||
| fa05641661 | |||
| 97ba4e8616 | |||
| 9882478fb5 | |||
| 9f5b7e1846 | |||
| 05afac7082 | |||
| ae362b0f02 | |||
| 435540606e | |||
| 96aa12c140 | |||
| 9560355a7c | |||
| b44ac231c1 | |||
| 7ff3924f0b | |||
| 39c3729524 | |||
| faddcbd15f | |||
| 78a2d309d1 | |||
| 35940e7584 | |||
| 524415b5d5 | |||
| 8882310450 | |||
| 6d15f1319e | |||
| 69d3308c71 | |||
| 5c57b20936 | |||
| e09319f29f | |||
| b15242881e | |||
| e02507e5a1 | |||
| 284662e156 | |||
| 1b9f823cef | |||
| f0f37d841c | |||
| 58b20db9f1 | |||
| a98a113a4b | |||
| 164d01d163 | |||
| ddd94474b8 | |||
| 541b023b7f | |||
| 9b502d9245 | |||
| b9c352fb7c | |||
| 48ef668e1e | |||
| 481c39ace3 | |||
| 6b9305250d | |||
| 87bd15f927 | |||
| db23ae933f | |||
| ad76b3685f | |||
| 34cb23bc6e | |||
| 702bfee988 | |||
| dfe8b1599d | |||
| ca094d8264 | |||
| 5113dcfb36 | |||
| d3f4c65459 | |||
| 2504f238e5 | |||
| 9646055560 | |||
| 80d4abae34 | |||
| 19ae05d68a | |||
| 5c62b2ab1b | |||
| 6cc4d9e0c7 | |||
| 1ddfc08d7d | |||
| cca68bb9ab | |||
| d6db0f7d79 | |||
| d60562130c | |||
| aa1945b017 | |||
| dafcaec192 | |||
| 3c9eda75e9 | |||
| 8635d68864 | |||
| 6e7b3cecb8 | |||
| 1e12c3f7a6 | |||
| 9e40e0a0f4 | |||
| 4706323976 | |||
| 4721c7f553 | |||
| c82c00650a | |||
| 9e54e68da5 | |||
| 4423230c11 | |||
| a04fbe6ccc | |||
| f599ae5ff1 | |||
| de81f38622 | |||
| a40944d336 | |||
| e0f184f263 | |||
| 6c1623a3ad | |||
| 4428b15162 | |||
| 2adc96833a | |||
| b7ce20b2ca | |||
| 5e3828882e | |||
| 78cba6b7ca | |||
| 9be92b9834 | |||
| 53cbd332ca | |||
| 2ff2836159 | |||
| 0d008b109e | |||
| a29faa8288 | |||
| 809f8ba6c4 | |||
| 1912cbdea4 | |||
| b1fb7ac2ff | |||
| b5726e5edf | |||
| 9eb100c819 | |||
| 86387d0baf | |||
| c6f9e2eac9 | |||
| 73df548532 | |||
| ae87699824 | |||
| 8568ef7d99 | |||
| c6f7a99b1c | |||
| b5bd536e6b | |||
| d029af3e89 | |||
| af2bb98901 | |||
| 68c4edf8b6 | |||
| 300d6eda21 | |||
| 33b567d453 | |||
| c779d21c13 | |||
| d818c5ebf2 | |||
| 072b581f98 | |||
| 2de353d8d6 | |||
| ca2f19ae52 | |||
| b8bdfd8601 | |||
| a985a0891e | |||
| a688e268b3 | |||
| 3a7384e5f1 | |||
| ca17c39172 | |||
| 55f0628c2b | |||
| 276167be9c | |||
| d87f4f2b60 | |||
| 1684fbf866 | |||
| c0c5d829e2 | |||
| 0a0e4fe606 | |||
| 979f17cf3b | |||
| fe6e2e57c3 | |||
| 2c303a2869 | |||
| 0f395d037b | |||
| 839eab1384 | |||
| 98fbd525ee | |||
| 046c2957f3 | |||
| 18bae485ae | |||
| 46023a86b6 | |||
| 6f3b89c98a | |||
| 0b7d925b50 | |||
| 785346c12d | |||
| a998ec309c | |||
| 557394dc56 | |||
| 5990cf1e8e | |||
| cf393b8fec | |||
| 662d80e1a6 | |||
| 270fbf6473 | |||
| 06b062c122 | |||
| 6fa603981d | |||
| 8f5d601758 | |||
| 08aad5a755 | |||
| 3ffb514f71 | |||
| 24f64b3e32 | |||
| e84c5fce37 | |||
| e94e00af53 | |||
| 185478cf8e | |||
| 98d8120ccd | |||
| 3804fd9a91 | |||
| bd46baa639 | |||
| c64e795447 | |||
| 0e5769154c | |||
| d4b57510f1 | |||
| b06fb4e425 | |||
| 1c2e99a5b3 | |||
| 43e543acae | |||
| 3f1f2534a3 | |||
| 821062bb81 | |||
| 7cd9f2acb0 | |||
| 7b7a0d2c8e | |||
| f50d0e6c41 | |||
| bbe9e93164 | |||
| beb57d2e49 | |||
| a03aa3157f | |||
| 4deba5f147 | |||
| af4c08a08b | |||
| deb1936027 | |||
| fb52343aa3 | |||
| fdf3655e63 | |||
| d83e7c1652 | |||
| 8e0c7eff17 | |||
| b7406919dc | |||
| 656abbbbf8 | |||
| ef751d34f2 | |||
| 4ef9e6b0de | |||
| ecd59455b0 | |||
| 15e6924338 | |||
| ad86a8b954 | |||
| 61fdb6e206 | |||
| 193de54b6d | |||
| b6c33d2901 | |||
| b65b03fe63 | |||
| 5193ab8a97 | |||
| 84965712f6 | |||
| 5f0d55bcfa | |||
| 277d19816e | |||
| a7b370bc3d | |||
| efd765eba7 | |||
| 192e2c333e | |||
| 89b4cec3cb | |||
| 4660afb7d8 | |||
| e9eb7d32ce | |||
| f7075e1b64 | |||
| f32fdaa93a | |||
| 1c8e3f8142 | |||
| 298161114b | |||
| 0d0335bca0 | |||
| 1e2e380876 | |||
| bceb625984 | |||
| a5dda7ae91 | |||
| 9e111a334b | |||
| 74a3576132 | |||
| 0646063509 | |||
| c6c1f3eef7 | |||
| 47ab466d85 | |||
| 378967c2a5 | |||
| 2cc854bd6b | |||
| 4d83f057ac | |||
| 573ae6c488 | |||
| fce4347a3c | |||
| 7b2076c113 | |||
| 7e18e0eb4c | |||
| 22100ceed3 | |||
| 40102be04a | |||
| 201ab43631 | |||
| defd1740b8 | |||
| 4544ddc219 | |||
| 7e87a88d71 | |||
| db4bb5ada6 | |||
| 09b324f7d4 | |||
| 35773d43da | |||
| eeda506990 | |||
| cda65e3da5 | |||
| d016276478 | |||
| 2a1e06975d | |||
| 930ed2ac7c | |||
| 18367353df | |||
| ba3952d86b | |||
| c5b25fa494 | |||
| e1c4db4dab | |||
| 12590137f5 | |||
| 2680cc85fb | |||
| 9aa8242d92 | |||
| c12304a71a | |||
| cf7032dd99 | |||
| f79bbfcdc1 | |||
| e966b9c169 | |||
| 27badf6b3d | |||
| bb9dd410da | |||
| f1d4a841eb | |||
| 80e61cd3be | |||
| f9617d4f64 | |||
| 04472af9c0 | |||
| 448da43bf7 | |||
| 2add15e92c | |||
| efc6b12c65 | |||
| 0ece8bf672 | |||
| 079f14ec46 | |||
| cc57469a65 | |||
| 33db232493 | |||
| 99586a56cf | |||
| e3a4879fcd | |||
| c71cb72a29 | |||
| 501cb3dce2 | |||
| fff8feb1f6 | |||
| 5019e8a122 | |||
| 2d5217c56a | |||
| b42a58c86d | |||
| e33d595201 | |||
| 32a55103e1 | |||
| 6d6e17c22f | |||
| 267e5a6979 | |||
| ca781a3c3b | |||
| 7f6f600fed | |||
| db1e82c2f4 | |||
| 75a0bdc1f1 |
@@ -0,0 +1,19 @@
|
|||||||
|
FROM sherlock/sherlock as sherlock
|
||||||
|
|
||||||
|
# Install Node.js
|
||||||
|
RUN apt-get update; apt-get install curl gpg -y
|
||||||
|
RUN mkdir -p /etc/apt/keyrings
|
||||||
|
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
|
||||||
|
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
|
||||||
|
RUN apt-get update && apt-get install -y curl bash git jq jo xz-utils nodejs
|
||||||
|
|
||||||
|
# Install Apify CLI (node.js) for the Actor Runtime
|
||||||
|
RUN npm -g install apify-cli
|
||||||
|
|
||||||
|
# Install Dependencies for the Actor Shell Script
|
||||||
|
RUN apt-get update && apt-get install -y bash jq jo xz-utils nodejs
|
||||||
|
|
||||||
|
# Copy Actor dir with the actorization shell script
|
||||||
|
COPY .actor/ .actor
|
||||||
|
|
||||||
|
ENTRYPOINT [".actor/actor.sh"]
|
||||||
@@ -0,0 +1,93 @@
|
|||||||
|
# Sherlock Actor on Apify
|
||||||
|
|
||||||
|
[Sherlock Actor on Apify](https://apify.com/netmilk/sherlock?fpr=sherlock)
|
||||||
|
|
||||||
|
This Actor wraps the [Sherlock Project](https://sherlockproject.xyz/) to provide serverless username reconnaissance across social networks in the cloud. It helps you find usernames across multiple social media platforms without installing and running the tool locally.
|
||||||
|
|
||||||
|
## What are Actors?
|
||||||
|
[Actors](https://docs.apify.com/platform/actors?fpr=sherlock) are serverless microservices running on the [Apify Platform](https://apify.com/?fpr=sherlock). They are based on the [Actor SDK](https://docs.apify.com/sdk/js?fpr=sherlock) and can be found in the [Apify Store](https://apify.com/store?fpr=sherlock). Learn more about Actors in the [Apify Whitepaper](https://whitepaper.actor?fpr=sherlock).
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Apify Console
|
||||||
|
|
||||||
|
1. Go to the Apify Actor page
|
||||||
|
2. Click "Run"
|
||||||
|
3. In the input form, fill in **Username(s)** to search for
|
||||||
|
4. The Actor will run and store its results in the default dataset
|
||||||
|
|
||||||
|
|
||||||
|
### Apify CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
apify call YOUR_USERNAME/sherlock --input='{
|
||||||
|
"usernames": ["johndoe", "janedoe"]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Using Apify API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --request POST \
|
||||||
|
--url "https://api.apify.com/v2/acts/YOUR_USERNAME~sherlock/run" \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--header 'Authorization: Bearer YOUR_API_TOKEN' \
|
||||||
|
--data '{
|
||||||
|
"usernames": ["johndoe", "janedoe"]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Input Parameters
|
||||||
|
|
||||||
|
The Actor accepts a JSON schema with the following structure:
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `usernames` | array | Yes | - | List of usernames to search for |
|
||||||
|
| `usernames[]` | string | Yes | - | Username to search for |
|
||||||
|
|
||||||
|
|
||||||
|
### Example Input
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"usernames": ["techuser", "designuser"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
|
||||||
|
The Actor provides the following output:
|
||||||
|
|
||||||
|
### Dataset Record
|
||||||
|
|
||||||
|
| Field | Type | Required | Description |
|
||||||
|
|-------|------|----------|-------------|
|
||||||
|
| `username` | string | Yes | Username the search was conducted for |
|
||||||
|
| `links` | array | Yes | Array with found links to the social media |
|
||||||
|
| `links[]` | string | No | URL to the account |
|
||||||
|
|
||||||
|
### Example Dataset Item (JSON)
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"username": "johndoe",
|
||||||
|
"links": [
|
||||||
|
"https://github.com/johndoe"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance & Resources
|
||||||
|
|
||||||
|
- **Memory Requirements**:
|
||||||
|
- Minimum: 512 MB RAM
|
||||||
|
- Recommended: 1 GB RAM for multiple usernames
|
||||||
|
- **Processing Time**:
|
||||||
|
- Single username: ~1-2 minutes
|
||||||
|
- Multiple usernames: 2-5 minutes
|
||||||
|
- Varies based on number of sites checked and response times
|
||||||
|
|
||||||
|
|
||||||
|
For more help, check the [Sherlock Project documentation](https://github.com/sherlock-project/sherlock) or raise an issue in the Actor's repository.
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"actorSpecification": 1,
|
||||||
|
"name": "sherlock",
|
||||||
|
"version": "0.0",
|
||||||
|
"buildTag": "latest",
|
||||||
|
"environmentVariables": {},
|
||||||
|
"dockerFile": "./Dockerfile",
|
||||||
|
"dockerContext": "../",
|
||||||
|
"input": "./input_schema.json",
|
||||||
|
"storages": {
|
||||||
|
"dataset": "./dataset_schema.json"
|
||||||
|
}
|
||||||
|
}
|
||||||
Executable
+14
@@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
INPUT=`apify actor:get-input | jq -r .usernames[] | xargs echo`
|
||||||
|
echo "INPUT: $INPUT"
|
||||||
|
|
||||||
|
sherlock $INPUT
|
||||||
|
|
||||||
|
for username in $INPUT; do
|
||||||
|
# escape the special meaning leading characters
|
||||||
|
# https://github.com/jpmens/jo/blob/master/jo.md#description
|
||||||
|
safe_username=$(echo $username | sed 's/^@/\\@/' | sed 's/^:/\\:/' | sed 's/%/\\%/')
|
||||||
|
echo "pushing results for username: $username, content:"
|
||||||
|
cat $username.txt
|
||||||
|
sed '$d' $username.txt | jo -a | jo username=$safe_username links:=- | apify actor:push-data
|
||||||
|
done
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
{
|
||||||
|
"actorSpecification": 1,
|
||||||
|
"fields":{
|
||||||
|
"title": "Sherlock actor input",
|
||||||
|
"description": "This is actor input schema",
|
||||||
|
"type": "object",
|
||||||
|
"schemaVersion": 1,
|
||||||
|
"properties": {
|
||||||
|
"links": {
|
||||||
|
"title": "Links to accounts",
|
||||||
|
"type": "array",
|
||||||
|
"description": "A list of social media accounts found for the username"
|
||||||
|
},
|
||||||
|
"username": {
|
||||||
|
"title": "Lookup username",
|
||||||
|
"type": "string",
|
||||||
|
"description": "Username the lookup was performed for"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"username",
|
||||||
|
"links"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"views": {
|
||||||
|
"overview": {
|
||||||
|
"title": "Overview",
|
||||||
|
"transformation": {
|
||||||
|
"fields": [
|
||||||
|
"username",
|
||||||
|
"links"
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"display": {
|
||||||
|
"component": "table",
|
||||||
|
"links": {
|
||||||
|
"label": "Links"
|
||||||
|
},
|
||||||
|
"username":{
|
||||||
|
"label": "Username"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"title": "Sherlock actor input",
|
||||||
|
"description": "This is actor input schema",
|
||||||
|
"type": "object",
|
||||||
|
"schemaVersion": 1,
|
||||||
|
"properties": {
|
||||||
|
"usernames": {
|
||||||
|
"title": "Usernames to hunt down",
|
||||||
|
"type": "array",
|
||||||
|
"description": "A list of usernames to be checked for existence across social media",
|
||||||
|
"editor": "stringList",
|
||||||
|
"prefill": ["johndoe"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"usernames"
|
||||||
|
]
|
||||||
|
}
|
||||||
+1
-1
@@ -1,5 +1,5 @@
|
|||||||
### REPOSITORY
|
### REPOSITORY
|
||||||
/.github/CODEOWNERS @sdushantha
|
/.github/CODEOWNERS @sdushantha @ppfeister
|
||||||
/.github/FUNDING.yml @sdushantha
|
/.github/FUNDING.yml @sdushantha
|
||||||
/LICENSE @sdushantha
|
/LICENSE @sdushantha
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,15 @@ body:
|
|||||||
- Other (indicate below)
|
- Other (indicate below)
|
||||||
validations:
|
validations:
|
||||||
required: true
|
required: true
|
||||||
|
- type: input
|
||||||
|
id: package-version
|
||||||
|
attributes:
|
||||||
|
label: Package version
|
||||||
|
description: |
|
||||||
|
Knowing the version of the package you are using can help us diagnose your issue more quickly.
|
||||||
|
You can find the version by running `sherlock --version`.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
- type: textarea
|
- type: textarea
|
||||||
id: description
|
id: description
|
||||||
attributes:
|
attributes:
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
## Security Policy
|
||||||
|
|
||||||
|
### Supported Versions
|
||||||
|
|
||||||
|
Sherlock is a forward-looking project. Only the latest and most current version is supported.
|
||||||
|
|
||||||
|
### Reporting a Vulnerability
|
||||||
|
|
||||||
|
Security concerns can be submitted [__here__][report-url] without risk of exposing sensitive information. For issues that are low severity or unlikely to see exploitation, public issues are often acceptable.
|
||||||
|
|
||||||
|
[report-url]: https://github.com/sherlock-project/sherlock/security/advisories/new
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
name: Exclusions Updater
|
||||||
|
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
#- cron: '0 5 * * 0' # Runs at 05:00 every Sunday
|
||||||
|
- cron: '0 5 * * *' # Runs at 05:00 every day
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
update-exclusions:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v6
|
||||||
|
with:
|
||||||
|
python-version: '3.13'
|
||||||
|
|
||||||
|
- name: Install Poetry
|
||||||
|
uses: abatilo/actions-poetry@v4
|
||||||
|
with:
|
||||||
|
poetry-version: 'latest'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
poetry install --no-interaction --with dev
|
||||||
|
|
||||||
|
- name: Run false positive tests
|
||||||
|
run: |
|
||||||
|
$(poetry env activate)
|
||||||
|
pytest -q --tb no -m validate_targets_fp -n 20 | tee fp_test_results.txt
|
||||||
|
deactivate
|
||||||
|
|
||||||
|
- name: Parse false positive detections by desired categories
|
||||||
|
run: |
|
||||||
|
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \
|
||||||
|
| sort -u > false_positive_exclusions.txt
|
||||||
|
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \
|
||||||
|
| sort -u > waf_hits.txt
|
||||||
|
|
||||||
|
- name: Detect if exclusions list changed
|
||||||
|
id: detect_changes
|
||||||
|
run: |
|
||||||
|
git fetch origin exclusions || true
|
||||||
|
|
||||||
|
if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
|
||||||
|
# If the exclusions branch and file exist, compare
|
||||||
|
if git diff --quiet origin/exclusions -- false_positive_exclusions.txt; then
|
||||||
|
echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# If the exclusions branch or file do not exist, treat as changed
|
||||||
|
echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Quantify and display results
|
||||||
|
run: |
|
||||||
|
FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs)
|
||||||
|
WAF_COUNT=$(wc -l < waf_hits.txt | xargs)
|
||||||
|
echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits."
|
||||||
|
echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt
|
||||||
|
echo ">>> WAF hits:" && cat waf_hits.txt
|
||||||
|
|
||||||
|
- name: Commit and push exclusions list
|
||||||
|
if: steps.detect_changes.outputs.exclusions_changed == 'true'
|
||||||
|
run: |
|
||||||
|
git config user.name "Paul Pfeister (automation)"
|
||||||
|
git config user.email "code@pfeister.dev"
|
||||||
|
|
||||||
|
mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp
|
||||||
|
|
||||||
|
git add -f false_positive_exclusions.txt.tmp # -f required to override .gitignore
|
||||||
|
git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp
|
||||||
|
|
||||||
|
git fetch origin exclusions || true # Allows creation of branch if deleted
|
||||||
|
git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)
|
||||||
|
|
||||||
|
git stash pop || true
|
||||||
|
|
||||||
|
mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt
|
||||||
|
|
||||||
|
git rm -f false_positive_exclusions.txt.tmp || true
|
||||||
|
git add false_positive_exclusions.txt
|
||||||
|
git commit -m "auto: update exclusions list" || echo "No changes to commit"
|
||||||
|
git push origin exclusions
|
||||||
@@ -2,30 +2,37 @@ name: Regression Testing
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ master ]
|
branches:
|
||||||
|
- master
|
||||||
|
- release/**
|
||||||
paths:
|
paths:
|
||||||
- '.github/workflows/regression.yml'
|
- '.github/workflows/regression.yml'
|
||||||
- '**/*.json'
|
- '**/*.json'
|
||||||
- '**/*.py'
|
- '**/*.py'
|
||||||
- '**/*.ini'
|
- '**/*.ini'
|
||||||
- '**/*.toml'
|
- '**/*.toml'
|
||||||
|
- 'Dockerfile'
|
||||||
push:
|
push:
|
||||||
branches: [ master ]
|
branches:
|
||||||
|
- master
|
||||||
|
- release/**
|
||||||
paths:
|
paths:
|
||||||
- '.github/workflows/regression.yml'
|
- '.github/workflows/regression.yml'
|
||||||
- '**/*.json'
|
- '**/*.json'
|
||||||
- '**/*.py'
|
- '**/*.py'
|
||||||
- '**/*.ini'
|
- '**/*.ini'
|
||||||
- '**/*.toml'
|
- '**/*.toml'
|
||||||
|
- 'Dockerfile'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
tox-lint:
|
tox-lint:
|
||||||
# Linting is ran through tox to ensure that the same linter is used by local runners
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
# Linting is run through tox to ensure that the same linter
|
||||||
|
# is used by local runners
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v6
|
||||||
- name: Set up linting environment
|
- name: Set up linting environment
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v6
|
||||||
with:
|
with:
|
||||||
python-version: '3.x'
|
python-version: '3.x'
|
||||||
- name: Install tox and related dependencies
|
- name: Install tox and related dependencies
|
||||||
@@ -37,7 +44,8 @@ jobs:
|
|||||||
tox-matrix:
|
tox-matrix:
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false # We want to know what specicic versions it fails on
|
# We want to know what specific versions it fails on
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
os: [
|
os: [
|
||||||
ubuntu-latest,
|
ubuntu-latest,
|
||||||
@@ -45,16 +53,17 @@ jobs:
|
|||||||
macos-latest,
|
macos-latest,
|
||||||
]
|
]
|
||||||
python-version: [
|
python-version: [
|
||||||
'3.8',
|
|
||||||
'3.9',
|
|
||||||
'3.10',
|
'3.10',
|
||||||
'3.11',
|
'3.11',
|
||||||
'3.12',
|
'3.12',
|
||||||
|
'3.13',
|
||||||
|
'3.14',
|
||||||
|
'3.14t',
|
||||||
]
|
]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v6
|
||||||
- name: Set up environment ${{ matrix.python-version }}
|
- name: Set up environment ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v6
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- name: Install tox and related dependencies
|
- name: Install tox and related dependencies
|
||||||
@@ -64,3 +73,22 @@ jobs:
|
|||||||
pip install tox-gh-actions
|
pip install tox-gh-actions
|
||||||
- name: Run tox
|
- name: Run tox
|
||||||
run: tox
|
run: tox
|
||||||
|
docker-build-test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
- name: Get version from pyproject.toml
|
||||||
|
id: get-version
|
||||||
|
run: |
|
||||||
|
VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
|
||||||
|
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||||
|
- name: Build Docker image
|
||||||
|
run: |
|
||||||
|
docker build \
|
||||||
|
--build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
|
||||||
|
-t sherlock-test:latest .
|
||||||
|
- name: Test Docker image runs
|
||||||
|
run: docker run --rm sherlock-test:latest --version
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
name: Update Site List
|
name: Update Site List
|
||||||
|
|
||||||
# Trigger the workflow when changes are pushed to the main branch
|
# Trigger the workflow when changes are pushed to the main branch
|
||||||
# and the changes include the sherlock/resources/data.json file
|
# and the changes include the sherlock_project/resources/data.json file
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
paths:
|
paths:
|
||||||
- sherlock/resources/data.json
|
- sherlock_project/resources/data.json
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
sync-json-data:
|
sync-json-data:
|
||||||
|
|||||||
@@ -0,0 +1,126 @@
|
|||||||
|
name: Modified Target Validation
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request_target:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths:
|
||||||
|
- "sherlock_project/resources/data.json"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
validate-modified-targets:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
pull-requests: write
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v5
|
||||||
|
with:
|
||||||
|
# Checkout the base branch but fetch all history to avoid a second fetch call
|
||||||
|
ref: ${{ github.base_ref }}
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v6
|
||||||
|
with:
|
||||||
|
python-version: "3.13"
|
||||||
|
|
||||||
|
- name: Install Poetry
|
||||||
|
uses: abatilo/actions-poetry@v4
|
||||||
|
with:
|
||||||
|
poetry-version: "latest"
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
poetry install --no-interaction --with dev
|
||||||
|
|
||||||
|
- name: Prepare JSON versions for comparison
|
||||||
|
run: |
|
||||||
|
# Fetch only the PR's branch head (single network call in this step)
|
||||||
|
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
|
||||||
|
|
||||||
|
# Find the merge-base commit between the target branch and the PR branch
|
||||||
|
MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
|
||||||
|
echo "Comparing PR head against merge-base commit: $MERGE_BASE"
|
||||||
|
|
||||||
|
# Safely extract the file from the PR's head and the merge-base commit
|
||||||
|
git show pr:sherlock_project/resources/data.json > data.json.head
|
||||||
|
git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
|
||||||
|
|
||||||
|
# CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
|
||||||
|
# This ensures that pytest runs against the new, updated file.
|
||||||
|
cp data.json.head sherlock_project/resources/data.json
|
||||||
|
|
||||||
|
- name: Discover modified targets
|
||||||
|
id: discover-modified
|
||||||
|
run: |
|
||||||
|
CHANGED=$(
|
||||||
|
python - <<'EOF'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
try:
|
||||||
|
with open("data.json.base") as f: base = json.load(f)
|
||||||
|
with open("data.json.head") as f: head = json.load(f)
|
||||||
|
except FileNotFoundError as e:
|
||||||
|
print(f"Error: Could not find {e.filename}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
changed = []
|
||||||
|
for k, v in head.items():
|
||||||
|
if k not in base or base[k] != v:
|
||||||
|
changed.append(k)
|
||||||
|
|
||||||
|
print(",".join(sorted(changed)))
|
||||||
|
EOF
|
||||||
|
)
|
||||||
|
|
||||||
|
# Preserve changelist
|
||||||
|
echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
|
||||||
|
echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: Validate remote manifest against local schema
|
||||||
|
if: steps.discover-modified.outputs.changed_targets != ''
|
||||||
|
run: |
|
||||||
|
poetry run pytest tests/test_manifest.py::test_validate_manifest_against_local_schema
|
||||||
|
|
||||||
|
# --- The rest of the steps below are unchanged ---
|
||||||
|
|
||||||
|
- name: Validate modified targets
|
||||||
|
if: steps.discover-modified.outputs.changed_targets != ''
|
||||||
|
continue-on-error: true
|
||||||
|
run: |
|
||||||
|
poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
|
||||||
|
--chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" \
|
||||||
|
--junitxml=validation_results.xml
|
||||||
|
|
||||||
|
- name: Prepare validation summary
|
||||||
|
if: steps.discover-modified.outputs.changed_targets != ''
|
||||||
|
id: prepare-summary
|
||||||
|
run: |
|
||||||
|
summary=$(
|
||||||
|
poetry run python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
|
||||||
|
)
|
||||||
|
echo "$summary" > validation_summary.md
|
||||||
|
|
||||||
|
- name: Announce validation results
|
||||||
|
if: steps.discover-modified.outputs.changed_targets != ''
|
||||||
|
uses: actions/github-script@v8
|
||||||
|
with:
|
||||||
|
script: |
|
||||||
|
const fs = require('fs');
|
||||||
|
const body = fs.readFileSync('validation_summary.md', 'utf8');
|
||||||
|
await github.rest.issues.createComment({
|
||||||
|
issue_number: context.payload.pull_request.number,
|
||||||
|
owner: context.repo.owner,
|
||||||
|
repo: context.repo.repo,
|
||||||
|
body: body,
|
||||||
|
});
|
||||||
|
|
||||||
|
- name: This step shows as ran when no modifications are found
|
||||||
|
if: steps.discover-modified.outputs.changed_targets == ''
|
||||||
|
run: |
|
||||||
|
echo "No modified targets found"
|
||||||
+21
-16
@@ -1,26 +1,31 @@
|
|||||||
FROM python:3.11-slim-bullseye as build
|
# Release instructions:
|
||||||
WORKDIR /wheels
|
# 1. Update the version tag in the Dockerfile to match the version in sherlock/__init__.py
|
||||||
|
# 2. Update the VCS_REF tag to match the tagged version's FULL commit hash
|
||||||
|
# 3. Build image with BOTH latest and version tags
|
||||||
|
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
|
||||||
|
|
||||||
COPY requirements.txt /opt/sherlock/
|
FROM python:3.12-slim-bullseye AS build
|
||||||
RUN apt-get update \
|
WORKDIR /sherlock
|
||||||
&& apt-get install -y build-essential \
|
|
||||||
&& pip3 wheel -r /opt/sherlock/requirements.txt
|
|
||||||
|
|
||||||
FROM python:3.11-slim-bullseye
|
RUN pip3 install --no-cache-dir --upgrade pip
|
||||||
WORKDIR /opt/sherlock
|
|
||||||
|
|
||||||
ARG VCS_REF
|
FROM python:3.12-slim-bullseye
|
||||||
|
WORKDIR /sherlock
|
||||||
|
|
||||||
|
ARG VCS_REF= # CHANGE ME ON UPDATE
|
||||||
ARG VCS_URL="https://github.com/sherlock-project/sherlock"
|
ARG VCS_URL="https://github.com/sherlock-project/sherlock"
|
||||||
|
ARG VERSION_TAG= # CHANGE ME ON UPDATE
|
||||||
|
|
||||||
|
ENV SHERLOCK_ENV=docker
|
||||||
|
|
||||||
LABEL org.label-schema.vcs-ref=$VCS_REF \
|
LABEL org.label-schema.vcs-ref=$VCS_REF \
|
||||||
org.label-schema.vcs-url=$VCS_URL
|
org.label-schema.vcs-url=$VCS_URL \
|
||||||
|
org.label-schema.name="Sherlock" \
|
||||||
|
org.label-schema.version=$VERSION_TAG \
|
||||||
|
website="https://sherlockproject.xyz"
|
||||||
|
|
||||||
COPY --from=build /wheels /wheels
|
RUN pip3 install --no-cache-dir sherlock-project==$VERSION_TAG
|
||||||
COPY . /opt/sherlock/
|
|
||||||
|
|
||||||
RUN pip3 install --no-cache-dir . -f /wheels \
|
WORKDIR /sherlock
|
||||||
&& rm -rf /wheels
|
|
||||||
|
|
||||||
WORKDIR /opt/sherlock/sherlock
|
|
||||||
|
|
||||||
ENTRYPOINT ["sherlock"]
|
ENTRYPOINT ["sherlock"]
|
||||||
|
|||||||
+19
-10
@@ -1,36 +1,45 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# This module generates the listing of supported sites which can be found in
|
# This module generates the listing of supported sites which can be found in
|
||||||
# sites.md. It also organizes all the sites in alphanumeric order
|
# sites.mdx. It also organizes all the sites in alphanumeric order
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
DATA_REL_URI: str = "sherlock_project/resources/data.json"
|
||||||
|
|
||||||
|
DEFAULT_ENCODING = "utf-8"
|
||||||
|
|
||||||
# Read the data.json file
|
# Read the data.json file
|
||||||
with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file:
|
with open(DATA_REL_URI, "r", encoding=DEFAULT_ENCODING) as data_file:
|
||||||
data: dict = json.load(data_file)
|
data: dict = json.load(data_file)
|
||||||
|
|
||||||
# Removes schema-specific keywords for proper processing
|
# Removes schema-specific keywords for proper processing
|
||||||
social_networks: dict = dict(data)
|
social_networks = data.copy()
|
||||||
social_networks.pop('$schema', None)
|
social_networks.pop('$schema', None)
|
||||||
|
|
||||||
# Sort the social networks in alphanumeric order
|
# Sort the social networks in alphanumeric order
|
||||||
social_networks: list = sorted(social_networks.items())
|
social_networks = sorted(social_networks.items())
|
||||||
|
|
||||||
# Make output dir where the site list will be written
|
# Make output dir where the site list will be written
|
||||||
os.mkdir("output")
|
os.mkdir("output")
|
||||||
|
|
||||||
# Write the list of supported sites to sites.md
|
# Write the list of supported sites to sites.mdx
|
||||||
with open("output/sites.mdx", "w") as site_file:
|
with open("output/sites.mdx", "w", encoding=DEFAULT_ENCODING) as site_file:
|
||||||
site_file.write("---\ntitle: 'List of supported sites'\nsidebarTitle: 'Supported sites'\nicon: 'globe'\ndescription: 'Sherlock currently supports **400+** sites'\n---\n\n")
|
site_file.write("---\n")
|
||||||
|
site_file.write("title: 'List of supported sites'\n")
|
||||||
|
site_file.write("sidebarTitle: 'Supported sites'\n")
|
||||||
|
site_file.write("icon: 'globe'\n")
|
||||||
|
site_file.write("description: 'Sherlock currently supports **400+** sites'\n")
|
||||||
|
site_file.write("---\n\n")
|
||||||
|
|
||||||
for social_network, info in social_networks:
|
for social_network, info in social_networks:
|
||||||
url_main = info["urlMain"]
|
url_main = info["urlMain"]
|
||||||
is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
|
is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
|
||||||
site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")
|
site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")
|
||||||
|
|
||||||
# Overwrite the data.json file with sorted data
|
# Overwrite the data.json file with sorted data
|
||||||
with open("sherlock/resources/data.json", "w") as data_file:
|
with open(DATA_REL_URI, "w", encoding=DEFAULT_ENCODING) as data_file:
|
||||||
sorted_data = json.dumps(data, indent=2, sort_keys=True)
|
sorted_data = json.dumps(data, indent=2, sort_keys=True)
|
||||||
data_file.write(sorted_data)
|
data_file.write(sorted_data)
|
||||||
data_file.write("\n")
|
data_file.write("\n") # Keep the newline after writing data
|
||||||
|
|
||||||
print("Finished updating supported site listing!")
|
print("Finished updating supported site listing!")
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,72 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# This module summarizes the results of site validation tests queued by
|
||||||
|
# workflow validate_modified_targets for presentation in Issue comments.
|
||||||
|
|
||||||
|
from defusedxml import ElementTree as ET
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
def summarize_junit_xml(xml_path: Path) -> str:
    """Render a JUnit XML report as a Markdown summary of target validation.

    The report is expected to contain test cases named
    ``test_false_pos[<target>]`` and ``test_false_neg[<target>]``. Each target
    becomes one table row with a pass/fail cell per check; a check that has no
    matching test case is rendered as ``Error!``.

    Args:
        xml_path: Path of the JUnit XML file to summarize.

    Returns:
        Markdown text: a heading, a results table, and optional trailing
        notices when the suite reports failures or any test case errored.

    Raises:
        ValueError: If the document contains no ``testsuite`` element.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Accept both layouts: a <testsuites> wrapper around the suite, or a
    # document whose root element is the <testsuite> itself.
    suite = root if root.tag == 'testsuite' else root.find('testsuite')

    pass_message: str = ":heavy_check_mark: Pass"
    fail_message: str = ":x: Fail"

    if suite is None:
        raise ValueError("Invalid JUnit XML: No testsuite found")

    # Maps a test function name to the table column it fills in.
    check_columns: dict[str, str] = {
        "test_false_pos": "F+ Check",
        "test_false_neg": "F- Check",
    }

    summary_lines: list[str] = [
        "#### Automatic validation of changes\n",
        "| Target | F+ Check | F- Check |",
        "|---|---|---|",
    ]

    failures = int(suite.get('failures', 0))
    errors_detected: bool = False

    results: dict[str, dict[str, str]] = {}

    for testcase in suite.findall('testcase'):
        failure = testcase.find('failure')
        error = testcase.find('error')

        if error is not None:
            errors_detected = True

        # Split "test_name[target]" once, at the first '[' only, so a target
        # id that itself contains '[' is kept whole.
        test_name, bracket, param_id = (testcase.get('name') or '').partition('[')
        if not bracket:
            # Not a parametrized test case, so there is no target to report
            # on; any error it carries was already recorded above.
            continue
        site_name = param_id.removesuffix(']')

        checks = results.setdefault(site_name, {})
        column = check_columns.get(test_name)
        if column is not None:
            passed = failure is None and error is None
            checks[column] = pass_message if passed else fail_message

    for site_name, checks in results.items():
        summary_lines.append(f"| {site_name} | {checks.get('F+ Check', 'Error!')} | {checks.get('F- Check', 'Error!')} |")

    if failures > 0:
        summary_lines.append("\n___\n" +
            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")

    if errors_detected:
        summary_lines.append("\n___\n" +
            "\n**Errors were detected during validation. Please review the workflow logs.**")

    return "\n".join(summary_lines)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: takes exactly one argument, the path of the
    # JUnit XML report, and prints the Markdown summary to stdout.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: summarize_site_validation.py <junit-xml-file>")
        sys.exit(1)

    xml_path: Path = Path(cli_args[0])

    # Bail out with a readable message instead of a parser traceback when the
    # report is missing.
    if not xml_path.is_file():
        print(f"Error: File '{xml_path}' does not exist.")
        sys.exit(1)

    print(summarize_junit_xml(xml_path))
|
||||||
+18
-14
@@ -1,6 +1,6 @@
|
|||||||
<p align=center>
|
<p align="center">
|
||||||
<br>
|
<br>
|
||||||
<a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png"/></a>
|
<a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png" alt="sherlock"/></a>
|
||||||
<br>
|
<br>
|
||||||
<span>Hunt down social media accounts by username across <a href="https://sherlockproject.xyz/sites">400+ social networks</a></span>
|
<span>Hunt down social media accounts by username across <a href="https://sherlockproject.xyz/sites">400+ social networks</a></span>
|
||||||
<br>
|
<br>
|
||||||
@@ -15,25 +15,27 @@
|
|||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<img width="70%" height="70%" src="images/demo.png"/>
|
<img width="70%" height="70%" src="images/demo.png" alt="demo"/>
|
||||||
</a>
|
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
> [!WARNING]
|
||||||
|
> Packages for ParrotOS and Ubuntu 24.04, maintained by a third party, appear to be __broken__.
|
||||||
|
> Users of these systems should defer to [`uv`](https://docs.astral.sh/uv/)/`pipx`/`pip` or Docker.
|
||||||
|
|
||||||
| | Command | Notes |
|
| Method | Notes |
|
||||||
| - | - | - |
|
| - | - |
|
||||||
| PyPI | `pipx install sherlock-project` | `pip` may be used in place of `pipx` |
|
| `pipx install sherlock-project` | `pip` or [`uv`](https://docs.astral.sh/uv/) may be used in place of `pipx` |
|
||||||
| Docker | `docker pull sherlock/sherlock` | |
|
| `docker run -it --rm sherlock/sherlock` |
|
||||||
| Debian family | `apt install sherlock` | Kali, Parrot, Debian Testing and Sid |
|
| `dnf install sherlock-project` | |
|
||||||
| BlackArch | `pacman -S sherlock` | |
|
|
||||||
| Homebrew | `brew install sherlock` | |
|
Community-maintained packages are available for Debian (>= 13), Ubuntu (>= 22.10), Homebrew, Kali, and BlackArch. These packages are not directly supported or maintained by the Sherlock Project.
|
||||||
|
|
||||||
See all alternative installation methods [here](https://sherlockproject.xyz/installation)
|
See all alternative installation methods [here](https://sherlockproject.xyz/installation)
|
||||||
|
|
||||||
## Usage
|
## General usage
|
||||||
|
|
||||||
To search for only one user:
|
To search for only one user:
|
||||||
```bash
|
```bash
|
||||||
@@ -101,9 +103,11 @@ optional arguments:
|
|||||||
Thank you to everyone who has contributed to Sherlock! ❤️
|
Thank you to everyone who has contributed to Sherlock! ❤️
|
||||||
|
|
||||||
<a href="https://github.com/sherlock-project/sherlock/graphs/contributors">
|
<a href="https://github.com/sherlock-project/sherlock/graphs/contributors">
|
||||||
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" noZoom />
|
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
|
## Star History
|
||||||
|
|
||||||
<picture>
|
<picture>
|
||||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
|
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
|
||||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" />
|
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" />
|
||||||
@@ -113,7 +117,7 @@ Thank you to everyone who has contributed to Sherlock! ❤️
|
|||||||
## License
|
## License
|
||||||
|
|
||||||
MIT © Sherlock Project<br/>
|
MIT © Sherlock Project<br/>
|
||||||
Original Creator - [Siddharth Dushantha](https://github.com/sdushantha)
|
Creator - [Siddharth Dushantha](https://github.com/sdushantha)
|
||||||
|
|
||||||
<!-- Reference Links -->
|
<!-- Reference Links -->
|
||||||
|
|
||||||
|
|||||||
+140
-40
@@ -84,22 +84,6 @@ As of 2020-02-23, all usernames are reported as not existing.
|
|||||||
},
|
},
|
||||||
```
|
```
|
||||||
|
|
||||||
## Fanpop
|
|
||||||
|
|
||||||
As of 2020-02-23, all usernames are reported as not existing.
|
|
||||||
|
|
||||||
```json
|
|
||||||
"fanpop": {
|
|
||||||
"errorType": "response_url",
|
|
||||||
"errorUrl": "http://www.fanpop.com/",
|
|
||||||
"rank": 9454,
|
|
||||||
"url": "http://www.fanpop.com/fans/{}",
|
|
||||||
"urlMain": "http://www.fanpop.com/",
|
|
||||||
"username_claimed": "blue",
|
|
||||||
"username_unclaimed": "noonewould_everusethis7"
|
|
||||||
},
|
|
||||||
```
|
|
||||||
|
|
||||||
## Canva
|
## Canva
|
||||||
|
|
||||||
As of 2020-02-23, all usernames are reported as not existing.
|
As of 2020-02-23, all usernames are reported as not existing.
|
||||||
@@ -618,7 +602,7 @@ removed
|
|||||||
|
|
||||||
## Coderwall
|
## Coderwall
|
||||||
As of 2020-07-06, Coderwall returns false positives when checking for a username which contains a period.
|
As of 2020-07-06, Coderwall returns false positives when checking for a username which contains a period.
|
||||||
I have tried to find out what Coderwall's criteria are for a valid username, but unfortunately I have not been able to
|
I have tried to find out what Coderwall's criteria are for a valid username, but unfortunately I have not been able to
|
||||||
find it and because of this, the best thing we can do now is to remove it.
|
find it and because of this, the best thing we can do now is to remove it.
|
||||||
```json
|
```json
|
||||||
"Coderwall": {
|
"Coderwall": {
|
||||||
@@ -666,15 +650,15 @@ As of 2020-07-24, Zomato seems to be unstable. Majority of the time, Zomato take
|
|||||||
## Mixer
|
## Mixer
|
||||||
As of 2020-07-22, the Mixer service has closed down.
|
As of 2020-07-22, the Mixer service has closed down.
|
||||||
```json
|
```json
|
||||||
"mixer.com": {
|
"mixer.com": {
|
||||||
"errorType": "status_code",
|
"errorType": "status_code",
|
||||||
"rank": 1544,
|
"rank": 1544,
|
||||||
"url": "https://mixer.com/{}",
|
"url": "https://mixer.com/{}",
|
||||||
"urlMain": "https://mixer.com/",
|
"urlMain": "https://mixer.com/",
|
||||||
"urlProbe": "https://mixer.com/api/v1/channels/{}",
|
"urlProbe": "https://mixer.com/api/v1/channels/{}",
|
||||||
"username_claimed": "blue",
|
"username_claimed": "blue",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@@ -1273,19 +1257,6 @@ As of 2022-05-1, FanCentro returns false positives. Will later in new version of
|
|||||||
},
|
},
|
||||||
```
|
```
|
||||||
|
|
||||||
## Codeforces
|
|
||||||
As of 2022-05-01, Codeforces returns false positives
|
|
||||||
```json
|
|
||||||
"Codeforces": {
|
|
||||||
"errorType": "response_url",
|
|
||||||
"errorUrl": "https://codeforces.com/",
|
|
||||||
"url": "https://codeforces.com/profile/{}",
|
|
||||||
"urlMain": "https://www.codeforces.com/",
|
|
||||||
"username_claimed": "tourist",
|
|
||||||
"username_unclaimed": "noonewouldeverusethis789"
|
|
||||||
},
|
|
||||||
```
|
|
||||||
|
|
||||||
## Smashcast
|
## Smashcast
|
||||||
As of 2022-05-01, Smashcast is down
|
As of 2022-05-01, Smashcast is down
|
||||||
```json
|
```json
|
||||||
@@ -1300,7 +1271,7 @@ As og 2022-05-01, Smashcast is down
|
|||||||
|
|
||||||
## Countable
|
## Countable
|
||||||
|
|
||||||
As of 2022-05-01, Countable returns false positives
|
As of 2022-05-01, Countable returns false positives
|
||||||
```json
|
```json
|
||||||
"Countable": {
|
"Countable": {
|
||||||
"errorType": "status_code",
|
"errorType": "status_code",
|
||||||
@@ -1895,3 +1866,132 @@ As of 24.06.2024, Pentestit returns a 403. This is most likely due to a new site
|
|||||||
"username_claimed": "CSV"
|
"username_claimed": "CSV"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Euw
|
||||||
|
__2024-06-09 :__ errorMsg detection doesn't work anymore, because the error message is included in the HTTP response body even for successful searches
|
||||||
|
```json
|
||||||
|
"Euw": {
|
||||||
|
"errorMsg": "This summoner is not registered at OP.GG. Please check spelling.",
|
||||||
|
"errorType": "message",
|
||||||
|
"url": "https://euw.op.gg/summoner/userName={}",
|
||||||
|
"urlMain": "https://euw.op.gg/",
|
||||||
|
"username_claimed": "blue"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Etsy
|
||||||
|
__2024-06-10 :__ HTTP request returns 403 Forbidden and tries to verify the connection, so it doesn't work anymore
|
||||||
|
```json
|
||||||
|
"Etsy": {
|
||||||
|
"errorType": "status_code",
|
||||||
|
"url": "https://www.etsy.com/shop/{}",
|
||||||
|
"urlMain": "https://www.etsy.com/",
|
||||||
|
"username_claimed": "JennyKrafts"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Alik.cz
|
||||||
|
__2024-07-21 :__ Target is now BLACKLISTED from the default manifest due to the site receiving unnecessarily high traffic from Sherlock (by request of the site owners). This target is not permitted to be reactivated. Inclusion in unrelated manifests is not impacted, but it is discouraged.
|
||||||
|
|
||||||
|
## 8tracks
|
||||||
|
__2025-02-02 :__ Might be dead again. Nobody knows for sure.
|
||||||
|
```json
|
||||||
|
"8tracks": {
|
||||||
|
"errorType": "message",
|
||||||
|
"errorMsg": "\"available\":true",
|
||||||
|
"headers": {
|
||||||
|
"Accept-Language": "en-US,en;q=0.5"
|
||||||
|
},
|
||||||
|
"url": "https://8tracks.com/{}",
|
||||||
|
"urlProbe": "https://8tracks.com/users/check_username?login={}&format=jsonh",
|
||||||
|
"urlMain": "https://8tracks.com/",
|
||||||
|
"username_claimed": "blue"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Shpock
|
||||||
|
__2025-02-02 :__ Can likely be added back with a new endpoint (source username availability endpoint from mobile app reg flow?)
|
||||||
|
```json
|
||||||
|
"Shpock": {
|
||||||
|
"errorType": "status_code",
|
||||||
|
"url": "https://www.shpock.com/shop/{}/items",
|
||||||
|
"urlMain": "https://www.shpock.com/",
|
||||||
|
"username_claimed": "user"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Twitch
|
||||||
|
__2025-02-02 :__
|
||||||
|
```json
|
||||||
|
"Twitch": {
|
||||||
|
"errorType": "message",
|
||||||
|
"errorMsg": "components.availability-tracking.warn-unavailable.component",
|
||||||
|
"url": "https://www.twitch.tv/{}",
|
||||||
|
"urlMain": "https://www.twitch.tv/",
|
||||||
|
"urlProbe": "https://m.twitch.tv/{}",
|
||||||
|
"username_claimed": "jenny"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Fiverr
|
||||||
|
__2025-02-02 :__ Fiverr added CSRF protections that messed with this test
|
||||||
|
```json
|
||||||
|
"Fiverr": {
|
||||||
|
"errorMsg": "\"status\":\"success\"",
|
||||||
|
"errorType": "message",
|
||||||
|
"headers": {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Accept-Language": "en-US,en;q=0.9"
|
||||||
|
},
|
||||||
|
"regexCheck": "^[A-Za-z][A-Za-z\\d_]{5,14}$",
|
||||||
|
"request_method": "POST",
|
||||||
|
"request_payload": {
|
||||||
|
"username": "{}"
|
||||||
|
},
|
||||||
|
"url": "https://www.fiverr.com/{}",
|
||||||
|
"urlMain": "https://www.fiverr.com/",
|
||||||
|
"urlProbe": "https://www.fiverr.com/validate_username",
|
||||||
|
"username_claimed": "blueman"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## BabyRU
|
||||||
|
__2025-02-02 :__ Just being problematic (possibly related to errorMsg encoding?)
|
||||||
|
```json
|
||||||
|
"babyRU": {
|
||||||
|
"errorMsg": [
|
||||||
|
"\u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430, \u043a\u043e\u0442\u043e\u0440\u0443\u044e \u0432\u044b \u0438\u0441\u043a\u0430\u043b\u0438, \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430",
|
||||||
|
"Доступ с вашего IP-адреса временно ограничен"
|
||||||
|
],
|
||||||
|
"errorType": "message",
|
||||||
|
"url": "https://www.baby.ru/u/{}/",
|
||||||
|
"urlMain": "https://www.baby.ru/",
|
||||||
|
"username_claimed": "blue"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## v0.dev
|
||||||
|
__2025-02-16 :__ Unsure if any way to view profiles exists now
|
||||||
|
```json
|
||||||
|
"v0.dev": {
|
||||||
|
"errorType": "message",
|
||||||
|
"errorMsg": "<title>v0 by Vercel</title>",
|
||||||
|
"url": "https://v0.dev/{}",
|
||||||
|
"urlMain": "https://v0.dev",
|
||||||
|
"username_claimed": "t3dotgg"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## TorrentGalaxy
|
||||||
|
__2025-07-06 :__ Site appears to have gone offline in March and hasn't come back
|
||||||
|
```json
|
||||||
|
"TorrentGalaxy": {
|
||||||
|
"errorMsg": "<title>TGx:Can't show details</title>",
|
||||||
|
"errorType": "message",
|
||||||
|
"regexCheck": "^[A-Za-z0-9]{3,15}$",
|
||||||
|
"url": "https://torrentgalaxy.to/profile/{}",
|
||||||
|
"urlMain": "https://torrentgalaxy.to/",
|
||||||
|
"username_claimed": "GalaxyRG"
|
||||||
|
},
|
||||||
|
```
|
||||||
|
|||||||
+19
-9
@@ -8,8 +8,7 @@ source = "init"
|
|||||||
|
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "sherlock-project"
|
name = "sherlock-project"
|
||||||
# single source of truth for version is __init__.py
|
version = "0.16.0"
|
||||||
version = "0"
|
|
||||||
description = "Hunt down social media accounts by username across social networks"
|
description = "Hunt down social media accounts by username across social networks"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
authors = [
|
authors = [
|
||||||
@@ -20,8 +19,8 @@ maintainers = [
|
|||||||
"Matheus Felipe <matheusfelipeog@protonmail.com>",
|
"Matheus Felipe <matheusfelipeog@protonmail.com>",
|
||||||
"Sondre Karlsen Dyrnes <sondre@villdyr.no>"
|
"Sondre Karlsen Dyrnes <sondre@villdyr.no>"
|
||||||
]
|
]
|
||||||
readme = "docs/pyproj/README.md"
|
readme = "docs/pyproject/README.md"
|
||||||
packages = [ { include = "sherlock"} ]
|
packages = [ { include = "sherlock_project"} ]
|
||||||
keywords = [ "osint", "reconnaissance", "information gathering" ]
|
keywords = [ "osint", "reconnaissance", "information gathering" ]
|
||||||
classifiers = [
|
classifiers = [
|
||||||
"Development Status :: 5 - Production/Stable",
|
"Development Status :: 5 - Production/Stable",
|
||||||
@@ -30,9 +29,13 @@ classifiers = [
|
|||||||
"Natural Language :: English",
|
"Natural Language :: English",
|
||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
"Programming Language :: Python :: 3.13",
|
||||||
"Topic :: Security"
|
"Topic :: Security"
|
||||||
]
|
]
|
||||||
homepage = "https://sherlock-project.github.io/"
|
homepage = "https://sherlockproject.xyz/"
|
||||||
repository = "https://github.com/sherlock-project/sherlock"
|
repository = "https://github.com/sherlock-project/sherlock"
|
||||||
|
|
||||||
|
|
||||||
@@ -40,19 +43,26 @@ repository = "https://github.com/sherlock-project/sherlock"
|
|||||||
"Bug Tracker" = "https://github.com/sherlock-project/sherlock/issues"
|
"Bug Tracker" = "https://github.com/sherlock-project/sherlock/issues"
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.8"
|
python = "^3.9"
|
||||||
certifi = ">=2019.6.16"
|
certifi = ">=2019.6.16"
|
||||||
colorama = "^0.4.1"
|
colorama = "^0.4.1"
|
||||||
PySocks = "^1.7.0"
|
PySocks = "^1.7.0"
|
||||||
requests = "^2.22.0"
|
requests = "^2.22.0"
|
||||||
requests-futures = "^1.0.0"
|
requests-futures = "^1.0.0"
|
||||||
stem = "^1.8.0"
|
stem = "^1.8.0"
|
||||||
# pandas can likely be bumped up to ^2.0.0 after fc39 EOL
|
pandas = "^2.2.1"
|
||||||
pandas = ">=1.0.0,<3.0.0"
|
|
||||||
openpyxl = "^3.0.10"
|
openpyxl = "^3.0.10"
|
||||||
|
tomli = "^2.2.1"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
jsonschema = "^4.0.0"
|
jsonschema = "^4.0.0"
|
||||||
|
rstr = "^3.2.2"
|
||||||
|
pytest = "^8.4.2"
|
||||||
|
pytest-xdist = "^3.8.0"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.poetry.group.ci.dependencies]
|
||||||
|
defusedxml = "^0.7.1"
|
||||||
|
|
||||||
[tool.poetry.scripts]
|
[tool.poetry.scripts]
|
||||||
sherlock = 'sherlock.sherlock:main'
|
sherlock = 'sherlock_project.sherlock:main'
|
||||||
|
|||||||
+4
-1
@@ -1,4 +1,7 @@
|
|||||||
[pytest]
|
[pytest]
|
||||||
addopts = --strict-markers
|
addopts = --strict-markers -m "not validate_targets"
|
||||||
markers =
|
markers =
|
||||||
online: mark tests as requiring internet access.
|
online: mark tests as requiring internet access.
|
||||||
|
validate_targets: mark tests for sweeping manifest validation (sends many requests).
|
||||||
|
validate_targets_fp: validate_targets, false positive tests only.
|
||||||
|
validate_targets_fn: validate_targets, false negative tests only.
|
||||||
|
|||||||
@@ -1,10 +0,0 @@
|
|||||||
""" Sherlock Module
|
|
||||||
|
|
||||||
This module contains the main logic to search for usernames at social
|
|
||||||
networks.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
__shortname__ = "Sherlock"
|
|
||||||
__longname__ = "Sherlock: Find Usernames Across Social Networks"
|
|
||||||
__version__ = "0.14.4"
|
|
||||||
@@ -1,80 +0,0 @@
|
|||||||
{
|
|
||||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
||||||
"title": "Sherlock Targets",
|
|
||||||
"description": "Social media target to probe for existence of usernames",
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"$schema": { "type": "string" }
|
|
||||||
},
|
|
||||||
"patternProperties": {
|
|
||||||
"^(?!\\$).*?$": {
|
|
||||||
"type": "object",
|
|
||||||
"description": "User-friendly target name",
|
|
||||||
"required": [ "url", "urlMain", "errorType", "username_claimed" ],
|
|
||||||
"properties": {
|
|
||||||
"url": { "type": "string" },
|
|
||||||
"urlMain": { "type": "string" },
|
|
||||||
"urlProbe": { "type": "string" },
|
|
||||||
"username_claimed": { "type": "string" },
|
|
||||||
"regexCheck": { "type": "string" },
|
|
||||||
"isNSFW": { "type": "boolean" },
|
|
||||||
"headers": { "type": "object" },
|
|
||||||
"request_payload": { "type": "object" },
|
|
||||||
"__comment__": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
|
|
||||||
},
|
|
||||||
"tags": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "$ref": "#/$defs/tag" },
|
|
||||||
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"request_method": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [ "GET", "POST", "HEAD", "PUT" ]
|
|
||||||
},
|
|
||||||
"errorType": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [ "message", "response_url", "status_code" ]
|
|
||||||
},
|
|
||||||
"errorMsg": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "type": "string" },
|
|
||||||
{ "type": "array", "items": { "type": "string" } }
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"errorCode": {
|
|
||||||
"oneOf": [
|
|
||||||
{ "type": "integer" },
|
|
||||||
{ "type": "array", "items": { "type": "integer" } }
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"errorUrl": { "type": "string" },
|
|
||||||
"response_url": { "type": "string" }
|
|
||||||
},
|
|
||||||
"dependencies": {
|
|
||||||
"errorMsg": {
|
|
||||||
"properties" : { "errorType": { "const": "message" } }
|
|
||||||
},
|
|
||||||
"errorUrl": {
|
|
||||||
"properties": { "errorType": { "const": "response_url" } }
|
|
||||||
},
|
|
||||||
"errorCode": {
|
|
||||||
"properties": { "errorType": { "const": "status_code" } }
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"if": { "properties": { "errorType": { "const": "message" } } },
|
|
||||||
"then": { "required": [ "errorMsg" ] },
|
|
||||||
"else": {
|
|
||||||
"if": { "properties": { "errorType": { "const": "response_url" } } },
|
|
||||||
"then": { "required": [ "errorUrl" ] }
|
|
||||||
},
|
|
||||||
"additionalProperties": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"$defs": {
|
|
||||||
"tag": { "type": "string", "enum": [ "adult", "gaming" ] }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
""" Sherlock Module
|
||||||
|
|
||||||
|
This module contains the main logic to search for usernames at social
|
||||||
|
networks.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from importlib.metadata import version as pkg_version, PackageNotFoundError
|
||||||
|
import pathlib
|
||||||
|
import tomli
|
||||||
|
|
||||||
|
|
||||||
|
def get_version() -> str:
    """Fetch the version number of the installed package.

    The version is taken from the installed distribution metadata for
    ``sherlock_project``. When no such distribution is found, it is read from
    the ``tool.poetry.version`` entry of the ``pyproject.toml`` located in the
    parent of this file's directory.

    Returns:
        The version string.
    """
    try:
        installed_version = pkg_version("sherlock_project")
    except PackageNotFoundError:
        # No installed metadata: fall back to the project manifest that sits
        # one level above the directory containing this file.
        package_dir: pathlib.Path = pathlib.Path(__file__).resolve().parent
        manifest_path: pathlib.Path = package_dir.parent / "pyproject.toml"
        with manifest_path.open("rb") as manifest_file:
            manifest = tomli.load(manifest_file)
        return manifest["tool"]["poetry"]["version"]
    return installed_version
|
||||||
|
|
||||||
|
# This variable is only used to check for ImportErrors induced by users running as script rather than as module or package
|
||||||
|
import_error_test_var = None
|
||||||
|
|
||||||
|
__shortname__ = "Sherlock"
|
||||||
|
__longname__ = "Sherlock: Find Usernames Across Social Networks"
|
||||||
|
__version__ = get_version()
|
||||||
|
|
||||||
|
forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest"
|
||||||
@@ -14,9 +14,9 @@ if __name__ == "__main__":
|
|||||||
# Check if the user is using the correct version of Python
|
# Check if the user is using the correct version of Python
|
||||||
python_version = sys.version.split()[0]
|
python_version = sys.version.split()[0]
|
||||||
|
|
||||||
if sys.version_info < (3, 8):
|
if sys.version_info < (3, 9):
|
||||||
print(f"Sherlock requires Python 3.8+\nYou are using Python {python_version}, which is not supported by Sherlock.")
|
print(f"Sherlock requires Python 3.9+\nYou are using Python {python_version}, which is not supported by Sherlock.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
from sherlock import sherlock
|
from sherlock_project import sherlock
|
||||||
sherlock.main()
|
sherlock.main()
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
This module defines the objects for notifying the caller about the
|
This module defines the objects for notifying the caller about the
|
||||||
results of queries.
|
results of queries.
|
||||||
"""
|
"""
|
||||||
from sherlock.result import QueryStatus
|
from sherlock_project.result import QueryStatus
|
||||||
from colorama import Fore, Style
|
from colorama import Fore, Style
|
||||||
import webbrowser
|
import webbrowser
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,149 @@
|
|||||||
|
{
|
||||||
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||||
|
"title": "Sherlock Target Manifest",
|
||||||
|
"description": "Social media targets to probe for the existence of known usernames",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"$schema": { "type": "string" }
|
||||||
|
},
|
||||||
|
"patternProperties": {
|
||||||
|
"^(?!\\$).*?$": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Target name and associated information (key should be human readable name)",
|
||||||
|
"required": ["url", "urlMain", "errorType", "username_claimed"],
|
||||||
|
"properties": {
|
||||||
|
"url": { "type": "string" },
|
||||||
|
"urlMain": { "type": "string" },
|
||||||
|
"urlProbe": { "type": "string" },
|
||||||
|
"username_claimed": { "type": "string" },
|
||||||
|
"regexCheck": { "type": "string" },
|
||||||
|
"isNSFW": { "type": "boolean" },
|
||||||
|
"headers": { "type": "object" },
|
||||||
|
"request_payload": { "type": "object" },
|
||||||
|
"__comment__": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
|
||||||
|
},
|
||||||
|
"tags": {
|
||||||
|
"oneOf": [
|
||||||
|
{ "$ref": "#/$defs/tag" },
|
||||||
|
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"request_method": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["GET", "POST", "HEAD", "PUT"]
|
||||||
|
},
|
||||||
|
"errorType": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["message", "response_url", "status_code"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["message", "response_url", "status_code"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"errorMsg": {
|
||||||
|
"oneOf": [
|
||||||
|
{ "type": "string" },
|
||||||
|
{ "type": "array", "items": { "type": "string" } }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"errorCode": {
|
||||||
|
"oneOf": [
|
||||||
|
{ "type": "integer" },
|
||||||
|
{ "type": "array", "items": { "type": "integer" } }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"errorUrl": { "type": "string" },
|
||||||
|
"response_url": { "type": "string" }
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"errorMsg": {
|
||||||
|
"oneOf": [
|
||||||
|
{ "properties": { "errorType": { "const": "message" } } },
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"errorType": {
|
||||||
|
"type": "array",
|
||||||
|
"contains": { "const": "message" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"errorUrl": {
|
||||||
|
"oneOf": [
|
||||||
|
{ "properties": { "errorType": { "const": "response_url" } } },
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"errorType": {
|
||||||
|
"type": "array",
|
||||||
|
"contains": { "const": "response_url" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"errorCode": {
|
||||||
|
"oneOf": [
|
||||||
|
{ "properties": { "errorType": { "const": "status_code" } } },
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"errorType": {
|
||||||
|
"type": "array",
|
||||||
|
"contains": { "const": "status_code" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"if": {
|
||||||
|
"anyOf": [
|
||||||
|
{ "properties": { "errorType": { "const": "message" } } },
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"errorType": {
|
||||||
|
"type": "array",
|
||||||
|
"contains": { "const": "message" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"then": { "required": ["errorMsg"] }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"if": {
|
||||||
|
"anyOf": [
|
||||||
|
{ "properties": { "errorType": { "const": "response_url" } } },
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"errorType": {
|
||||||
|
"type": "array",
|
||||||
|
"contains": { "const": "response_url" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"then": { "required": ["errorUrl"] }
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"$defs": {
|
||||||
|
"tag": { "type": "string", "enum": ["adult", "gaming"] }
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,36 +7,42 @@ This module contains the main logic to search for usernames at social
|
|||||||
networks.
|
networks.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
try:
|
||||||
|
from sherlock_project.__init__ import import_error_test_var # noqa: F401
|
||||||
|
except ImportError:
|
||||||
|
print("Did you run Sherlock with `python3 sherlock/sherlock.py ...`?")
|
||||||
|
print("This is an outdated method. Please see https://sherlockproject.xyz/installation for up to date instructions.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
import signal
|
import signal
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
|
||||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||||
|
from json import loads as json_loads
|
||||||
from time import monotonic
|
from time import monotonic
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from requests_futures.sessions import FuturesSession
|
||||||
|
|
||||||
# Removing __version__ here will trigger update message for users
|
from sherlock_project.__init__ import (
|
||||||
# Do not remove until ready to trigger that message
|
|
||||||
# When removed, also remove all the noqa: E402 comments for linting
|
|
||||||
__version__ = "0.14.4"
|
|
||||||
del __version__
|
|
||||||
|
|
||||||
from .__init__ import ( # noqa: E402
|
|
||||||
__longname__,
|
__longname__,
|
||||||
__version__
|
__shortname__,
|
||||||
|
__version__,
|
||||||
|
forge_api_latest_release,
|
||||||
)
|
)
|
||||||
|
|
||||||
from requests_futures.sessions import FuturesSession # noqa: E402
|
from sherlock_project.result import QueryStatus
|
||||||
from sherlock.result import QueryStatus # noqa: E402
|
from sherlock_project.result import QueryResult
|
||||||
from sherlock.result import QueryResult # noqa: E402
|
from sherlock_project.notify import QueryNotify
|
||||||
from sherlock.notify import QueryNotify # noqa: E402
|
from sherlock_project.notify import QueryNotifyPrint
|
||||||
from sherlock.notify import QueryNotifyPrint # noqa: E402
|
from sherlock_project.sites import SitesInformation
|
||||||
from sherlock.sites import SitesInformation # noqa: E402
|
from colorama import init
|
||||||
from colorama import init # noqa: E402
|
from argparse import ArgumentTypeError
|
||||||
from argparse import ArgumentTypeError # noqa: E402
|
|
||||||
|
|
||||||
|
|
||||||
class SherlockFuturesSession(FuturesSession):
|
class SherlockFuturesSession(FuturesSession):
|
||||||
@@ -162,12 +168,13 @@ def multiple_usernames(username):
|
|||||||
|
|
||||||
|
|
||||||
def sherlock(
|
def sherlock(
|
||||||
username,
|
username: str,
|
||||||
site_data,
|
site_data: dict[str, dict[str, str]],
|
||||||
query_notify: QueryNotify,
|
query_notify: QueryNotify,
|
||||||
proxy=None,
|
dump_response: bool = False,
|
||||||
timeout=60,
|
proxy: Optional[str] = None,
|
||||||
):
|
timeout: int = 60,
|
||||||
|
) -> dict[str, dict[str, str | QueryResult]]:
|
||||||
"""Run Sherlock Analysis.
|
"""Run Sherlock Analysis.
|
||||||
|
|
||||||
Checks for existence of username on various social media sites.
|
Checks for existence of username on various social media sites.
|
||||||
@@ -228,7 +235,7 @@ def sherlock(
|
|||||||
# A user agent is needed because some sites don't return the correct
|
# A user agent is needed because some sites don't return the correct
|
||||||
# information since they think that we are bots (Which we actually are...)
|
# information since they think that we are bots (Which we actually are...)
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/116.0",
|
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0",
|
||||||
}
|
}
|
||||||
|
|
||||||
if "headers" in net_info:
|
if "headers" in net_info:
|
||||||
@@ -342,6 +349,8 @@ def sherlock(
|
|||||||
|
|
||||||
# Get the expected error type
|
# Get the expected error type
|
||||||
error_type = net_info["errorType"]
|
error_type = net_info["errorType"]
|
||||||
|
if isinstance(error_type, str):
|
||||||
|
error_type: list[str] = [error_type]
|
||||||
|
|
||||||
# Retrieve future and ensure it has finished
|
# Retrieve future and ensure it has finished
|
||||||
future = net_info["request_future"]
|
future = net_info["request_future"]
|
||||||
@@ -374,8 +383,10 @@ def sherlock(
|
|||||||
# be highly targetted. Comment at the end of each fingerprint to
|
# be highly targetted. Comment at the end of each fingerprint to
|
||||||
# indicate target and date fingerprinted.
|
# indicate target and date fingerprinted.
|
||||||
WAFHitMsgs = [
|
WAFHitMsgs = [
|
||||||
'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare
|
r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare
|
||||||
'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:' # 2024-04-09 PerimeterX / Human Security
|
r'<span id="challenge-error-text">', # 2024-11-11 Cloudflare error page
|
||||||
|
r'AwsWafIntegration.forceRefreshToken', # 2024-11-11 Cloudfront (AWS)
|
||||||
|
r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:' # 2024-04-09 PerimeterX / Human Security
|
||||||
]
|
]
|
||||||
|
|
||||||
if error_text is not None:
|
if error_text is not None:
|
||||||
@@ -384,61 +395,91 @@ def sherlock(
|
|||||||
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
|
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
|
||||||
query_status = QueryStatus.WAF
|
query_status = QueryStatus.WAF
|
||||||
|
|
||||||
elif error_type == "message":
|
|
||||||
# error_flag True denotes no error found in the HTML
|
|
||||||
# error_flag False denotes error found in the HTML
|
|
||||||
error_flag = True
|
|
||||||
errors = net_info.get("errorMsg")
|
|
||||||
# errors will hold the error message
|
|
||||||
# it can be string or list
|
|
||||||
# by isinstance method we can detect that
|
|
||||||
# and handle the case for strings as normal procedure
|
|
||||||
# and if its list we can iterate the errors
|
|
||||||
if isinstance(errors, str):
|
|
||||||
# Checks if the error message is in the HTML
|
|
||||||
# if error is present we will set flag to False
|
|
||||||
if errors in r.text:
|
|
||||||
error_flag = False
|
|
||||||
else:
|
|
||||||
# If it's list, it will iterate all the error message
|
|
||||||
for error in errors:
|
|
||||||
if error in r.text:
|
|
||||||
error_flag = False
|
|
||||||
break
|
|
||||||
if error_flag:
|
|
||||||
query_status = QueryStatus.CLAIMED
|
|
||||||
else:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
elif error_type == "status_code":
|
|
||||||
error_codes = net_info.get("errorCode")
|
|
||||||
query_status = QueryStatus.CLAIMED
|
|
||||||
|
|
||||||
# Type consistency, allowing for both singlets and lists in manifest
|
|
||||||
if isinstance(error_codes, int):
|
|
||||||
error_codes = [error_codes]
|
|
||||||
|
|
||||||
if error_codes is not None and r.status_code in error_codes:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
elif r.status_code >= 300 or r.status_code < 200:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
elif error_type == "response_url":
|
|
||||||
# For this detection method, we have turned off the redirect.
|
|
||||||
# So, there is no need to check the response URL: it will always
|
|
||||||
# match the request. Instead, we will ensure that the response
|
|
||||||
# code indicates that the request was successful (i.e. no 404, or
|
|
||||||
# forward to some odd redirect).
|
|
||||||
if 200 <= r.status_code < 300:
|
|
||||||
query_status = QueryStatus.CLAIMED
|
|
||||||
else:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
else:
|
else:
|
||||||
# It should be impossible to ever get here...
|
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
|
||||||
raise ValueError(
|
error_context = f"Unknown error type '{error_type}' for {social_network}"
|
||||||
f"Unknown Error Type '{error_type}' for " f"site '{social_network}'"
|
query_status = QueryStatus.UNKNOWN
|
||||||
)
|
else:
|
||||||
|
if "message" in error_type:
|
||||||
|
# error_flag True denotes no error found in the HTML
|
||||||
|
# error_flag False denotes error found in the HTML
|
||||||
|
error_flag = True
|
||||||
|
errors = net_info.get("errorMsg")
|
||||||
|
# errors will hold the error message
|
||||||
|
# it can be string or list
|
||||||
|
# by isinstance method we can detect that
|
||||||
|
# and handle the case for strings as normal procedure
|
||||||
|
# and if its list we can iterate the errors
|
||||||
|
if isinstance(errors, str):
|
||||||
|
# Checks if the error message is in the HTML
|
||||||
|
# if error is present we will set flag to False
|
||||||
|
if errors in r.text:
|
||||||
|
error_flag = False
|
||||||
|
else:
|
||||||
|
# If it's list, it will iterate all the error message
|
||||||
|
for error in errors:
|
||||||
|
if error in r.text:
|
||||||
|
error_flag = False
|
||||||
|
break
|
||||||
|
if error_flag:
|
||||||
|
query_status = QueryStatus.CLAIMED
|
||||||
|
else:
|
||||||
|
query_status = QueryStatus.AVAILABLE
|
||||||
|
|
||||||
|
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
|
||||||
|
error_codes = net_info.get("errorCode")
|
||||||
|
query_status = QueryStatus.CLAIMED
|
||||||
|
|
||||||
|
# Type consistency, allowing for both singlets and lists in manifest
|
||||||
|
if isinstance(error_codes, int):
|
||||||
|
error_codes = [error_codes]
|
||||||
|
|
||||||
|
if error_codes is not None and r.status_code in error_codes:
|
||||||
|
query_status = QueryStatus.AVAILABLE
|
||||||
|
elif r.status_code >= 300 or r.status_code < 200:
|
||||||
|
query_status = QueryStatus.AVAILABLE
|
||||||
|
|
||||||
|
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
|
||||||
|
# For this detection method, we have turned off the redirect.
|
||||||
|
# So, there is no need to check the response URL: it will always
|
||||||
|
# match the request. Instead, we will ensure that the response
|
||||||
|
# code indicates that the request was successful (i.e. no 404, or
|
||||||
|
# forward to some odd redirect).
|
||||||
|
if 200 <= r.status_code < 300:
|
||||||
|
query_status = QueryStatus.CLAIMED
|
||||||
|
else:
|
||||||
|
query_status = QueryStatus.AVAILABLE
|
||||||
|
|
||||||
|
if dump_response:
|
||||||
|
print("+++++++++++++++++++++")
|
||||||
|
print(f"TARGET NAME : {social_network}")
|
||||||
|
print(f"USERNAME : {username}")
|
||||||
|
print(f"TARGET URL : {url}")
|
||||||
|
print(f"TEST METHOD : {error_type}")
|
||||||
|
try:
|
||||||
|
print(f"STATUS CODES : {net_info['errorCode']}")
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
print("Results...")
|
||||||
|
try:
|
||||||
|
print(f"RESPONSE CODE : {r.status_code}")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
print(f"ERROR TEXT : {net_info['errorMsg']}")
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
print(">>>>> BEGIN RESPONSE TEXT")
|
||||||
|
try:
|
||||||
|
print(r.text)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
print("<<<<< END RESPONSE TEXT")
|
||||||
|
print("VERDICT : " + str(query_status))
|
||||||
|
print("+++++++++++++++++++++")
|
||||||
|
|
||||||
# Notify caller about results of query.
|
# Notify caller about results of query.
|
||||||
result = QueryResult(
|
result: QueryResult = QueryResult(
|
||||||
username=username,
|
username=username,
|
||||||
site_name=social_network,
|
site_name=social_network,
|
||||||
site_url_user=url,
|
site_url_user=url,
|
||||||
@@ -502,7 +543,7 @@ def main():
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--version",
|
"--version",
|
||||||
action="version",
|
action="version",
|
||||||
version=f"Sherlock v{__version__}",
|
version=f"{__shortname__} v{__version__}",
|
||||||
help="Display version information and dependencies.",
|
help="Display version information and dependencies.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -558,13 +599,20 @@ def main():
|
|||||||
default=None,
|
default=None,
|
||||||
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
|
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--dump-response",
|
||||||
|
action="store_true",
|
||||||
|
dest="dump_response",
|
||||||
|
default=False,
|
||||||
|
help="Dump the HTTP response to stdout for targeted debugging.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--json",
|
"--json",
|
||||||
"-j",
|
"-j",
|
||||||
metavar="JSON_FILE",
|
metavar="JSON_FILE",
|
||||||
dest="json_file",
|
dest="json_file",
|
||||||
default=None,
|
default=None,
|
||||||
help="Load data from a JSON file or an online, valid, JSON file.",
|
help="Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--timeout",
|
"--timeout",
|
||||||
@@ -627,6 +675,32 @@ def main():
|
|||||||
help="Include checking of NSFW sites from default list.",
|
help="Include checking of NSFW sites from default list.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# TODO deprecated in favor of --txt, retained for workflow compatibility, to be removed
|
||||||
|
# in future release
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-txt",
|
||||||
|
action="store_true",
|
||||||
|
dest="no_txt",
|
||||||
|
default=False,
|
||||||
|
help="Disable creation of a txt file - WILL BE DEPRECATED",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--txt",
|
||||||
|
action="store_true",
|
||||||
|
dest="output_txt",
|
||||||
|
default=False,
|
||||||
|
help="Enable creation of a txt file",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--ignore-exclusions",
|
||||||
|
action="store_true",
|
||||||
|
dest="ignore_exclusions",
|
||||||
|
default=False,
|
||||||
|
help="Ignore upstream exclusions (may return more false positives)",
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# If the user presses CTRL-C, exit gracefully without throwing errors
|
# If the user presses CTRL-C, exit gracefully without throwing errors
|
||||||
@@ -634,17 +708,14 @@ def main():
|
|||||||
|
|
||||||
# Check for newer version of Sherlock. If it exists, let the user know about it
|
# Check for newer version of Sherlock. If it exists, let the user know about it
|
||||||
try:
|
try:
|
||||||
r = requests.get(
|
latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
|
||||||
"https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/__init__.py"
|
latest_release_json = json_loads(latest_release_raw)
|
||||||
)
|
latest_remote_tag = latest_release_json["tag_name"]
|
||||||
|
|
||||||
remote_version = str(re.findall('__version__ *= *"(.*)"', r.text)[0])
|
if latest_remote_tag[1:] != __version__:
|
||||||
local_version = __version__
|
|
||||||
|
|
||||||
if remote_version != local_version:
|
|
||||||
print(
|
print(
|
||||||
"Update Available!\n"
|
f"Update available! {__version__} --> {latest_remote_tag[1:]}"
|
||||||
+ f"You are running version {local_version}. Version {remote_version} is available at https://github.com/sherlock-project/sherlock"
|
f"\n{latest_release_json['html_url']}"
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
@@ -675,10 +746,32 @@ def main():
|
|||||||
try:
|
try:
|
||||||
if args.local:
|
if args.local:
|
||||||
sites = SitesInformation(
|
sites = SitesInformation(
|
||||||
os.path.join(os.path.dirname(__file__), "resources/data.json")
|
os.path.join(os.path.dirname(__file__), "resources/data.json"),
|
||||||
|
honor_exclusions=False,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
sites = SitesInformation(args.json_file)
|
json_file_location = args.json_file
|
||||||
|
if args.json_file:
|
||||||
|
# If --json parameter is a number, interpret it as a pull request number
|
||||||
|
if args.json_file.isnumeric():
|
||||||
|
pull_number = args.json_file
|
||||||
|
pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
|
||||||
|
pull_request_raw = requests.get(pull_url, timeout=10).text
|
||||||
|
pull_request_json = json_loads(pull_request_raw)
|
||||||
|
|
||||||
|
# Check if it's a valid pull request
|
||||||
|
if "message" in pull_request_json:
|
||||||
|
print(f"ERROR: Pull request #{pull_number} not found.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
head_commit_sha = pull_request_json["head"]["sha"]
|
||||||
|
json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"
|
||||||
|
|
||||||
|
sites = SitesInformation(
|
||||||
|
data_file_path=json_file_location,
|
||||||
|
honor_exclusions=not args.ignore_exclusions,
|
||||||
|
do_not_exclude=args.site_list,
|
||||||
|
)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
print(f"ERROR: {error}")
|
print(f"ERROR: {error}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@@ -732,6 +825,7 @@ def main():
|
|||||||
username,
|
username,
|
||||||
site_data,
|
site_data,
|
||||||
query_notify,
|
query_notify,
|
||||||
|
dump_response=args.dump_response,
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
timeout=args.timeout,
|
timeout=args.timeout,
|
||||||
)
|
)
|
||||||
@@ -746,14 +840,15 @@ def main():
|
|||||||
else:
|
else:
|
||||||
result_file = f"{username}.txt"
|
result_file = f"{username}.txt"
|
||||||
|
|
||||||
with open(result_file, "w", encoding="utf-8") as file:
|
if args.output_txt:
|
||||||
exists_counter = 0
|
with open(result_file, "w", encoding="utf-8") as file:
|
||||||
for website_name in results:
|
exists_counter = 0
|
||||||
dictionary = results[website_name]
|
for website_name in results:
|
||||||
if dictionary.get("status").status == QueryStatus.CLAIMED:
|
dictionary = results[website_name]
|
||||||
exists_counter += 1
|
if dictionary.get("status").status == QueryStatus.CLAIMED:
|
||||||
file.write(dictionary["url_user"] + "\n")
|
exists_counter += 1
|
||||||
file.write(f"Total Websites Username Detected On : {exists_counter}\n")
|
file.write(dictionary["url_user"] + "\n")
|
||||||
|
file.write(f"Total Websites Username Detected On : {exists_counter}\n")
|
||||||
|
|
||||||
if args.csv:
|
if args.csv:
|
||||||
result_file = f"{username}.csv"
|
result_file = f"{username}.csv"
|
||||||
@@ -830,8 +925,8 @@ def main():
|
|||||||
{
|
{
|
||||||
"username": usernames,
|
"username": usernames,
|
||||||
"name": names,
|
"name": names,
|
||||||
"url_main": url_main,
|
"url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main],
|
||||||
"url_user": url_user,
|
"url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user],
|
||||||
"exists": exists,
|
"exists": exists,
|
||||||
"http_status": http_status,
|
"http_status": http_status,
|
||||||
"response_time_s": response_time_s,
|
"response_time_s": response_time_s,
|
||||||
@@ -7,6 +7,10 @@ import json
|
|||||||
import requests
|
import requests
|
||||||
import secrets
|
import secrets
|
||||||
|
|
||||||
|
|
||||||
|
MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
|
||||||
|
EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"
|
||||||
|
|
||||||
class SiteInformation:
|
class SiteInformation:
|
||||||
def __init__(self, name, url_home, url_username_format, username_claimed,
|
def __init__(self, name, url_home, url_username_format, username_claimed,
|
||||||
information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)):
|
information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)):
|
||||||
@@ -67,12 +71,17 @@ class SiteInformation:
|
|||||||
Return Value:
|
Return Value:
|
||||||
Nicely formatted string to get information about this object.
|
Nicely formatted string to get information about this object.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return f"{self.name} ({self.url_home})"
|
return f"{self.name} ({self.url_home})"
|
||||||
|
|
||||||
|
|
||||||
class SitesInformation:
|
class SitesInformation:
|
||||||
def __init__(self, data_file_path=None):
|
def __init__(
|
||||||
|
self,
|
||||||
|
data_file_path: str|None = None,
|
||||||
|
honor_exclusions: bool = True,
|
||||||
|
do_not_exclude: list[str] = [],
|
||||||
|
):
|
||||||
"""Create Sites Information Object.
|
"""Create Sites Information Object.
|
||||||
|
|
||||||
Contains information about all supported websites.
|
Contains information about all supported websites.
|
||||||
@@ -110,7 +119,7 @@ class SitesInformation:
|
|||||||
# The default data file is the live data.json which is in the GitHub repo. The reason why we are using
|
# The default data file is the live data.json which is in the GitHub repo. The reason why we are using
|
||||||
# this instead of the local one is so that the user has the most up-to-date data. This prevents
|
# this instead of the local one is so that the user has the most up-to-date data. This prevents
|
||||||
# users from creating issue about false positives which has already been fixed or having outdated data
|
# users from creating issue about false positives which has already been fixed or having outdated data
|
||||||
data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.json"
|
data_file_path = MANIFEST_URL
|
||||||
|
|
||||||
# Ensure that specified data file has correct extension.
|
# Ensure that specified data file has correct extension.
|
||||||
if not data_file_path.lower().endswith(".json"):
|
if not data_file_path.lower().endswith(".json"):
|
||||||
@@ -120,7 +129,7 @@ class SitesInformation:
|
|||||||
if data_file_path.lower().startswith("http"):
|
if data_file_path.lower().startswith("http"):
|
||||||
# Reference is to a URL.
|
# Reference is to a URL.
|
||||||
try:
|
try:
|
||||||
response = requests.get(url=data_file_path)
|
response = requests.get(url=data_file_path, timeout=30)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
raise FileNotFoundError(
|
raise FileNotFoundError(
|
||||||
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
|
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
|
||||||
@@ -152,9 +161,31 @@ class SitesInformation:
|
|||||||
raise FileNotFoundError(f"Problem while attempting to access "
|
raise FileNotFoundError(f"Problem while attempting to access "
|
||||||
f"data file '{data_file_path}'."
|
f"data file '{data_file_path}'."
|
||||||
)
|
)
|
||||||
|
|
||||||
site_data.pop('$schema', None)
|
site_data.pop('$schema', None)
|
||||||
|
|
||||||
|
if honor_exclusions:
|
||||||
|
try:
|
||||||
|
response = requests.get(url=EXCLUSIONS_URL, timeout=10)
|
||||||
|
if response.status_code == 200:
|
||||||
|
exclusions = response.text.splitlines()
|
||||||
|
exclusions = [exclusion.strip() for exclusion in exclusions]
|
||||||
|
|
||||||
|
for site in do_not_exclude:
|
||||||
|
if site in exclusions:
|
||||||
|
exclusions.remove(site)
|
||||||
|
|
||||||
|
for exclusion in exclusions:
|
||||||
|
try:
|
||||||
|
site_data.pop(exclusion, None)
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
# If there was any problem loading the exclusions, just continue without them
|
||||||
|
print("Warning: Could not load exclusions, continuing without them.")
|
||||||
|
honor_exclusions = False
|
||||||
|
|
||||||
self.sites = {}
|
self.sites = {}
|
||||||
|
|
||||||
# Add all site information from the json file to internal site list.
|
# Add all site information from the json file to internal site list.
|
||||||
@@ -194,7 +225,7 @@ class SitesInformation:
|
|||||||
for site in self.sites:
|
for site in self.sites:
|
||||||
if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
|
if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
|
||||||
continue
|
continue
|
||||||
sites[site] = self.sites[site]
|
sites[site] = self.sites[site]
|
||||||
self.sites = sites
|
self.sites = sites
|
||||||
|
|
||||||
def site_name_list(self):
|
def site_name_list(self):
|
||||||
+34
-6
@@ -2,22 +2,50 @@ import os
|
|||||||
import json
|
import json
|
||||||
import urllib
|
import urllib
|
||||||
import pytest
|
import pytest
|
||||||
from sherlock.sites import SitesInformation
|
from sherlock_project.sites import SitesInformation
|
||||||
|
|
||||||
|
def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]:
    """Load the repository's local ``data.json`` manifest as a plain dictionary.

    The manifest path is resolved relative to this file's directory
    (``../sherlock_project/resources/data.json``).

    Args:
        honor_exclusions: Forwarded unchanged to ``SitesInformation``.

    Returns:
        A mapping of each site's ``name`` to its ``information`` attribute,
        built by iterating over the ``SitesInformation`` object.
    """
    manifest_path = os.path.join(
        os.path.dirname(__file__),
        "../sherlock_project/resources/data.json",
    )
    loaded_sites = SitesInformation(
        data_file_path=manifest_path,
        honor_exclusions=honor_exclusions,
    )
    return {entry.name: entry.information for entry in loaded_sites}
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def sites_obj():
|
def sites_obj():
|
||||||
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json"))
|
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
|
||||||
yield sites_obj
|
yield sites_obj
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def sites_info():
|
def sites_info():
|
||||||
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json"))
|
yield fetch_local_manifest()
|
||||||
sites_iterable = {site.name: site.information for site in sites_obj}
|
|
||||||
yield sites_iterable
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def remote_schema():
|
def remote_schema():
|
||||||
schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.schema.json'
|
schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.schema.json'
|
||||||
with urllib.request.urlopen(schema_url) as remoteschema:
|
with urllib.request.urlopen(schema_url) as remoteschema:
|
||||||
schemadat = json.load(remoteschema)
|
schemadat = json.load(remoteschema)
|
||||||
yield schemadat
|
yield schemadat
|
||||||
|
|
||||||
|
def pytest_addoption(parser):
    """Register the ``--chunked-sites`` command-line option.

    The option stores its raw string value and defaults to ``None`` when it
    is not supplied on the command line.

    Args:
        parser: Object exposing ``addoption``; the option is registered on it.
    """
    option_settings = {
        "action": "store",
        "default": None,
        "help": "For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.",
    }
    parser.addoption("--chunked-sites", **option_settings)
|
||||||
|
|
||||||
|
def pytest_generate_tests(metafunc):
    """Parametrize tests that request the ``chunked_sites`` fixture.

    Each generated case receives a single-entry dict ``{site_name: site_data}``
    and uses the site name as its test id. When ``--chunked-sites`` is given,
    only the sites named in that comma-separated list are kept.
    """
    # Tests that do not ask for chunked sites are left untouched.
    if "chunked_sites" not in metafunc.fixturenames:
        return

    manifest = fetch_local_manifest(honor_exclusions=False)

    # Ingest and apply site selections
    requested: str | None = metafunc.config.getoption("--chunked-sites")
    if requested:
        wanted: list[str] = [name.strip() for name in requested.split(",")]
        manifest = {
            name: entry
            for name, entry in manifest.items()
            if name in wanted
        }

    case_ids = list(manifest)
    cases = [{name: manifest[name]} for name in case_ids]
    metafunc.parametrize("chunked_sites", cases, ids=case_ids)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import sherlock
|
import sherlock_project
|
||||||
|
|
||||||
#from sherlock.sites import SitesInformation
|
#from sherlock.sites import SitesInformation
|
||||||
#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")
|
#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")
|
||||||
|
|
||||||
def test_username_via_message():
|
def test_username_via_message():
|
||||||
sherlock.__main__("--version")
|
sherlock_project.__main__("--version")
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ class Interactives:
|
|||||||
def run_cli(args:str = "") -> str:
|
def run_cli(args:str = "") -> str:
|
||||||
"""Pass arguments to Sherlock as a normal user on the command line"""
|
"""Pass arguments to Sherlock as a normal user on the command line"""
|
||||||
# Adapt for platform differences (Windows likes to be special)
|
# Adapt for platform differences (Windows likes to be special)
|
||||||
if platform.system == "Windows":
|
if platform.system() == "Windows":
|
||||||
command:str = f"py -m sherlock {args}"
|
command:str = f"py -m sherlock_project {args}"
|
||||||
else:
|
else:
|
||||||
command:str = f"sherlock {args}"
|
command:str = f"sherlock {args}"
|
||||||
|
|
||||||
@@ -20,12 +20,11 @@ class Interactives:
|
|||||||
raise InteractivesSubprocessError(e.output.decode())
|
raise InteractivesSubprocessError(e.output.decode())
|
||||||
|
|
||||||
|
|
||||||
# -> list[str] is preferred, but will require deprecation of support for Python 3.8
|
def walk_sherlock_for_files_with(pattern: str) -> list[str]:
|
||||||
def walk_sherlock_for_files_with(pattern: str) -> list:
|
|
||||||
"""Check all files within the Sherlock package for matching patterns"""
|
"""Check all files within the Sherlock package for matching patterns"""
|
||||||
pattern:re.Pattern = re.compile(pattern)
|
pattern:re.Pattern = re.compile(pattern)
|
||||||
matching_files:list[str] = []
|
matching_files:list[str] = []
|
||||||
for root, dirs, files in os.walk("sherlock"):
|
for root, dirs, files in os.walk("sherlock_project"):
|
||||||
for file in files:
|
for file in files:
|
||||||
file_path = os.path.join(root,file)
|
file_path = os.path.join(root,file)
|
||||||
if "__pycache__" in file_path:
|
if "__pycache__" in file_path:
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ from jsonschema import validate
|
|||||||
|
|
||||||
def test_validate_manifest_against_local_schema():
|
def test_validate_manifest_against_local_schema():
|
||||||
"""Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
|
"""Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
|
||||||
json_relative: str = '../sherlock/resources/data.json'
|
json_relative: str = '../sherlock_project/resources/data.json'
|
||||||
schema_relative: str = '../sherlock/resources/data.schema.json'
|
schema_relative: str = '../sherlock_project/resources/data.schema.json'
|
||||||
|
|
||||||
json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
|
json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
|
||||||
schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)
|
schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ def test_validate_manifest_against_local_schema():
|
|||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
def test_validate_manifest_against_remote_schema(remote_schema):
|
def test_validate_manifest_against_remote_schema(remote_schema):
|
||||||
"""Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients."""
|
"""Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients."""
|
||||||
json_relative: str = '../sherlock/resources/data.json'
|
json_relative: str = '../sherlock_project/resources/data.json'
|
||||||
json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
|
json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
|
||||||
|
|
||||||
with open(json_path, 'r') as f:
|
with open(json_path, 'r') as f:
|
||||||
|
|||||||
@@ -2,9 +2,9 @@ import pytest
|
|||||||
import random
|
import random
|
||||||
import string
|
import string
|
||||||
import re
|
import re
|
||||||
from sherlock.sherlock import sherlock
|
from sherlock_project.sherlock import sherlock
|
||||||
from sherlock.notify import QueryNotify
|
from sherlock_project.notify import QueryNotify
|
||||||
from sherlock.result import QueryStatus
|
from sherlock_project.result import QueryStatus
|
||||||
#from sherlock_interactives import Interactives
|
#from sherlock_interactives import Interactives
|
||||||
|
|
||||||
|
|
||||||
@@ -44,7 +44,7 @@ class TestLiveTargets:
|
|||||||
|
|
||||||
# Known positives should only use sites trusted to be reliable and unchanging
|
# Known positives should only use sites trusted to be reliable and unchanging
|
||||||
@pytest.mark.parametrize('site,username',[
|
@pytest.mark.parametrize('site,username',[
|
||||||
('BodyBuilding', 'blue'),
|
('Keybase', 'blue'),
|
||||||
('devRant', 'blue'),
|
('devRant', 'blue'),
|
||||||
])
|
])
|
||||||
def test_known_positives_via_response_url(self, sites_info, site, username):
|
def test_known_positives_via_response_url(self, sites_info, site, username):
|
||||||
|
|||||||
+1
-1
@@ -1,5 +1,5 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from sherlock import sherlock
|
from sherlock_project import sherlock
|
||||||
from sherlock_interactives import Interactives
|
from sherlock_interactives import Interactives
|
||||||
from sherlock_interactives import InteractivesSubprocessError
|
from sherlock_interactives import InteractivesSubprocessError
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,100 @@
|
|||||||
|
import pytest
|
||||||
|
import re
|
||||||
|
import rstr
|
||||||
|
|
||||||
|
from sherlock_project.sherlock import sherlock
|
||||||
|
from sherlock_project.notify import QueryNotify
|
||||||
|
from sherlock_project.result import QueryResult, QueryStatus
|
||||||
|
|
||||||
|
|
||||||
|
FALSE_POSITIVE_ATTEMPTS: int = 2  # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit
FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15  # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable)
FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$'  # Used in absence of a regexCheck entry


def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str:
    """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`.

    Unbounded quantifiers are rewritten to bounded ones:
    ``+`` becomes ``{1,upper_bound}``, ``*`` becomes ``{0,upper_bound}`` and
    ``{n,}`` becomes ``{n,max(n, upper_bound)}``.

    Args:
        pattern: Regular expression source to rewrite.
        upper_bound: Maximum repetition count to impose. A value of 0 or less
            disables rewriting and returns ``pattern`` unchanged.

    Returns:
        The rewritten pattern string.
    """
    if upper_bound <= 0:
        # Nothing sensible can be produced (e.g. `{1,0}` is an invalid range).
        return pattern

    open_ended = re.compile(r'\{(\d+),\}')
    pieces: list[str] = []
    in_class = False
    pos = 0
    end = len(pattern)

    while pos < end:
        char = pattern[pos]

        # An escape always consumes the following character, so `\+`, `\*`
        # and `\{` stay literal while `\\+` is a backslash followed by a
        # real quantifier.
        if char == '\\' and pos + 1 < end:
            pieces.append(pattern[pos:pos + 2])
            pos += 2
            continue

        # Inside `[...]` the characters `+`, `*` and `{` are literals and
        # must not be rewritten.
        if in_class:
            in_class = char != ']'
            pieces.append(char)
            pos += 1
            continue

        if char == '[':
            in_class = True
            start = pos
            pos += 1
            if pos < end and pattern[pos] == '^':
                pos += 1
            # A `]` directly after `[` or `[^` is a literal member of the class.
            if pos < end and pattern[pos] == ']':
                pos += 1
            pieces.append(pattern[start:pos])
            continue

        if char == '+':
            pieces.append(f'{{1,{upper_bound}}}')
        elif char == '*':
            pieces.append(f'{{0,{upper_bound}}}')
        else:
            match = open_ended.match(pattern, pos) if char == '{' else None
            if match:
                lower_bound = int(match.group(1))
                # Raise the bound for this quantifier only; other quantifiers
                # in the same pattern keep the requested upper bound.
                pieces.append(f'{{{lower_bound},{max(lower_bound, upper_bound)}}}')
                pos = match.end()
                continue
            pieces.append(char)
        pos += 1

    return ''.join(pieces)
|
||||||
|
|
||||||
|
def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus:
    """Check if a site is likely to produce false positives.

    Queries ``site`` up to ``FALSE_POSITIVE_ATTEMPTS`` times, each time with a
    fresh username generated from ``pattern`` by ``rstr.xeger``.

    Returns:
        The first status that is ``AVAILABLE`` or ``WAF``; otherwise the
        status of the final attempt.

    Raises:
        TypeError: If the query result has no ``status`` attribute, or that
            attribute is not a ``QueryStatus``.
    """
    status: QueryStatus = QueryStatus.UNKNOWN
    attempts_left: int = FALSE_POSITIVE_ATTEMPTS

    while attempts_left > 0:
        attempts_left -= 1

        notifier: QueryNotify = QueryNotify()
        candidate: str = rstr.xeger(pattern)

        all_results = sherlock(
            username=candidate,
            site_data=sites_info,
            query_notify=notifier,
        )
        result: QueryResult | str = all_results[site]['status']

        # Validate the shape of the result before trusting its status.
        if not hasattr(result, 'status'):
            raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
        if type(result.status) is not QueryStatus:  # type: ignore
            raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}")  # type: ignore

        status = result.status  # type: ignore

        # A conclusive answer ends the retries early.
        if status in (QueryStatus.AVAILABLE, QueryStatus.WAF):
            break

    return status
|
||||||
|
|
||||||
|
|
||||||
|
def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus:
    """Check if a site is likely to produce false negatives.

    Queries ``site`` once, using the ``username_claimed`` value from its
    manifest entry.

    Returns:
        The ``QueryStatus`` reported for that username.

    Raises:
        TypeError: If the query result has no ``status`` attribute, or that
            attribute is not a ``QueryStatus``.
    """
    notifier: QueryNotify = QueryNotify()
    claimed_username = sites_info[site]['username_claimed']

    all_results = sherlock(
        username=claimed_username,
        site_data=sites_info,
        query_notify=notifier,
    )
    result: QueryResult | str = all_results[site]['status']

    # Validate the shape of the result before trusting its status.
    if not hasattr(result, 'status'):
        raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
    if type(result.status) is not QueryStatus:  # type: ignore
        raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}")  # type: ignore

    return result.status  # type: ignore
|
||||||
|
|
||||||
|
@pytest.mark.validate_targets
@pytest.mark.online
class Test_All_Targets:
    """Validation of manifest targets, one ``chunked_sites`` entry per test case."""

    @pytest.mark.validate_targets_fp
    def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]):
        """Flag targets that may report a false positive.

        Each site is probed with usernames generated from its ``regexCheck``
        pattern, or from ``FALSE_POSITIVE_DEFAULT_PATTERN`` when the entry has
        none. The site passes only if the outcome is ``AVAILABLE``.
        """
        for site, site_entry in chunked_sites.items():
            pattern: str = site_entry.get('regexCheck', FALSE_POSITIVE_DEFAULT_PATTERN)

            # Bound open-ended quantifiers unless the limit is disabled (0).
            if FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0:
                pattern = set_pattern_upper_bound(pattern)

            result: QueryStatus = false_positive_check(chunked_sites, site, pattern)
            assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}"

    @pytest.mark.validate_targets_fn
    def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]):
        """Flag targets that may report a false negative.

        Each site is queried with its ``username_claimed`` value and passes
        only if the outcome is ``CLAIMED``.
        """
        for site in chunked_sites:
            result: QueryStatus = false_negative_check(chunked_sites, site)
            assert result is QueryStatus.CLAIMED, f"{site} produced false negative, result was {result}"
|
||||||
|
|
||||||
@@ -1,17 +1,16 @@
|
|||||||
import os
|
import os
|
||||||
from sherlock_interactives import Interactives
|
from sherlock_interactives import Interactives
|
||||||
import sherlock
|
import sherlock_project
|
||||||
|
|
||||||
def test_versioning() -> None:
|
def test_versioning() -> None:
|
||||||
# Ensure __version__ matches version presented to the user
|
# Ensure __version__ matches version presented to the user
|
||||||
assert sherlock.__version__ in Interactives.run_cli("--version")
|
assert sherlock_project.__version__ in Interactives.run_cli("--version")
|
||||||
# Ensure __init__ is single source of truth for __version__ in package
|
# Ensure __init__ is single source of truth for __version__ in package
|
||||||
# Temporarily allows sherlock.py so as to not trigger early upgrades
|
# Temporarily allows sherlock.py so as to not trigger early upgrades
|
||||||
found:list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *')
|
found:list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *')
|
||||||
expected:list = [
|
expected:list = [
|
||||||
# Normalization is REQUIRED for Windows ( / vs \ )
|
# Normalization is REQUIRED for Windows ( / vs \ )
|
||||||
os.path.normpath("sherlock/__init__.py"),
|
os.path.normpath("sherlock_project/__init__.py"),
|
||||||
os.path.normpath("sherlock/sherlock.py"),
|
|
||||||
]
|
]
|
||||||
# Sorting is REQUIRED for Mac
|
# Sorting is REQUIRED for Mac
|
||||||
assert sorted(found) == sorted(expected)
|
assert sorted(found) == sorted(expected)
|
||||||
|
|||||||
@@ -7,8 +7,6 @@ envlist =
|
|||||||
py312
|
py312
|
||||||
py311
|
py311
|
||||||
py310
|
py310
|
||||||
py39
|
|
||||||
py38
|
|
||||||
|
|
||||||
[testenv]
|
[testenv]
|
||||||
description = Attempt to build and install the package
|
description = Attempt to build and install the package
|
||||||
@@ -16,9 +14,10 @@ deps =
|
|||||||
coverage
|
coverage
|
||||||
jsonschema
|
jsonschema
|
||||||
pytest
|
pytest
|
||||||
|
rstr
|
||||||
allowlist_externals = coverage
|
allowlist_externals = coverage
|
||||||
commands =
|
commands =
|
||||||
coverage run --source=sherlock --module pytest -v
|
coverage run --source=sherlock_project --module pytest -v
|
||||||
coverage report --show-missing
|
coverage report --show-missing
|
||||||
|
|
||||||
[testenv:offline]
|
[testenv:offline]
|
||||||
@@ -37,8 +36,7 @@ commands =
|
|||||||
|
|
||||||
[gh-actions]
|
[gh-actions]
|
||||||
python =
|
python =
|
||||||
|
3.13: py313
|
||||||
3.12: py312
|
3.12: py312
|
||||||
3.11: py311
|
3.11: py311
|
||||||
3.10: py310
|
3.10: py310
|
||||||
3.9: py39
|
|
||||||
3.8: py38
|
|
||||||
|
|||||||
Reference in New Issue
Block a user