From 9882478fb5cf84e78d8e416dcf56f9c44964fa6f Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 14:56:44 -0400 Subject: [PATCH 1/7] feat: add instapaper --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index a7a75178..386bb36c 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1142,6 +1142,12 @@ "urlProbe": "https://imginn.com/{}", "username_claimed": "instagram" }, + "Instapaper": { + "errorType": "status_code", + "url": "https://www.instapaper.com/p/{}", + "urlMain": "https://www.instapaper.com/", + "username_claimed": "john" + }, "Instructables": { "errorType": "status_code", "url": "https://www.instructables.com/member/{}", From 97ba4e861601482b3811a222fc3d916a9db557a8 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 15:39:01 -0400 Subject: [PATCH 2/7] fix(ci): validation issue --- .../workflows/validate_modified_targets.yml | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index 266c5108..4a263122 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -14,6 +14,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v5 with: + ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 - name: Set up Python @@ -38,13 +39,21 @@ jobs: # Discover changes git show origin/${{ github.base_ref }}:sherlock_project/resources/data.json > data.json.base + cp sherlock_project/resources/data.json data.json.head + CHANGED=$( - jq -r --slurpfile base data.json.base --slurpfile head sherlock_project/resources/data.json ' - [ - ($head[0] | keys_unsorted[]) as $key - | select(($base[0][$key] != $head[0][$key]) or ($base[0][$key] | not)) - | $key - ] | unique | join(",")' + 
python - <<'EOF' + import json + with open("data.json.base") as f: base = json.load(f) + with open("data.json.head") as f: head = json.load(f) + + changed = [] + for k, v in head.items(): + if k not in base or base[k] != v: + changed.append(k) + + print(",".join(sorted(changed))) + EOF ) # Preserve changelist From fa0564166134257c611a75dd5a64fc9e92fb88b7 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 17:43:00 -0400 Subject: [PATCH 3/7] ci: improve validation --- .../workflows/validate_modified_targets.yml | 30 +++++++++++++++++-- pyproject.toml | 4 +++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index 4a263122..44a6fdbd 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -62,12 +62,38 @@ jobs: - name: Validate modified targets if: steps.discover-modified.outputs.changed_targets != '' + continue-on-error: true run: | $(poetry env activate) - pytest -q --tb no -rA -m validate_targets -n 20 --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" + pytest -q --tb no -rA -m validate_targets -n 20 --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" --junitxml=validation_results.xml deactivate - - name: Announce skip if no modified targets + - name: Prepare validation summary + if: steps.discover-modified.outputs.changed_targets != '' + id: prepare-summary + run: | + $(poetry env activate) + summary=$( + python devel/summarize_site_validation.py validation_results.xml > summary.md || echo "Failed to generate summary of test results" + ) + deactivate + echo "summary<<EOF" >> $GITHUB_OUTPUT + echo "$summary" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + - name: Announce validation results + if: steps.discover-modified.outputs.changed_targets != '' + uses: actions/github-script@v8 + with: + script: | + github.rest.issues.createComment({ 
+ issue_number: context.payload.pull_request.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: `${{ steps.prepare-summary.outputs.summary }}` + }); + + - name: This step shows as ran when no modifications are found if: steps.discover-modified.outputs.changed_targets == '' run: | echo "No modified targets found" diff --git a/pyproject.toml b/pyproject.toml index b1ca18d7..1d66dac6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,5 +60,9 @@ rstr = "^3.2.2" pytest = "^8.4.2" pytest-xdist = "^3.8.0" + +[tool.poetry.group.ci.dependencies] +defusedxml = "^0.7.1" + [tool.poetry.scripts] sherlock = 'sherlock_project.sherlock:main' From cd6fa5bb305429a370ad6977bb062a3f5005c365 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 18:04:42 -0400 Subject: [PATCH 4/7] ci: fix the thing --- .github/workflows/validate_modified_targets.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index 44a6fdbd..99844216 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -73,24 +73,26 @@ jobs: id: prepare-summary run: | $(poetry env activate) + python devel/summarize_site_validation.py validation_results.xml summary=$( - python devel/summarize_site_validation.py validation_results.xml > summary.md || echo "Failed to generate summary of test results" + python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results" ) deactivate - echo "summary<<EOF" >> $GITHUB_OUTPUT - echo "$summary" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT + echo "$summary" > validation_summary.md + cat validation_summary.md - name: Announce validation results if: steps.discover-modified.outputs.changed_targets != '' uses: actions/github-script@v8 with: script: | + const fs = require('fs'); + const body = 
fs.readFileSync('validation_summary.md', 'utf8'); github.rest.issues.createComment({ issue_number: context.payload.pull_request.number, owner: context.repo.owner, repo: context.repo.repo, - body: `${{ steps.prepare-summary.outputs.summary }}` + body: body, }); - name: This step shows as ran when no modifications are found From 7ca90ba72824baf21fbd3775cc68b010550f444f Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 18:06:25 -0400 Subject: [PATCH 5/7] ci: test result summarization --- devel/summarize_site_validation.py | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 devel/summarize_site_validation.py diff --git a/devel/summarize_site_validation.py b/devel/summarize_site_validation.py new file mode 100644 index 00000000..91a23e36 --- /dev/null +++ b/devel/summarize_site_validation.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# This module summarizes the results of site validation tests queued by +# workflow validate_modified_targets for presentation in Issue comments. 
+ +from defusedxml import ElementTree as ET +import sys +from pathlib import Path + +def summarize_junit_xml(xml_path: Path) -> str: + tree = ET.parse(xml_path) + root = tree.getroot() + suite = root.find('testsuite') + + pass_message: str = ":heavy_check_mark:   Pass" + fail_message: str = ":x:   Fail" + + if suite is None: + raise ValueError("Invalid JUnit XML: No testsuite found") + + summary_lines: list[str] = [] + summary_lines.append("#### Automatic validation of changes\n") + summary_lines.append("| | F- Check | F+ Check |") + summary_lines.append("|---|---|---|") + + failures = int(suite.get('failures', 0)) + errors_detected: bool = False + + results: dict[str, dict[str, str]] = {} + + for testcase in suite.findall('testcase'): + test_name = testcase.get('name').split('[')[0] + site_name = testcase.get('name').split('[')[1].rstrip(']') + failure = testcase.find('failure') + error = testcase.find('error') + + if site_name not in results: + results[site_name] = {} + + if test_name == "test_false_neg": + results[site_name]['F- Check'] = pass_message if failure is None and error is None else fail_message + elif test_name == "test_false_pos": + results[site_name]['F+ Check'] = pass_message if failure is None and error is None else fail_message + + if error is not None: + errors_detected = True + + for result in results: + summary_lines.append(f"| {result} | {results[result].get('F- Check', 'Error!')} | {results[result].get('F+ Check', 'Error!')} |") + + if failures > 0: + summary_lines.append("\n___\n" + + "\nFailures were detected on at least one updated target. Commits containing accuracy failures" + + " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).") + + if errors_detected: + summary_lines.append("\n___\n" + + "\n**Errors were detected during validation. 
Please review the workflow logs.**") + + return "\n".join(summary_lines) + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: summarize_site_validation.py <junit-xml-file>") + sys.exit(1) + + xml_path: Path = Path(sys.argv[1]) + if not xml_path.is_file(): + print(f"Error: File '{xml_path}' does not exist.") + sys.exit(1) + + summary: str = summarize_junit_xml(xml_path) + print(summary) From 0fa2e1afc78508474b428d9b918f3001c74bada9 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 18:09:44 -0400 Subject: [PATCH 6/7] chore: cleanup everything --- .github/workflows/validate_modified_targets.yml | 2 -- devel/summarize_site_validation.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index 99844216..a98483cc 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -73,13 +73,11 @@ jobs: id: prepare-summary run: | $(poetry env activate) - python devel/summarize_site_validation.py validation_results.xml summary=$( python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results" ) deactivate echo "$summary" > validation_summary.md - cat validation_summary.md - name: Announce validation results if: steps.discover-modified.outputs.changed_targets != '' diff --git a/devel/summarize_site_validation.py b/devel/summarize_site_validation.py index 91a23e36..89d39750 100644 --- a/devel/summarize_site_validation.py +++ b/devel/summarize_site_validation.py @@ -19,7 +19,7 @@ def summarize_junit_xml(xml_path: Path) -> str: summary_lines: list[str] = [] summary_lines.append("#### Automatic validation of changes\n") - summary_lines.append("| | F- Check | F+ Check |") + summary_lines.append("| Target | F+ Check | F- Check |") summary_lines.append("|---|---|---|") failures = int(suite.get('failures', 0)) @@ -45,7 +45,7 @@ def 
summarize_junit_xml(xml_path: Path) -> str: errors_detected = True for result in results: - summary_lines.append(f"| {result} | {results[result].get('F- Check', 'Error!')} | {results[result].get('F+ Check', 'Error!')} |") + summary_lines.append(f"| {result} | {results[result].get('F+ Check', 'Error!')} | {results[result].get('F- Check', 'Error!')} |") if failures > 0: summary_lines.append("\n___\n" + From dc61cdc7a4de06fbaa21347f5bc733a395c85695 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 18:10:33 -0400 Subject: [PATCH 7/7] chore: set request method --- sherlock_project/resources/data.json | 1 + 1 file changed, 1 insertion(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 386bb36c..db013b75 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1144,6 +1144,7 @@ }, "Instapaper": { "errorType": "status_code", + "request_method": "GET", "url": "https://www.instapaper.com/p/{}", "urlMain": "https://www.instapaper.com/", "username_claimed": "john"