From 3dd4af63bbd55ced5c6d26f8d699661838a0d6dc Mon Sep 17 00:00:00 2001 From: ziad hany Date: Tue, 14 Oct 2025 17:40:15 +0300 Subject: [PATCH 1/4] Add support for collecting GitHub vulnerability-related issues and pull requests Add tests for this functionality Signed-off-by: ziad hany --- vulnerabilities/importers/__init__.py | 2 + .../pipelines/v2_importers/github_issue_pr.py | 92 +++++++++++++++++++ .../v2_importers/test_github_issue_pr.py | 80 ++++++++++++++++ .../expected_advisory_output.json | 64 +++++++++++++ .../github_issue_pr/issues_and_pr.json | 24 +++++ 5 files changed, 262 insertions(+) create mode 100644 vulnerabilities/pipelines/v2_importers/github_issue_pr.py create mode 100644 vulnerabilities/tests/pipelines/v2_importers/test_github_issue_pr.py create mode 100644 vulnerabilities/tests/test_data/github_issue_pr/expected_advisory_output.json create mode 100644 vulnerabilities/tests/test_data/github_issue_pr/issues_and_pr.json diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 72e4ea4b3..2c7f61463 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -48,6 +48,7 @@ from vulnerabilities.pipelines.v2_importers import ( elixir_security_importer as elixir_security_importer_v2, ) +from vulnerabilities.pipelines.v2_importers import github_issue_pr as github_issue_pr_v2 from vulnerabilities.pipelines.v2_importers import epss_importer_v2 from vulnerabilities.pipelines.v2_importers import fireeye_importer_v2 from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2 @@ -135,5 +136,6 @@ ubuntu_usn.UbuntuUSNImporter, fireeye.FireyeImporter, oss_fuzz.OSSFuzzImporter, + github_issue_pr_v2.GithubPipelineIssuePR, ] ) diff --git a/vulnerabilities/pipelines/v2_importers/github_issue_pr.py b/vulnerabilities/pipelines/v2_importers/github_issue_pr.py new file mode 100644 index 000000000..ec33e925e --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/github_issue_pr.py @@ -0,0 +1,92 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import re +from collections import defaultdict + +from github import Github + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import ReferenceV2 +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerablecode.settings import env + +GITHUB_TOKEN = env.str("GITHUB_TOKEN") + + +class GithubPipelineIssuePR(VulnerableCodeBaseImporterPipelineV2): + """ + Pipeline to collect GitHub issues and PRs related to vulnerabilities. + """ + + pipeline_id = "collect_issues_pr" + + @classmethod + def steps(cls): + return ( + cls.fetch_entries, + cls.collect_and_store_advisories, + ) + + def fetch_entries(self): + """Clone the repository.""" + self.repo_url = "https://github.com/torvalds/linux" + repo_name = "django/django" + + g = Github(login_or_token=GITHUB_TOKEN) + + base_query = f"repo:{repo_name} (CVE OR PYSEC OR GHSA)" + self.issues = g.search_issues(f"{base_query} is:issue") + self.pull_requestes = g.search_issues(f"{base_query} is:pr") + + def advisories_count(self) -> int: + """ + Return total number of advisories discovered (issues + PRs). + """ + return self.issues.totalCount + self.pull_requestes.totalCount + + def collect_issues_and_prs(self): + """ + Group issues and PRs by vulnerability identifiers (like CVE-xxxx-yyyy). + Returns a dict mapping vuln_id -> [(type, html_url)]. + """ + self.log("Grouping GitHub issues and PRs by vulnerability identifiers.") + + grouped_items = defaultdict(list) + pattern = re.compile(r"(CVE-\d{4}-\d+|PYSEC-\d{4}-\d+|GHSA-[\w-]+)", re.IGNORECASE) + + for issue in self.issues: + matches = pattern.findall(issue.title + " " + (issue.body or "")) + for match in matches: + grouped_items[match].append(("Issue", issue.html_url)) + + for pr in self.pull_requestes: + matches = pattern.findall(pr.title + " " + (pr.body or "")) + for match in matches: + grouped_items[match].append(("PR", pr.html_url)) + + self.log(f"Grouped {len(grouped_items)} unique vulnerability identifiers.") + return grouped_items + + def collect_advisories(self): + """ + Generate AdvisoryData objects for each vulnerability ID grouped with its related GitHub issues and PRs. + """ + self.log("Generating AdvisoryData objects from GitHub issues and PRs.") + grouped_data = self.collect_issues_and_prs() + + for vuln_id, refs in grouped_data.items(): + references = [ReferenceV2(reference_id=ref_id, url=url) for ref_id, url in refs] + + yield AdvisoryData( + advisory_id=vuln_id, + aliases=[vuln_id], + references_v2=references, + url=self.repo_url, + ) diff --git a/vulnerabilities/tests/pipelines/v2_importers/test_github_issue_pr.py b/vulnerabilities/tests/pipelines/v2_importers/test_github_issue_pr.py new file mode 100644 index 000000000..e2b80f00f --- /dev/null +++ b/vulnerabilities/tests/pipelines/v2_importers/test_github_issue_pr.py @@ -0,0 +1,80 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import json +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from vulnerabilities.pipelines.v2_importers.github_issue_pr import GithubPipelineIssuePR +from vulnerabilities.tests import util_tests + + +@pytest.fixture +def pipeline(): + pipeline = GithubPipelineIssuePR() + pipeline.repo_url = "https://github.com/test/repo" + pipeline.log = MagicMock() + return pipeline + + +@pytest.mark.django_db +def test_collect_issues_and_prs(pipeline): + pipeline.issues = [ + SimpleNamespace( + title="Fix for CVE-2023-1234 found", + body="This resolves a security issue", + html_url="http://example.com/issue1", + ), + SimpleNamespace( + title="No vulnerability mentioned", + body="This is unrelated", + html_url="http://example.com/issue2", + ), + ] + + pipeline.pull_requestes = [ + SimpleNamespace( + title="Patch addressing GHSA-zzz-111", + body="Also fixes PYSEC-2024-5678", + html_url="http://example.com/pr1", + ) + ] + + result = pipeline.collect_issues_and_prs() + expected = { + "CVE-2023-1234": [("Issue", "http://example.com/issue1")], + "GHSA-zzz-111": [("PR", "http://example.com/pr1")], + "PYSEC-2024-5678": [("PR", "http://example.com/pr1")], + } + + assert result == expected + + +TEST_DATA = Path(__file__).parent.parent.parent / "test_data" / "github_issue_pr" + + +@pytest.mark.django_db +def test_collect_advisories_from_json(): + input_file = TEST_DATA / "issues_and_pr.json" + expected_file = TEST_DATA / "expected_advisory_output.json" + + issues_and_prs = json.loads(input_file.read_text(encoding="utf-8")) + + pipeline = GithubPipelineIssuePR() + pipeline.repo_url = "https://github.com/test/repo" + pipeline.log = MagicMock() + + pipeline.collect_issues_and_prs = MagicMock(return_value=issues_and_prs) + + result = [adv.to_dict() for adv in pipeline.collect_advisories()] + + util_tests.check_results_against_json(result, expected_file) diff --git a/vulnerabilities/tests/test_data/github_issue_pr/expected_advisory_output.json b/vulnerabilities/tests/test_data/github_issue_pr/expected_advisory_output.json new file mode 100644 index 000000000..3ac486d96 --- /dev/null +++ b/vulnerabilities/tests/test_data/github_issue_pr/expected_advisory_output.json @@ -0,0 +1,64 @@ +[ + { + "advisory_id": "CVE-2023-1234", + "aliases": [ + "CVE-2023-1234" + ], + "summary": "", + "affected_packages": [], + "references_v2": [ + { + "reference_id": "Issue", + "reference_type": "", + "url": "https://example.com/issue1" + }, + { + "reference_id": "PR", + "reference_type": "", + "url": "https://example.com/pr1" + } + ], + "severities": [], + "date_published": null, + "weaknesses": [], + "url": "https://github.com/test/repo" + }, + { + "advisory_id": "GHSA-zzz-111", + "aliases": [ + "GHSA-zzz-111" + ], + "summary": "", + "affected_packages": [], + "references_v2": [ + { + "reference_id": "PR", + "reference_type": "", + "url": "https://example.com/pr1" + } + ], + "severities": [], + "date_published": null, + "weaknesses": [], + "url": "https://github.com/test/repo" + }, + { + "advisory_id": "PYSEC-2024-5678", + "aliases": [ + "PYSEC-2024-5678" + ], + "summary": "", + "affected_packages": [], + "references_v2": [ + { + "reference_id": "PR", + "reference_type": "", + "url": "https://example.com/pr1" + } + ], + "severities": [], + "date_published": null, + "weaknesses": [], + "url": "https://github.com/test/repo" + } +] \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/github_issue_pr/issues_and_pr.json b/vulnerabilities/tests/test_data/github_issue_pr/issues_and_pr.json new file mode 100644 index 000000000..2f68eab98 --- /dev/null +++ b/vulnerabilities/tests/test_data/github_issue_pr/issues_and_pr.json @@ -0,0 +1,24 @@ +{ + "CVE-2023-1234": [ + [ + "Issue", + "https://example.com/issue1" + ], + [ + "PR", + "https://example.com/pr1" + ] + ], + "GHSA-zzz-111": [ + [ + "PR", + "https://example.com/pr1" + ] + ], + "PYSEC-2024-5678": [ + [ + "PR", + "https://example.com/pr1" + ] + ] +} \ No newline at end of file From 3b39e3c10d72264a280398efe500e84fb200624d Mon Sep 17 00:00:00 2001 From: ziad hany Date: Sat, 24 Jan 2026 00:25:56 +0200 Subject: [PATCH 2/4] Resolve migration conflicts Rename the pipeline name Add the missing pygithub dependency Signed-off-by: ziad hany --- requirements.txt | 1 + vulnerabilities/importers/__init__.py | 4 ++-- .../pipelines/v2_importers/github_issue_pr.py | 6 ++--- .../v2_importers/test_github_issue_pr.py | 23 +++++++++---------- .../expected_advisory_output.json | 15 +++++------- 5 files changed, 23 insertions(+), 26 deletions(-) diff --git a/requirements.txt b/requirements.txt index dbda64e0c..dd5f63be8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -126,3 +126,4 @@ wcwidth==0.2.5 websocket-client==0.59.0 yarl==1.7.2 zipp==3.19.1 +PyGithub==2.8.1 diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 2c7f61463..d9b95bc88 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -48,9 +48,9 @@ from vulnerabilities.pipelines.v2_importers import ( elixir_security_importer as elixir_security_importer_v2, ) -from vulnerabilities.pipelines.v2_importers import github_issue_pr as github_issue_pr_v2 from vulnerabilities.pipelines.v2_importers import epss_importer_v2 from vulnerabilities.pipelines.v2_importers import fireeye_importer_v2 +from vulnerabilities.pipelines.v2_importers import github_issue_pr as github_issue_pr_v2 from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2 from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2 from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2 @@ -101,6 +101,7 @@ epss_importer_v2.EPSSImporterPipeline, nginx_importer_v2.NginxImporterPipeline, mattermost_importer_v2.MattermostImporterPipeline, + github_issue_pr_v2.GithubPipelineIssuePRPipeline, nvd_importer.NVDImporterPipeline, github_importer.GitHubAPIImporterPipeline, gitlab_importer.GitLabImporterPipeline, @@ -136,6 +137,5 @@ ubuntu_usn.UbuntuUSNImporter, fireeye.FireyeImporter, oss_fuzz.OSSFuzzImporter, - github_issue_pr_v2.GithubPipelineIssuePR, ] ) diff --git a/vulnerabilities/pipelines/v2_importers/github_issue_pr.py b/vulnerabilities/pipelines/v2_importers/github_issue_pr.py index ec33e925e..d5de59740 100644 --- a/vulnerabilities/pipelines/v2_importers/github_issue_pr.py +++ b/vulnerabilities/pipelines/v2_importers/github_issue_pr.py @@ -20,12 +20,12 @@ GITHUB_TOKEN = env.str("GITHUB_TOKEN") -class GithubPipelineIssuePR(VulnerableCodeBaseImporterPipelineV2): +class GithubPipelineIssuePRPipeline(VulnerableCodeBaseImporterPipelineV2): """ Pipeline to collect GitHub issues and PRs related to vulnerabilities. """ - pipeline_id = "collect_issues_pr" + pipeline_id = "collect_github_issues_pr" @classmethod def steps(cls): @@ -86,7 +86,7 @@ def collect_advisories(self): yield AdvisoryData( advisory_id=vuln_id, - aliases=[vuln_id], + aliases=[], references_v2=references, url=self.repo_url, ) diff --git a/vulnerabilities/tests/pipelines/v2_importers/test_github_issue_pr.py b/vulnerabilities/tests/pipelines/v2_importers/test_github_issue_pr.py index e2b80f00f..a3dc57d87 100644 --- a/vulnerabilities/tests/pipelines/v2_importers/test_github_issue_pr.py +++ b/vulnerabilities/tests/pipelines/v2_importers/test_github_issue_pr.py @@ -14,13 +14,15 @@ import pytest -from vulnerabilities.pipelines.v2_importers.github_issue_pr import GithubPipelineIssuePR +from vulnerabilities.pipelines.v2_importers.github_issue_pr import GithubPipelineIssuePRPipeline from vulnerabilities.tests import util_tests +TEST_DATA = Path(__file__).parent.parent.parent / "test_data" / "github_issue_pr" + @pytest.fixture def pipeline(): - pipeline = GithubPipelineIssuePR() + pipeline = GithubPipelineIssuePRPipeline() pipeline.repo_url = "https://github.com/test/repo" pipeline.log = MagicMock() return pipeline @@ -32,12 +34,12 @@ def test_collect_issues_and_prs(pipeline): SimpleNamespace( title="Fix for CVE-2023-1234 found", body="This resolves a security issue", - html_url="http://example.com/issue1", + html_url="https://example.com/issue1", ), SimpleNamespace( title="No vulnerability mentioned", body="This is unrelated", - html_url="http://example.com/issue2", + html_url="https://example.com/issue2", ), ] @@ -45,23 +47,20 @@ def test_collect_issues_and_prs(pipeline): SimpleNamespace( title="Patch addressing GHSA-zzz-111", body="Also fixes PYSEC-2024-5678", - html_url="http://example.com/pr1", + html_url="https://example.com/pr1", ) ] result = pipeline.collect_issues_and_prs() expected = { - "CVE-2023-1234": [("Issue", "http://example.com/issue1")], - "GHSA-zzz-111": [("PR", "http://example.com/pr1")], - "PYSEC-2024-5678": [("PR", "http://example.com/pr1")], + "CVE-2023-1234": [("Issue", "https://example.com/issue1")], + "GHSA-zzz-111": [("PR", "https://example.com/pr1")], + "PYSEC-2024-5678": [("PR", "https://example.com/pr1")], } assert result == expected -TEST_DATA = Path(__file__).parent.parent.parent / "test_data" / "github_issue_pr" - - @pytest.mark.django_db def test_collect_advisories_from_json(): input_file = TEST_DATA / "issues_and_pr.json" @@ -69,7 +68,7 @@ def test_collect_advisories_from_json(): issues_and_prs = json.loads(input_file.read_text(encoding="utf-8")) - pipeline = GithubPipelineIssuePR() + pipeline = GithubPipelineIssuePRPipeline() pipeline.repo_url = "https://github.com/test/repo" pipeline.log = MagicMock() diff --git a/vulnerabilities/tests/test_data/github_issue_pr/expected_advisory_output.json b/vulnerabilities/tests/test_data/github_issue_pr/expected_advisory_output.json index 3ac486d96..525261280 100644 --- a/vulnerabilities/tests/test_data/github_issue_pr/expected_advisory_output.json +++ b/vulnerabilities/tests/test_data/github_issue_pr/expected_advisory_output.json @@ -1,9 +1,7 @@ [ { "advisory_id": "CVE-2023-1234", - "aliases": [ - "CVE-2023-1234" - ], + "aliases": [], "summary": "", "affected_packages": [], "references_v2": [ @@ -18,6 +16,7 @@ "url": "https://example.com/pr1" } ], + "patches": [], "severities": [], "date_published": null, "weaknesses": [], @@ -25,9 +24,7 @@ }, { "advisory_id": "GHSA-zzz-111", - "aliases": [ - "GHSA-zzz-111" - ], + "aliases": [], "summary": "", "affected_packages": [], "references_v2": [ @@ -37,6 +34,7 @@ "url": "https://example.com/pr1" } ], + "patches": [], "severities": [], "date_published": null, "weaknesses": [], @@ -44,9 +42,7 @@ }, { "advisory_id": "PYSEC-2024-5678", - "aliases": [ - "PYSEC-2024-5678" - ], + "aliases": [], "summary": "", "affected_packages": [], "references_v2": [ @@ -56,6 +52,7 @@ "url": "https://example.com/pr1" } ], + "patches": [], "severities": [], "date_published": null, "weaknesses": [], From 94cc08bc8a2cdad68a7cca5e7a4f3b5e49e5173a Mon Sep 17 00:00:00 2001 From: ziad hany Date: Sat, 24 Jan 2026 00:32:14 +0200 Subject: [PATCH 3/4] Add the missing pygithub dependency to setup.cfg Signed-off-by: ziad hany --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index c104497ab..fb9433d6d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,6 +90,7 @@ install_requires = # networking GitPython>=3.1.17 + PyGithub>=2.8.1 requests>=2.25.1 fetchcode>=0.6.0 From 575fedc93671b9ce47fee17629a24e16eb2de000 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Sat, 24 Jan 2026 00:45:40 +0200 Subject: [PATCH 4/4] Fix CI & resolve dependency conflict Signed-off-by: ziad hany --- requirements.txt | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index dd5f63be8..e53583c89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -126,4 +126,4 @@ wcwidth==0.2.5 websocket-client==0.59.0 yarl==1.7.2 zipp==3.19.1 -PyGithub==2.8.1 +PyGithub==2.6.1 diff --git a/setup.cfg b/setup.cfg index fb9433d6d..7016aa57e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,7 +90,7 @@ install_requires = # networking GitPython>=3.1.17 - PyGithub>=2.8.1 + PyGithub>=2.6.1 requests>=2.25.1 fetchcode>=0.6.0