Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Removed custom paginated and added iterator #1801

Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions vulnerabilities/improvers/default.py
Original file line number Diff line number Diff line change
@@ -44,9 +44,9 @@ def interesting_advisories(self) -> QuerySet:
return (
Advisory.objects.filter(Q(created_by=self.importer.qualified_name))
.order_by("-date_collected")
.paginated()
.iterator()
)
return Advisory.objects.all().order_by("-date_collected").paginated()
return Advisory.objects.all().order_by("-date_collected").iterator()

def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
if not advisory_data:
6 changes: 3 additions & 3 deletions vulnerabilities/improvers/valid_versions.py
Original file line number Diff line number Diff line change
@@ -64,8 +64,8 @@ class ValidVersionImprover(Improver):
@property
def interesting_advisories(self) -> QuerySet:
if issubclass(self.importer, VulnerableCodeBaseImporterPipeline):
return Advisory.objects.filter(Q(created_by=self.importer.pipeline_id)).paginated()
return Advisory.objects.filter(Q(created_by=self.importer.qualified_name)).paginated()
return Advisory.objects.filter(Q(created_by=self.importer.pipeline_id)).iterator()
return Advisory.objects.filter(Q(created_by=self.importer.qualified_name)).iterator()

def get_package_versions(
self, package_url: PackageURL, until: Optional[datetime] = None
@@ -222,7 +222,7 @@ class NginxBasicImprover(Improver):

@property
def interesting_advisories(self) -> QuerySet:
return Advisory.objects.filter(created_by=NginxImporterPipeline.pipeline_id).paginated()
return Advisory.objects.filter(created_by=NginxImporterPipeline.pipeline_id).iterator()

def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
all_versions = list(self.fetch_nginx_version_from_git_tags())
2 changes: 1 addition & 1 deletion vulnerabilities/improvers/vulnerability_status.py
Original file line number Diff line number Diff line change
@@ -40,7 +40,7 @@ def interesting_advisories(self) -> QuerySet:
return (
Advisory.objects.filter(Q(created_by=NVDImporterPipeline.pipeline_id))
.distinct("aliases")
.paginated()
.iterator()
)

def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
2 changes: 1 addition & 1 deletion vulnerabilities/management/commands/export.py
Original file line number Diff line number Diff line change
@@ -159,7 +159,7 @@ def packages_by_type_ns_name():
"fixing_vulnerabilities__weaknesses",
"fixing_vulnerabilities__severities",
)
.paginated()
.iterator()
)

for tp_ns_name, packages in groupby(qs, key=by_purl_type_ns_name):
14 changes: 0 additions & 14 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
@@ -74,20 +74,6 @@ def get_or_none(self, *args, **kwargs):
with suppress(self.model.DoesNotExist, ValidationError):
return self.get(*args, **kwargs)

def paginated(self, per_page=5000):
"""
Iterate over a (large) QuerySet by chunks of ``per_page`` items.
    This technique is essential for preventing memory issues when iterating over a large QuerySet.
    See these links for inspiration:
https://nextlinklabs.com/resources/insights/django-big-data-iteration
https://stackoverflow.com/questions/4222176/why-is-iterating-through-a-large-django-queryset-consuming-massive-amounts-of-me/
"""
paginator = Paginator(self, per_page=per_page)
for page_number in paginator.page_range:
page = paginator.page(page_number)
for obj in page.object_list:
yield obj


class VulnerabilityQuerySet(BaseQuerySet):
def affecting_vulnerabilities(self):
2 changes: 1 addition & 1 deletion vulnerabilities/pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -170,7 +170,7 @@ def import_new_advisories(self):

imported_advisory_count = 0
progress = LoopProgress(total_iterations=new_advisories_count, logger=self.log)
for advisory in progress.iter(new_advisories.paginated()):
for advisory in progress.iter(new_advisories.iterator()):
self.import_advisory(advisory=advisory)
if advisory.date_imported:
imported_advisory_count += 1
3 changes: 1 addition & 2 deletions vulnerabilities/pipelines/add_cvss31_to_CVEs.py
Original file line number Diff line number Diff line change
@@ -44,10 +44,9 @@ def process_cve_advisory_mapping(self):
progress_step=5,
)

batch_size = 1000
results = []

for severity in progress.iter(nvd_severities.paginated(per_page=batch_size)):
for severity in progress.iter(nvd_severities.iterator(chunk_size=2000)):
print(severity.url)
cve_pattern = re.compile(r"(CVE-\d{4}-\d{4,7})").search
cve_match = cve_pattern(severity.url)
2 changes: 1 addition & 1 deletion vulnerabilities/pipelines/collect_commits.py
Original file line number Diff line number Diff line change
@@ -52,7 +52,7 @@ def collect_and_store_fix_commits(self):
)

for apv in progress.iter(
affected_by_package_related_vulnerabilities.paginated(per_page=500)
affected_by_package_related_vulnerabilities.iterator(chunk_size=2000)
):
vulnerability = apv.vulnerability
for reference in vulnerability.references.all():
4 changes: 2 additions & 2 deletions vulnerabilities/pipelines/compute_package_risk.py
Original file line number Diff line number Diff line change
@@ -54,7 +54,7 @@ def compute_and_store_vulnerability_risk_score(self):
updated_vulnerability_count = 0
batch_size = 5000

for vulnerability in progress.iter(affected_vulnerabilities.paginated(per_page=batch_size)):
for vulnerability in progress.iter(affected_vulnerabilities.iterator(chunk_size=2000)):
severities = vulnerability.severities.all()
references = vulnerability.references.all()
exploits = vulnerability.exploits.all()
@@ -110,7 +110,7 @@ def compute_and_store_package_risk_score(self):
updated_package_count = 0
batch_size = 10000

for package in progress.iter(affected_packages.paginated(per_page=batch_size)):
for package in progress.iter(affected_packages.iterator(chunk_size=2000)):
risk_score = compute_package_risk(package)

if not risk_score:
2 changes: 1 addition & 1 deletion vulnerabilities/pipelines/flag_ghost_packages.py
Original file line number Diff line number Diff line change
@@ -49,7 +49,7 @@ def detect_and_flag_ghost_packages(logger=None):
)

grouped_packages = groupby(
interesting_packages_qs.paginated(),
interesting_packages_qs.iterator(),
key=lambda pkg: (pkg.type, pkg.namespace, pkg.name),
)