-
Notifications
You must be signed in to change notification settings - Fork 17
WIP: new check: 'out-of-tree-commit' #96
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ builtAttrs, packageSet, namePositions }@attrs: | ||
|
||
final: prev: | ||
let | ||
inherit (prev) lib; | ||
fd = builtins.getEnv "FETCHURL_TRACEFD"; | ||
tracedFetchUrl = args: builtins.traceFd | ||
(lib.toInt fd) | ||
(builtins.toJSON args) | ||
(prev.fetchurl args); | ||
fetchurl = | ||
if fd == "" then | ||
prev.fetchurl | ||
else | ||
tracedFetchUrl; | ||
in | ||
{ fetchurl = fetchurl; } | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,17 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import concurrent.futures | ||
import io | ||
import json | ||
import os | ||
import re | ||
import subprocess | ||
import sys | ||
import tempfile | ||
import textwrap | ||
from collections import defaultdict | ||
import urllib.request | ||
from collections import defaultdict, namedtuple | ||
from dataclasses import asdict, dataclass, field, is_dataclass | ||
from pathlib import Path | ||
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple | ||
|
@@ -112,6 +116,65 @@ def run_external_checks( | |
return dict(result) | ||
|
||
|
||
def run_fetchurl_checks( | ||
fetchurl_calls: List[Dict[str, Any]], excluded_rules: List[str] | ||
) -> Dict[str, Report]: | ||
|
||
if "out-of-repo-commit" in excluded_rules: | ||
return {} | ||
|
||
github_fetch = namedtuple("github_fetch", ["url", "owner", "repo", "sha"]) | ||
|
||
def github_commit_fetches() -> Iterable[github_fetch]: | ||
for call in fetchurl_calls: | ||
url = call.get("url", "") | ||
if not isinstance(url, str): | ||
continue | ||
m1 = re.search( | ||
r"github\.com/(.*)/(.*)/(?:archive|commit|pull/\d+/commits)/(.{40})\.", | ||
url, | ||
) | ||
if m1 is not None: | ||
yield (url,) + m1.groups() | ||
|
||
def _fetch_data(url: str, owner: str, repo: str, sha: str) -> Tuple[str, str]: | ||
branch_commit_url = f"https://github.com/{owner}/{repo}/branch_commits/{sha}" | ||
try: | ||
resp = urllib.request.urlopen(branch_commit_url, timeout=20) | ||
if resp.status != 200: | ||
return None | ||
return (url, resp.read()) | ||
except urllib.error.HTTPError: | ||
return None | ||
|
||
github_responses = [] | ||
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: | ||
future_to_url = ( | ||
executor.submit(_fetch_data, *value) for value in github_commit_fetches() | ||
) | ||
for future in concurrent.futures.as_completed(future_to_url): | ||
github_responses.append(future.result()) | ||
|
||
reports = [] | ||
for url, response in github_responses: | ||
if b"This commit does not belong to any branch on this repository." in response: | ||
reports.append( | ||
Report( | ||
name="out-of-repo-commit", | ||
msg=textwrap.dedent( | ||
f""" | ||
The dependency chain for these expressions includes {url}, | ||
which references a commit which does not belong to any | ||
branch on this repository, and is in fact associated with | ||
a fork of this repository. | ||
""" | ||
).strip(), | ||
locations=[], | ||
) | ||
) | ||
return {"<unknown>": reports} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's an example of what it looks like:
Currently, a big technical limitation is that I don't know how to associate the fetchurl call with the attr -- that is, if you're simultaneously running nixpkgs-hammering on two or more attrs and the dependency chain for one of the attrs triggers a report of this type, I don't know how to determine which attr triggered the report. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps we could print the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that seems reasonable. As best I can tell, we can't compute the closure from within the nix language, and instead that needs to be done from the cmdline ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With IFD, you might run those commands using We will probably want to print the why-depends result in the check output anyway, to make it clear from which derivation it comes from. |
||
|
||
|
||
def concatenate_messages( | ||
*report_sources: Dict[str, List[Report]] | ||
) -> Dict[Attr, List[Report]]: | ||
|
@@ -130,13 +193,30 @@ def escape_attr(attr: str) -> str: | |
return ".".join(f'"{section}"' for section in attr.split(".")) | ||
|
||
|
||
def nix_eval_json(expr: str, show_trace: bool = False) -> Dict[Attr, List[Report]]: | ||
def nix_eval_json( | ||
expr: str, show_trace: bool = False | ||
) -> Tuple[Dict[Optional[Attr], List[Report]], List[Dict[str, Any]]]: | ||
args = ["nix-instantiate", "--strict", "--json", "--eval", "-"] | ||
if "NIX_PLUGINS" in os.environ: | ||
args.extend(["--option", "plugin-files"] + os.environ["NIX_PLUGINS"].split(":")) | ||
|
||
if show_trace: | ||
args.append("--show-trace") | ||
|
||
json_text = subprocess.check_output(args, text=True, input=expr) | ||
with tempfile.TemporaryFile() as fetchurl_calls_file: | ||
fileno = fetchurl_calls_file.fileno() | ||
json_text = subprocess.check_output( | ||
args, | ||
text=True, | ||
input=expr, | ||
env=dict(**os.environ, FETCHURL_TRACEFD=str(fileno)), | ||
pass_fds=(fileno,), | ||
) | ||
fetchurl_calls_file.seek(0) | ||
fetchurl_calls = [ | ||
json.loads(line) | ||
for line in fetchurl_calls_file.read().decode("utf-8").splitlines() | ||
] | ||
|
||
def maybe_path(s: Optional[str]) -> Optional[Path]: | ||
if s: | ||
|
@@ -158,8 +238,7 @@ def nix_eval_json(expr: str, show_trace: bool = False) -> Dict[Attr, List[Report | |
location=maybe_location(data["location"]), | ||
) | ||
result[attr] = load_reports(data["report"]) | ||
|
||
return result | ||
return result, fetchurl_calls | ||
|
||
|
||
def load_reports(report_datas: List[Dict[str, Any]]) -> List[Report]: | ||
|
@@ -312,7 +391,7 @@ def main(args): | |
# Our overlays need to know the built attributes so that they can check only them. | ||
# We do it by using functions that return overlays so we need to instantiate them. | ||
overlay_expressions = [] | ||
for overlay_generator in overlay_generators_path.glob("*"): | ||
for overlay_generator in sorted(overlay_generators_path.glob("*")): | ||
if overlay_generator.stem in args.excluded_rules: | ||
continue | ||
|
||
|
@@ -356,12 +435,17 @@ def main(args): | |
if args.show_trace: | ||
print("Nix expression:", all_messages_nix, file=sys.stderr) | ||
|
||
overlay_data = nix_eval_json(all_messages_nix, args.show_trace) | ||
overlay_data, fetchurl_calls = nix_eval_json(all_messages_nix, args.show_trace) | ||
overlay_reports = {attr.name: reports for attr, reports in overlay_data.items()} | ||
external_reports = run_external_checks( | ||
list(overlay_data.keys()), args.excluded_rules | ||
list(attr for attr in overlay_data.keys() if attr is not None), | ||
args.excluded_rules, | ||
) | ||
fetchurl_reports = run_fetchurl_checks(fetchurl_calls, args.excluded_rules) | ||
|
||
all_messages = concatenate_messages( | ||
overlay_reports, external_reports, fetchurl_reports | ||
) | ||
all_messages = concatenate_messages(overlay_reports, external_reports) | ||
|
||
if args.json: | ||
print(json.dumps(all_messages, cls=JSONEncoder)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is an undocumented path which returns html. It could change at any time and I think info should be added as a comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you know if there's another endpoint that's documented? I suppose a graphql or something could work too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When I researched that a while ago I didn't find anything else.