Skip to content

WIP: new check: 'out-of-tree-commit' #96

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 11 additions & 2 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,24 @@
inputs.nixpkgs.follows = "nixpkgs";
};

traceFd = {
url = "github:rmcgibbo/nix-traceFd/master";
flake = false;
inputs.nixpkgs.follows = "nixpkgs";
};

nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";

utils.url = "github:numtide/flake-utils";
};

outputs = { self, flake-compat, naersk, nixpkgs, utils }: utils.lib.eachDefaultSystem (system: let
outputs = { self, flake-compat, naersk, nixpkgs, utils, traceFd }: utils.lib.eachDefaultSystem (system: let
pkgs = import nixpkgs { inherit system; };
naersk-lib = naersk.lib."${system}";
in rec {

traceFdBuilt = pkgs.callPackage traceFd {};

packages.rust-checks = naersk-lib.buildPackage {
name = "rust-checks";
root = ./rust-checks;
Expand Down Expand Up @@ -53,7 +61,8 @@
pkgs.nixUnstable
packages.rust-checks
]} \
--set AST_CHECK_NAMES ${pkgs.lib.concatStringsSep ":" rust-check-names}
--set AST_CHECK_NAMES ${pkgs.lib.concatStringsSep ":" rust-check-names} \
--set NIX_PLUGINS ${traceFdBuilt}/lib/nix/plugins/libtraceFd.so
ln -s ${./overlays} $out/overlays
ln -s ${./lib} $out/lib
'';
Expand Down
18 changes: 18 additions & 0 deletions overlays/intercept-fetchurl.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{ builtAttrs, packageSet, namePositions }@attrs:

final: prev:
let
inherit (prev) lib;
fd = builtins.getEnv "FETCHURL_TRACEFD";
tracedFetchUrl = args: builtins.traceFd
(lib.toInt fd)
(builtins.toJSON args)
(prev.fetchurl args);
fetchurl =
if fd == "" then
prev.fetchurl
else
tracedFetchUrl;
in
{ fetchurl = fetchurl; }

102 changes: 93 additions & 9 deletions tools/nixpkgs-hammer
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
#!/usr/bin/env python3

import argparse
import concurrent.futures
import io
import json
import os
import re
import subprocess
import sys
import tempfile
import textwrap
from collections import defaultdict
import urllib.request
from collections import defaultdict, namedtuple
from dataclasses import asdict, dataclass, field, is_dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
Expand Down Expand Up @@ -112,6 +116,65 @@ def run_external_checks(
return dict(result)


def run_fetchurl_checks(
fetchurl_calls: List[Dict[str, Any]], excluded_rules: List[str]
) -> Dict[str, Report]:

if "out-of-repo-commit" in excluded_rules:
return {}

github_fetch = namedtuple("github_fetch", ["url", "owner", "repo", "sha"])

def github_commit_fetches() -> Iterable[github_fetch]:
for call in fetchurl_calls:
url = call.get("url", "")
if not isinstance(url, str):
continue
m1 = re.search(
r"github\.com/(.*)/(.*)/(?:archive|commit|pull/\d+/commits)/(.{40})\.",
url,
)
if m1 is not None:
yield (url,) + m1.groups()

def _fetch_data(url: str, owner: str, repo: str, sha: str) -> Tuple[str, str]:
branch_commit_url = f"https://github.com/{owner}/{repo}/branch_commits/{sha}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is an undocumented path which returns html. It could change at any time and I think info should be added as a comment.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know if there's another endpoint that's documented? I suppose a graphql or something could work too.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I researched that a while ago I didn't find anything else.

try:
resp = urllib.request.urlopen(branch_commit_url, timeout=20)
if resp.status != 200:
return None
return (url, resp.read())
except urllib.error.HTTPError:
return None

github_responses = []
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
future_to_url = (
executor.submit(_fetch_data, *value) for value in github_commit_fetches()
)
for future in concurrent.futures.as_completed(future_to_url):
github_responses.append(future.result())

reports = []
for url, response in github_responses:
if b"This commit does not belong to any branch on this repository." in response:
reports.append(
Report(
name="out-of-repo-commit",
msg=textwrap.dedent(
f"""
The dependency chain for these expressions includes {url},
which references a commit which does not belong to any
branch on this repository, and is in fact associated with
a fork of this repository.
"""
).strip(),
locations=[],
)
)
return {"<unknown>": reports}
Copy link
Collaborator Author

@rmcgibbo rmcgibbo Mar 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's an example of what it looks like:

nix run -f ~/projects/nixpkgs-hammering -c nixpkgs-hammer -f . openexr -e attribute-ordering -e no-build-output -e unused-argument -e maintainers-missing
When evaluating attribute ‘openexr’:
No issues found.

When evaluating attribute ‘<unknown>’:
warning: out-of-repo-commit
The dependency chain for these expressions includes https://github.com/AcademySoftwareFoundation/openexr/commit/6442fb71a86c09fb0a8118b6dbd93bcec4883a3c.patch,
which references a commit which does not belong to any
branch on this repository, and is in fact associated with
a fork of this repository.
See: https://github.com/jtojnar/nixpkgs-hammering/blob/master/explanations/out-of-repo-commit.md

Currently, a big technical limitation is that I don't know how to associate the fetchurl call with the attr -- that is, if you're simultaneously running nixpkgs-hammering on two or more attrs and the dependency chain for one of the attrs triggers a report of this type, I don't know how to determine which attr triggered the report.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we could print the outPath of the fetchurl call, get a drv file from that and then check which of the attributes has that in closure.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that seems reasonable. As best I can tell, we can't compute the closure from within the nix language, and instead that needs to be done from the cmdline (nix why-depends, nix show-derivation -r or nix-store --query --graphml, or similar). Is that also your understanding?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With IFD, you might run those commands using runCommand or write a new store-querying builting. But yeah, I am not aware of any current facility to query Nix store within Nix language. (Well, you could compute the closure by literally reading the *.drv store paths but letting Nix get the values from the SQLite database will probably be much faster.)

We will probably want to print the why-depends result in the check output anyway, to make it clear from which derivation it comes from.



def concatenate_messages(
*report_sources: Dict[str, List[Report]]
) -> Dict[Attr, List[Report]]:
Expand All @@ -130,13 +193,30 @@ def escape_attr(attr: str) -> str:
return ".".join(f'"{section}"' for section in attr.split("."))


def nix_eval_json(expr: str, show_trace: bool = False) -> Dict[Attr, List[Report]]:
def nix_eval_json(
expr: str, show_trace: bool = False
) -> Tuple[Dict[Optional[Attr], List[Report]], List[Dict[str, Any]]]:
args = ["nix-instantiate", "--strict", "--json", "--eval", "-"]
if "NIX_PLUGINS" in os.environ:
args.extend(["--option", "plugin-files"] + os.environ["NIX_PLUGINS"].split(":"))

if show_trace:
args.append("--show-trace")

json_text = subprocess.check_output(args, text=True, input=expr)
with tempfile.TemporaryFile() as fetchurl_calls_file:
fileno = fetchurl_calls_file.fileno()
json_text = subprocess.check_output(
args,
text=True,
input=expr,
env=dict(**os.environ, FETCHURL_TRACEFD=str(fileno)),
pass_fds=(fileno,),
)
fetchurl_calls_file.seek(0)
fetchurl_calls = [
json.loads(line)
for line in fetchurl_calls_file.read().decode("utf-8").splitlines()
]

def maybe_path(s: Optional[str]) -> Optional[Path]:
if s:
Expand All @@ -158,8 +238,7 @@ def nix_eval_json(expr: str, show_trace: bool = False) -> Dict[Attr, List[Report
location=maybe_location(data["location"]),
)
result[attr] = load_reports(data["report"])

return result
return result, fetchurl_calls


def load_reports(report_datas: List[Dict[str, Any]]) -> List[Report]:
Expand Down Expand Up @@ -312,7 +391,7 @@ def main(args):
# Our overlays need to know the built attributes so that they can check only them.
# We do it by using functions that return overlays so we need to instantiate them.
overlay_expressions = []
for overlay_generator in overlay_generators_path.glob("*"):
for overlay_generator in sorted(overlay_generators_path.glob("*")):
if overlay_generator.stem in args.excluded_rules:
continue

Expand Down Expand Up @@ -356,12 +435,17 @@ def main(args):
if args.show_trace:
print("Nix expression:", all_messages_nix, file=sys.stderr)

overlay_data = nix_eval_json(all_messages_nix, args.show_trace)
overlay_data, fetchurl_calls = nix_eval_json(all_messages_nix, args.show_trace)
overlay_reports = {attr.name: reports for attr, reports in overlay_data.items()}
external_reports = run_external_checks(
list(overlay_data.keys()), args.excluded_rules
list(attr for attr in overlay_data.keys() if attr is not None),
args.excluded_rules,
)
fetchurl_reports = run_fetchurl_checks(fetchurl_calls, args.excluded_rules)

all_messages = concatenate_messages(
overlay_reports, external_reports, fetchurl_reports
)
all_messages = concatenate_messages(overlay_reports, external_reports)

if args.json:
print(json.dumps(all_messages, cls=JSONEncoder))
Expand Down