Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix up the commit trailers functionality #1576

Merged
merged 5 commits into from
Apr 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 80 additions & 22 deletions git/objects/commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import os
from io import BytesIO
import logging
from collections import defaultdict


# typing ------------------------------------------------------------------
Expand Down Expand Up @@ -335,8 +336,72 @@ def stats(self) -> Stats:
return Stats._list_from_string(self.repo, text)

@property
def trailers(self) -> Dict[str, str]:
    """Get the trailers of the message as a flat dictionary.

    :note: This property is deprecated, please use either ``Commit.trailers_list`` or ``Commit.trailers_dict``.

    :return:
        Dictionary containing whitespace stripped trailer information.
        Only contains the latest instance of each trailer key.
    """
    # ``trailers_dict`` collects the values of a key in the order they are
    # reported, so the last element is the latest occurrence. Taking it keeps
    # the documented "latest instance" contract for repeated keys.
    return {key: values[-1] for key, values in self.trailers_dict.items()}

@property
def trailers_list(self) -> List[Tuple[str, str]]:
    """Get the trailers of the message as a list of key-value pairs.

    Git messages can contain trailer information that is similar to RFC 822
    e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers).

    This function pipes the message through ``git interpret-trailers --parse``
    and returns the reported trailers in the order they were emitted, keeping
    repeated keys as separate entries.

    Valid message with trailer::

        Subject line

        some body information

        another information

        key1: value1.1
        key1: value1.2
        key2 :    value 2 with inner spaces


    Returned list will look like this::

        [
            ("key1", "value1.1"),
            ("key1", "value1.2"),
            ("key2", "value 2 with inner spaces"),
        ]


    :return:
        List containing key-value tuples of whitespace stripped trailer information.
    """
    cmd = ["git", "interpret-trailers", "--parse"]
    proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE)  # type: ignore
    stdout_bytes = proc.communicate(str(self.message).encode())[0]
    parsed = stdout_bytes.decode("utf8").strip()

    # No output at all means the message carries no trailers.
    if not parsed:
        return []

    # Split each line on the first colon only, so values may contain colons.
    pairs = (line.split(":", 1) for line in parsed.split("\n"))
    return [(key.strip(), val.strip()) for key, val in pairs]

@property
def trailers_dict(self) -> Dict[str, List[str]]:
    """Get the trailers of the message as a dictionary of value lists.

    Git messages can contain trailer information that is similar to RFC 822
    e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers).

    The key-value pairs are taken from ``trailers_list`` and grouped by key,
    so a key that occurs several times maps to all of its values, in the
    order in which they appear in that list.

    Valid message with trailer::

        Subject line

        some body information

        another information

        key1: value1.1
        key1: value1.2
        key2 :    value 2 with inner spaces


    Returned dictionary will look like this::

        {
            "key1": ["value1.1", "value1.2"],
            "key2": ["value 2 with inner spaces"],
        }


    :return:
        Dictionary containing whitespace stripped trailer information.
        Mapping trailer keys to a list of their corresponding values.
    """
    grouped: Dict[str, List[str]] = {}
    for key, val in self.trailers_list:
        # First sighting of a key creates its list; later ones extend it.
        grouped.setdefault(key, []).append(val)
    return grouped

@classmethod
def _iter_from_process_or_stream(cls, repo: "Repo", proc_or_stream: Union[Popen, IO]) -> Iterator["Commit"]:
Expand Down
77 changes: 41 additions & 36 deletions test/test_commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,52 +494,57 @@ def test_datetimes(self):

def test_trailers(self):
    """Check trailer parsing via ``trailers_list``, ``trailers_dict`` and the deprecated ``trailers``."""
    KEY_1 = "Hello"
    VALUE_1_1 = "World"
    VALUE_1_2 = "Another-World"
    KEY_2 = "Key"
    VALUE_2 = "Value with inner spaces"

    # Check the following trailer example is extracted from multiple msg variations
    TRAILER = f"{KEY_1}: {VALUE_1_1}\n{KEY_2}: {VALUE_2}\n{KEY_1}: {VALUE_1_2}"
    msgs = [
        f"Subject\n\n{TRAILER}\n",
        f"Subject\n \nSome body of a function\n \n{TRAILER}\n",
        f"Subject\n \nSome body of a function\n\nnon-key: non-value\n\n{TRAILER}\n",
        (
            # check when trailer has inconsistent whitespace
            "Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n"
            f"{KEY_1}:{VALUE_1_1}\n{KEY_2} : {VALUE_2}\n{KEY_1}: {VALUE_1_2}\n"
        ),
    ]
    for msg in msgs:
        commit = copy.copy(self.rorepo.commit("master"))
        commit.message = msg
        assert commit.trailers_list == [
            (KEY_1, VALUE_1_1),
            (KEY_2, VALUE_2),
            (KEY_1, VALUE_1_2),
        ]
        assert commit.trailers_dict == {
            KEY_1: [VALUE_1_1, VALUE_1_2],
            KEY_2: [VALUE_2],
        }
        # The deprecated flat view must still expose every key with one of
        # its values; which instance of a repeated key it picks is left open here.
        assert set(commit.trailers) == {KEY_1, KEY_2}
        assert commit.trailers[KEY_1] in (VALUE_1_1, VALUE_1_2)
        assert commit.trailers[KEY_2] == VALUE_2

    # check that trailer stays empty for multiple msg combinations
    msgs = [
        "Subject\n",
        "Subject\n\nBody with some\nText\n",
        "Subject\n\nBody with\nText\n\nContinuation but\n doesn't contain colon\n",
        "Subject\n\nBody with\nText\n\nContinuation but\n only contains one :\n",
        "Subject\n\nBody with\nText\n\nKey: Value\nLine without colon\n",
        "Subject\n\nBody with\nText\n\nLine without colon\nKey: Value\n",
    ]

    for msg in msgs:
        commit = copy.copy(self.rorepo.commit("master"))
        commit.message = msg
        assert commit.trailers_list == []
        assert commit.trailers_dict == {}
        assert commit.trailers == {}

    # check that only the last key value paragraph is evaluated
    commit = copy.copy(self.rorepo.commit("master"))
    commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1_1}\n\n{KEY_2}: {VALUE_2}\n"
    assert commit.trailers_list == [(KEY_2, VALUE_2)]
    assert commit.trailers_dict == {KEY_2: [VALUE_2]}
    assert commit.trailers == {KEY_2: VALUE_2}

def test_commit_co_authors(self):
commit = copy.copy(self.rorepo.commit("4251bd5"))
Expand Down