Skip to content

Commit e8e852a

Browse files
authored
Adds blame interaction graphs (#452)
This PR adds a graph abstraction for blame interaction data with different views on the blame data. Using graphs allows us to analyze blame interactions from a graph-theoretic perspective.
1 parent 6ef689f commit e8e852a

40 files changed

+9697
-6346
lines changed

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828

2929
- name: Install system dependencies for CI
3030
run: |
31-
sudo apt-get install time git libgit2-dev ninja-build libyaml-dev
31+
sudo apt-get install time git libgit2-dev ninja-build libyaml-dev libgraphviz-dev
3232
3333
- name: Install dependencies
3434
run: |

.github/workflows/reviewdog.yml

+4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ jobs:
1818
with:
1919
python-version: 3.9
2020

21+
- name: Install system dependencies for CI
22+
run: |
23+
sudo apt-get install libgraphviz-dev
24+
2125
- name: Install dependencies
2226
run: |
2327
python -m pip install --upgrade pip

.isort.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[settings]
2-
known_third_party = PyQt5,_pytest,argparse_utils,benchbuild,click,cryptography,distro,git,github,graphviz,jinja2,matplotlib,numpy,packaging,pandas,pkg_resources,plotly,plumbum,pydriller,pygit2,pygtrie,pylatex,pytest,requests,rich,scipy,seaborn,setuptools,sklearn,tabulate,yaml
2+
known_third_party = PyQt5,_pytest,argparse_utils,benchbuild,click,cryptography,distro,git,github,graphviz,jinja2,matplotlib,networkx,numpy,packaging,pandas,pkg_resources,plotly,plumbum,pydriller,pygit2,pygtrie,pylatex,pytest,requests,rich,scipy,seaborn,setuptools,sklearn,tabulate,yaml
33
multi_line_output=3
44
use_parentheses = True
55
include_trailing_comma: True

.pylintrc

+5-1
Original file line numberDiff line numberDiff line change
@@ -254,10 +254,14 @@ function-naming-style=snake_case
254254
#function-rgx=
255255

256256
# Good variable names which should always be accepted, separated by a comma.
257-
good-names=i,
257+
good-names=a,
258+
b,
259+
i,
258260
j,
259261
k,
260262
n,
263+
x,
264+
y,
261265
ax,
262266
ex,
263267
Run,

mypy.ini

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ ignore_missing_imports = True
2121
[mypy-matplotlib.*]
2222
ignore_missing_imports = True
2323

24-
[mypy-pandas.*]
24+
[mypy-networkx.*]
2525
ignore_missing_imports = True
2626

27-
[mypy-numpy.*]
27+
[mypy-pandas.*]
2828
ignore_missing_imports = True
2929

3030
[mypy-plotly.*]

requirements.txt

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Jinja2>=3.0.1
77
jupyter>=1.0.0
88
kaleido>=0.2.1
99
matplotlib>=3.1.2
10+
networkx>=2.5
1011
numpy>=1.21
1112
packaging>=20.1
1213
pandas>=0.22.0
@@ -17,6 +18,7 @@ pre-commit>=2.3.0
1718
PyDriller>=2.0
1819
pygit2>=0.28.2
1920
PyGithub>=1.47
21+
pygraphviz>=1.7
2022
pygtrie
2123
pylatex>=1.4.1
2224
PyQt5>=5.10.0,<5.14.0

tests/TEST_INPUTS/results/xz/BRE-BR-xz-xz-2f0bc9cd40_9e238675-ee7c-4325-8e9f-8ccf6fd3f05c_success.yaml

+586-586
Large diffs are not rendered by default.

tests/TEST_INPUTS/results/xz/BRE-BR-xz-xz-c5c7ceb08a_77a6c5bc-e5c7-4532-8814-70dbcc6b5dda_success.yaml

+3,437-3,437
Large diffs are not rendered by default.

tests/TEST_INPUTS/results/xz/BRE-BR-xz-xz-ef364d3abc_feeeecb2-1826-49e5-a188-d4d883f06d00_success.yaml

+2,061-2,061
Large diffs are not rendered by default.

tests/paper_mgmt/test_case_study.py

+35
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from tests.test_utils import run_in_test_environment, UnitTestInputs
99
from varats.data.reports.commit_report import CommitReport as CR
1010
from varats.paper_mgmt.paper_config import get_paper_config, load_paper_config
11+
from varats.projects.discover_projects import initialize_projects
1112
from varats.report.report import FileStatusExtension, ReportFilename
1213
from varats.utils.git_util import FullCommitHash, ShortCommitHash
1314
from varats.utils.settings import vara_cfg
@@ -16,6 +17,40 @@
1617
class TestCaseStudyRevisionLookupFunctions(unittest.TestCase):
1718
"""Test if revision look up functions find the correct revisions."""
1819

20+
@classmethod
21+
def setUpClass(cls):
22+
initialize_projects()
23+
24+
@run_in_test_environment(
25+
UnitTestInputs.PAPER_CONFIGS, UnitTestInputs.RESULT_FILES
26+
)
27+
def test_newest_processed_revision(self) -> None:
28+
"""Check whether the newest processed revision is correctly
29+
identified."""
30+
vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
31+
load_paper_config()
32+
33+
newest_processed = MCS.newest_processed_revision_for_case_study(
34+
get_paper_config().get_case_studies('brotli')[0], CR
35+
)
36+
37+
self.assertEqual(
38+
FullCommitHash('21ac39f7c8ca61c855be0bc38900abe7b5a0f67f'),
39+
newest_processed
40+
)
41+
42+
@run_in_test_environment(UnitTestInputs.PAPER_CONFIGS)
43+
def test_newest_processed_revision_no_results(self) -> None:
44+
"""Check None is returned when no results are available."""
45+
vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
46+
load_paper_config()
47+
48+
newest_processed = MCS.newest_processed_revision_for_case_study(
49+
get_paper_config().get_case_studies('brotli')[0], CR
50+
)
51+
52+
self.assertIsNone(newest_processed)
53+
1954
@run_in_test_environment(
2055
UnitTestInputs.PAPER_CONFIGS, UnitTestInputs.RESULT_FILES
2156
)

tests/plots/test_chord_plot_utils.py

+164
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
"""Test chord plot utils."""
2+
import typing as tp
3+
import unittest
4+
5+
import numpy as np
6+
import numpy.typing as nptp
7+
8+
from varats.plots.chord_plot_utils import (
9+
_angular_to_cartesian,
10+
_make_equilateral_triangle,
11+
_make_bezier_curve,
12+
_modulo_ab,
13+
get_color_at,
14+
_make_arc,
15+
_calculate_ideogram_data,
16+
_calculate_ribbon_data,
17+
_calculate_arc_bounds,
18+
_calculate_node_placements,
19+
)
20+
21+
22+
class TestChordPlotUtils(unittest.TestCase):
23+
"""Test chord plot utils."""
24+
25+
def _assert_point_equals(self, point_a, point_b):
26+
self.assertAlmostEqual(point_a[0], point_b[0])
27+
self.assertAlmostEqual(point_a[1], point_b[1])
28+
29+
def _assert_points_equal(self, points_a, points_b):
30+
self.assertEqual(len(points_a), len(points_b))
31+
for a, b in zip(points_a, points_b):
32+
self._assert_point_equals(a, b)
33+
34+
def test_angular_to_cartesian(self):
35+
result = _angular_to_cartesian([0, 0])
36+
self._assert_point_equals([0, 0], result)
37+
38+
result = _angular_to_cartesian([1, 0])
39+
self._assert_point_equals([1, 0], result)
40+
41+
result = _angular_to_cartesian([1, 0.5 * np.pi])
42+
self._assert_point_equals([0, 1], result)
43+
44+
result = _angular_to_cartesian([0.5, np.pi])
45+
self._assert_point_equals([-0.5, 0], result)
46+
47+
def test_equilateral_triangle(self):
48+
result = _make_equilateral_triangle(np.array([0, 0]), np.array([1, 0]))
49+
self._assert_point_equals([0.5, 0.5 * np.sqrt(3)], result)
50+
51+
def test_make_bezier_curve_parabola(self):
52+
result = _make_bezier_curve(
53+
np.array([[0, 0], [0.5, 0.5 * np.sqrt(3)], [1, 0]]), 5
54+
)
55+
# A quadratic Bézier curve is a parabola of the form a*t^2 + b*t + c.
56+
# Since the first and last control point in this test have x=0,
57+
# we can simplify this formula:
58+
a = -np.sqrt(3)
59+
b = np.sqrt(3)
60+
61+
def curve(t: float) -> float:
62+
return a * t * t + b * t
63+
64+
self._assert_points_equal([(t, curve(t)) for t in np.linspace(0, 1, 5)],
65+
result)
66+
67+
def test_make_arc(self):
68+
result = _make_arc(np.array([1, 0]), np.array([-1, 0]), 5)
69+
self._assert_points_equal([
70+
(np.cos(t), np.sin(t)) for t in np.linspace(0, np.pi, 5)
71+
], result)
72+
73+
def test_modulo_ab(self):
74+
self.assertAlmostEqual(3, _modulo_ab(3, 2, 4))
75+
self.assertAlmostEqual(3, _modulo_ab(1, 2, 4))
76+
self.assertAlmostEqual(3, _modulo_ab(5, 2, 4))
77+
self.assertAlmostEqual(2, _modulo_ab(2, 2, 4))
78+
self.assertAlmostEqual(2, _modulo_ab(4, 2, 4))
79+
80+
def test_get_color_at(self):
81+
colorscale = [[0, "rgb(100.0, 0.0, 0.0)"], [1, "rgb(0.0, 0.0, 0.0)"]]
82+
self.assertEqual("rgb(100.0, 0.0, 0.0)", get_color_at(colorscale, -0.5))
83+
self.assertEqual("rgb(0.0, 0.0, 0.0)", get_color_at(colorscale, 1.5))
84+
self.assertEqual("rgb(50.0, 0.0, 0.0)", get_color_at(colorscale, 0.5))
85+
self.assertEqual("rgb(75.0, 0.0, 0.0)", get_color_at(colorscale, 0.25))
86+
87+
def test_calculate_ideogram_data(self):
88+
ends, colors = _calculate_ideogram_data([1, 2, 3])
89+
self._assert_points_equal([[0, np.pi / 3], [np.pi / 3, np.pi],
90+
[np.pi, 2 * np.pi]], ends)
91+
self.assertEqual(3, len(colors))
92+
93+
def test_calculate_ribbon_data(self):
94+
node_a = "a"
95+
node_b = "b"
96+
node_c = "c"
97+
node_data = {"color": 0, "info": ""}
98+
edges = [
99+
(node_a, node_c, {
100+
"color": 0,
101+
"info": "",
102+
"size": 1
103+
}),
104+
(node_b, node_c, {
105+
"color": 0,
106+
"info": "",
107+
"size": 2
108+
}),
109+
]
110+
nodes = [
111+
(node_a, node_data),
112+
(node_b, node_data),
113+
(node_c, node_data),
114+
]
115+
node_sizes = [1, 2, 3]
116+
ideo_ends, ideo_colors = _calculate_ideogram_data([1, 2, 3])
117+
bounds, colors = _calculate_ribbon_data(
118+
nodes, edges, node_sizes, ideo_ends, ideo_colors
119+
)
120+
self._assert_points_equal([[0, np.pi / 3], [5 * np.pi / 3, 2 * np.pi]],
121+
bounds[0])
122+
self._assert_points_equal([[np.pi / 3, np.pi], [np.pi, 5 * np.pi / 3]],
123+
bounds[1])
124+
self.assertEqual(2, len(colors))
125+
126+
def test_calculate_node_placements(self):
127+
placements = _calculate_node_placements([1, 2, 3])
128+
self.assertEqual(0.5, placements[0])
129+
self.assertEqual(2, placements[1])
130+
self.assertEqual(4.5, placements[2])
131+
132+
def test_calculate_arc_bounds(self):
133+
node_a = "a"
134+
node_b = "b"
135+
node_c = "c"
136+
node_data = {"fill_color": 0, "line_color": 0, "info": "", "size": 0}
137+
edges = [
138+
(node_a, node_c, {
139+
"color": 0,
140+
"info": "",
141+
"size": 1
142+
}),
143+
(node_b, node_a, {
144+
"color": 0,
145+
"info": "",
146+
"size": 1
147+
}),
148+
(node_b, node_c, {
149+
"color": 0,
150+
"info": "",
151+
"size": 2
152+
}),
153+
]
154+
nodes = [
155+
(node_a, node_data),
156+
(node_b, node_data),
157+
(node_c, node_data),
158+
]
159+
node_placements = _calculate_node_placements([1, 2, 3])
160+
161+
bounds = _calculate_arc_bounds(nodes, edges, node_placements)
162+
self._assert_points_equal([[0.5, 0], [4.5, 0]], bounds[0])
163+
self._assert_points_equal([[2, 0], [0.5, 0]], bounds[1])
164+
self._assert_points_equal([[2, 0], [4.5, 0]], bounds[2])
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
"""Test blame interaction graphs."""
2+
3+
import unittest
4+
5+
import pytest
6+
7+
from tests.test_utils import run_in_test_environment, UnitTestInputs
8+
from varats.data.reports.blame_interaction_graph import (
9+
create_blame_interaction_graph,
10+
create_file_based_interaction_graph,
11+
get_author_data,
12+
)
13+
from varats.data.reports.blame_report import BlameReport
14+
from varats.paper_mgmt.case_study import (
15+
newest_processed_revision_for_case_study,
16+
)
17+
from varats.paper_mgmt.paper_config import load_paper_config, get_paper_config
18+
from varats.projects.discover_projects import initialize_projects
19+
from varats.utils.settings import vara_cfg
20+
21+
22+
class TestBlameInteractionGraphs(unittest.TestCase):
23+
"""Test if blame interaction graphs are constructed correctly."""
24+
25+
@classmethod
26+
def setUpClass(cls):
27+
initialize_projects()
28+
29+
@run_in_test_environment(
30+
UnitTestInputs.PAPER_CONFIGS, UnitTestInputs.RESULT_FILES
31+
)
32+
def test_blame_interaction_graph(self) -> None:
33+
"""Test whether blame interaction graphs are created correctly."""
34+
vara_cfg()['paper_config']['current_config'] = "test_casestudy_status"
35+
load_paper_config()
36+
37+
revision = newest_processed_revision_for_case_study(
38+
get_paper_config().get_case_studies("xz")[0], BlameReport
39+
)
40+
blame_interaction_graph = create_blame_interaction_graph("xz", revision)
41+
42+
self.assertEqual(blame_interaction_graph.project_name, "xz")
43+
44+
cig = blame_interaction_graph.commit_interaction_graph()
45+
self.assertEqual(124, len(cig.nodes))
46+
self.assertEqual(928, len(cig.edges))
47+
48+
aig = blame_interaction_graph.author_interaction_graph()
49+
self.assertEqual(1, len(aig.nodes))
50+
self.assertEqual(0, len(aig.edges))
51+
52+
caig = blame_interaction_graph.commit_author_interaction_graph()
53+
self.assertEqual(125, len(caig.nodes))
54+
self.assertEqual(92, len(caig.edges))
55+
56+
@pytest.mark.slow
57+
@run_in_test_environment(
58+
UnitTestInputs.PAPER_CONFIGS, UnitTestInputs.RESULT_FILES
59+
)
60+
def test_file_based_interaction_graph(self) -> None:
61+
"""Test whether file-based interaction graphs are created correctly."""
62+
vara_cfg()['paper_config']['current_config'] = "test_casestudy_status"
63+
load_paper_config()
64+
65+
revision = newest_processed_revision_for_case_study(
66+
get_paper_config().get_case_studies("xz")[0], BlameReport
67+
)
68+
blame_interaction_graph = create_file_based_interaction_graph(
69+
"xz", revision
70+
)
71+
72+
self.assertEqual(blame_interaction_graph.project_name, "xz")
73+
74+
cig = blame_interaction_graph.commit_interaction_graph()
75+
self.assertEqual(482, len(cig.nodes))
76+
self.assertEqual(16518, len(cig.edges))
77+
78+
aig = blame_interaction_graph.author_interaction_graph()
79+
self.assertEqual(4, len(aig.nodes))
80+
self.assertEqual(6, len(aig.edges))
81+
82+
caig = blame_interaction_graph.commit_author_interaction_graph()
83+
self.assertEqual(486, len(caig.nodes))
84+
self.assertEqual(509, len(caig.edges))
85+
86+
@run_in_test_environment(
87+
UnitTestInputs.PAPER_CONFIGS, UnitTestInputs.RESULT_FILES
88+
)
89+
def test_get_author_data(self) -> None:
90+
"""Check whether author data is retrieved correctly from the author
91+
interaction graph."""
92+
vara_cfg()['paper_config']['current_config'] = "test_casestudy_status"
93+
load_paper_config()
94+
95+
revision = newest_processed_revision_for_case_study(
96+
get_paper_config().get_case_studies("xz")[0], BlameReport
97+
)
98+
blame_interaction_graph = create_blame_interaction_graph("xz", revision)
99+
100+
self.assertEqual(blame_interaction_graph.project_name, "xz")
101+
102+
aig = blame_interaction_graph.author_interaction_graph()
103+
author_data = get_author_data(aig, "Lasse Collin")
104+
self.assertEqual(author_data["node_attrs"]["author"], "Lasse Collin")
105+
self.assertEqual(author_data["neighbors"], set())
106+
self.assertEqual(0, len(author_data["in_attrs"]))
107+
self.assertEqual(0, len(author_data["out_attrs"]))

0 commit comments

Comments
 (0)