|
1 | 1 | """Use the GraphQL api to grab issues/PRs that match a query."""
|
2 | 2 | import datetime
|
3 | 3 | import os
|
| 4 | +import re |
| 5 | +import shlex |
| 6 | +import subprocess |
4 | 7 | import sys
|
5 | 8 | import urllib
|
6 | 9 | from pathlib import Path
|
7 | 10 | from subprocess import PIPE
|
8 | 11 | from subprocess import run
|
| 12 | +from tempfile import TemporaryDirectory |
9 | 13 |
|
10 | 14 | import dateutil
|
11 | 15 | import numpy as np
|
@@ -153,6 +157,129 @@ def get_activity(
|
153 | 157 | return query_data
|
154 | 158 |
|
155 | 159 |
|
def _match_release_tag(decorations, tag_regex):
    """Return the match for the first ref in ``decorations`` that is a release tag.

    ``decorations`` is the ``%D`` field of one ``git log`` line: a ``", "``
    separated list of ref names, e.g. ``HEAD -> main, tag: v1.2.3, origin/main``.
    Every ref is tried, so the tag is found wherever it sits in the list.
    Returns ``None`` when no ref matches ``tag_regex``.
    """
    for ref in decorations.split(", "):
        match = tag_regex.match(ref.strip())
        if match is not None:
            return match
    return None


def generate_all_activity_md(
    target,
    pattern=r"(v?\d+\.\d+\.\d+)$",
    kind=None,
    auth=None,
    tags=None,
    include_issues=False,
    include_opened=False,
    strip_brackets=False,
    branch=None,
):
    """Generate a full markdown changelog of GitHub activity of a repo based on release tags.

    The repository is cloned into a temporary directory, its decorated commits
    are listed with ``git log``, and one changelog section is generated for each
    pair of consecutive release tags (newest first). The oldest matching tag
    only serves as the lower bound of the last range and gets no section.

    Parameters
    ----------
    target : string
        The GitHub organization/repo for which you want to grab recent issues/PRs.
        Can either be *just* an organization (e.g., `jupyter`) or a combination
        organization and repo (e.g., `jupyter/notebook`). If the former, all
        repositories for that org will be used. If the latter, only the specified
        repository will be used. Can also be a URL to a GitHub org or repo.
        Because this function has to clone a repository to read its tags, the
        clone step fails for a bare organization.
    pattern: str
        The expression used to match a release tag. It is matched against each
        ref name after a ``tag: `` prefix. If it contains a capture group, the
        first group is used as the section title; otherwise the whole matched
        tag text is used.
    kind : ["issue", "pr"] | None
        Return only issues or PRs. If None, both will be returned.
    auth : string | None
        An authentication token for GitHub. If None, then the environment
        variable `GITHUB_ACCESS_TOKEN` will be tried.
    tags : list of strings | None
        A list of the tags to use in generating subsets of PRs for the markdown report.
        Must be one of:

            ['enhancement', 'bugs', 'maintenance', 'documentation', 'api_change']

        If None, all of the above tags will be used.
    include_issues : bool
        Include Issues in the markdown output. Default is False.
    include_opened : bool
        Include a list of opened items in the markdown output. Default is False.
    strip_brackets : bool
        If True, strip any text between brackets at the beginning of the issue/PR title.
        E.g., [MRG], [DOC], etc.
    branch : string | None
        The branch or reference name to filter pull requests by.

    Returns
    -------
    entry: str
        The markdown changelog entry for all of the release tags in the repo.
        Empty if fewer than two release tags are found.

    Raises
    ------
    subprocess.CalledProcessError
        If cloning the repository or listing its history fails.
    """
    # `target` is documented as possibly being a full URL; only prefix the
    # GitHub host when it is not one already.
    clone_url = target if "://" in target else f"https://github.com/{target}"

    # Get the sha and decorations (tags/branches) of each decorated commit.
    with TemporaryDirectory() as td:
        # Argument lists (rather than a split command string) keep whitespace
        # in `target` from turning into extra git arguments. `check=True`
        # surfaces a failed clone here instead of as a confusing missing-cwd
        # error on the next command.
        subprocess.run(
            ["git", "clone", "--", clone_url, "repo"], cwd=td, check=True
        )
        repo = os.path.join(td, "repo")
        # Left unchecked, as in the original: a failure here is not treated
        # as fatal.
        subprocess.run(["git", "fetch", "origin", "--tags"], cwd=repo)

        log_cmd = [
            "git",
            "log",
            "--tags",
            "--simplify-by-decoration",
            "--pretty=format:%h | %D",
        ]
        log_lines = (
            subprocess.check_output(log_cmd, cwd=repo).decode("utf-8").splitlines()
        )

    # Keep only the commits that carry a release tag, as (sha, tag name) pairs
    # in `git log` order (newest first).
    prefix = "tag: "
    tag_regex = re.compile(f"{prefix}{pattern}")
    releases = []
    for line in log_lines:
        sha, _, decorations = line.partition(" | ")
        match = _match_release_tag(decorations, tag_regex)
        if match is None:
            continue
        if tag_regex.groups:
            tag_name = match.group(1)
        else:
            # No capture group in `pattern`: fall back to the matched text
            # without the "tag: " prefix.
            tag_name = match.group(0)[len(prefix):]
        releases.append((sha, tag_name))

    # Generate a changelog entry for each version and sha range.
    sections = []
    total = len(releases) - 1
    newer_older_pairs = zip(releases, releases[1:])
    for count, ((until, tag), (since, _)) in enumerate(newer_older_pairs, start=1):
        print(f"\n({count}/{total})", since, until, tag, file=sys.stderr)
        # NOTE(review): `tags` is accepted and documented above but is not
        # forwarded here (the original did not forward it either). Confirm
        # whether generate_activity_md takes a `tags` keyword and pass it on.
        md = generate_activity_md(
            target,
            since=since,
            heading_level=2,
            until=until,
            auth=auth,
            kind=kind,
            include_issues=include_issues,
            include_opened=include_opened,
            strip_brackets=strip_brackets,
            branch=branch,
        )

        if not md:
            continue

        # Replace the header line with our version tag
        md = "\n".join(md.splitlines()[1:])
        sections.append(f"\n## {tag}\n{md}\n")

    return "".join(sections)
| 281 | + |
| 282 | + |
156 | 283 | def generate_activity_md(
|
157 | 284 | target,
|
158 | 285 | since=None,
|
@@ -208,13 +335,12 @@ def generate_activity_md(
|
208 | 335 | By default, top-level heading is h1, sections are h2.
|
209 | 336 | With heading_level=2 those are increased to h2 and h3, respectively.
|
210 | 337 | branch : string | None
|
211 |
| - The branch or reference name to filter pull requests by |
| 338 | + The branch or reference name to filter pull requests by. |
212 | 339 |
|
213 | 340 | Returns
|
214 | 341 | -------
|
215 |
| - query_data : pandas DataFrame |
216 |
| - A munged collection of data returned from your query. This |
217 |
| - will be a combination of issues and PRs. |
| 342 | + entry: str |
| 343 | + The markdown changelog entry. |
218 | 344 | """
|
219 | 345 | org, repo = _parse_target(target)
|
220 | 346 |
|
|
0 commit comments