import json
import os
import subprocess
import sys
from datetime import datetime

from bs4 import BeautifulSoup


json_file_path = "tutorials-review-data.json"
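
# Each entry in the audit JSON is expected to carry "Path", "Last Verified",
# and (optionally) "Status" keys, as read below. A sketch of one entry, with
# made-up values:
#   {"Path": "beginner/basics/intro", "Last Verified": "2024-06-10", "Status": ""}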

# Paths to skip in post-processing
paths_to_skip = [
    "beginner/examples_autograd/two_layer_net_custom_function",  # not present in the repo
    "beginner/examples_nn/two_layer_net_module",  # not present in the repo
    "beginner/examples_tensor/two_layer_net_numpy",  # not present in the repo
    "beginner/examples_tensor/two_layer_net_tensor",  # not present in the repo
    "beginner/examples_autograd/two_layer_net_autograd",  # not present in the repo
    "beginner/examples_nn/two_layer_net_optim",  # not present in the repo
    "beginner/examples_nn/two_layer_net_nn",  # not present in the repo
    "intermediate/coding_ddpg",  # not present in the repo - will delete the carryover
]
# Mapping of build directories to their corresponding source directories
build_to_source_mapping = {
    "beginner": "beginner_source",
    "recipes": "recipes_source",
    "distributed": "distributed",
    "intermediate": "intermediate_source",
    "prototype": "prototype_source",
    "advanced": "advanced_source",
    "": "",  # root dir for index.rst
}
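
# For example, a build path such as "beginner/basics/intro" (an illustrative
# path, not necessarily in the repo) resolves to the source file
# "beginner_source/basics/intro.rst" or "beginner_source/basics/intro.py".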


def get_git_log_date(file_path, git_log_args):
    """Run `git log <git_log_args> -- <file_path>` and parse the first output line as a datetime."""
    try:
        result = subprocess.run(
            ["git", "log"] + git_log_args + ["--", file_path],
            capture_output=True,
            text=True,
            check=True,
        )
        if result.stdout:
            date_str = result.stdout.splitlines()[0]
            return datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S %z")
    except subprocess.CalledProcessError:
        pass
    raise ValueError(f"Could not find date for {file_path}")
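
# Sketch of the `git log` output parsed above (`%aD` emits an RFC 2822 date);
# the file path and date here are made up for illustration:
#   $ git log --diff-filter=A --format=%aD -- beginner_source/basics/intro.py
#   Tue, 15 Aug 2023 12:34:56 -0700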


def get_creation_date(file_path):
    return get_git_log_date(file_path, ["--diff-filter=A", "--format=%aD"]).strftime("%b %d, %Y")


def get_last_updated_date(file_path):
    return get_git_log_date(file_path, ["-1", "--format=%aD"]).strftime("%b %d, %Y")
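
# Example usage (hypothetical file; actual output depends on the repo history):
#   get_creation_date("beginner_source/basics/intro.py")      -> "Aug 15, 2023"
#   get_last_updated_date("beginner_source/basics/intro.py")  -> "Jun 10, 2024"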


# Try to find the source file with the given base path and the extensions .rst and .py
def find_source_file(base_path):
    for ext in [".rst", ".py"]:
        source_file_path = base_path + ext
        if os.path.exists(source_file_path):
            return source_file_path
    return None


# Process the audit JSON file and insert the "Created On", "Last Updated",
# and "Last Verified" dates into the corresponding built HTML files
def process_json_file(build_dir, json_file_path):
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        json_data = json.load(json_file)

    for entry in json_data:
        path = entry["Path"]
        last_verified = entry["Last Verified"]
        status = entry.get("Status", "")
        if path in paths_to_skip:
            print(f"Skipping path: {path}")
            continue
        if status in ["needs update", "not verified"]:
            formatted_last_verified = "Not Verified"
        elif last_verified:
            try:
                last_verified_date = datetime.strptime(last_verified, "%Y-%m-%d")
                formatted_last_verified = last_verified_date.strftime("%b %d, %Y")
            except ValueError:
                formatted_last_verified = "Unknown"
        else:
            formatted_last_verified = "Not Verified"
        if status == "deprecated":
            formatted_last_verified += " Deprecated"

        for build_subdir, source_subdir in build_to_source_mapping.items():
            if path.startswith(build_subdir):
                html_file_path = os.path.join(build_dir, path + ".html")
                # Strip the build subdir prefix (and the "/" separator, when
                # present) to get the path relative to the source directory
                base_source_path = os.path.join(
                    source_subdir, path[len(build_subdir):].lstrip("/")
                )
                source_file_path = find_source_file(base_source_path)
                break
        else:
            print(f"Warning: No mapping found for path {path}")
            continue

        if not os.path.exists(html_file_path):
            print(
                f"Warning: HTML file not found for path {html_file_path}. "
                "If this is a new tutorial, please add it to the audit JSON file "
                "and set the Verified status and today's date."
            )
            continue

        if not source_file_path:
            print(f"Warning: Source file not found for path {base_source_path}.")
            continue

        created_on = get_creation_date(source_file_path)
        last_updated = get_last_updated_date(source_file_path)

        with open(html_file_path, "r", encoding="utf-8") as file:
            soup = BeautifulSoup(file, "html.parser")

        # Skip pages that already have a <p> tag with class "date-info-last-verified"
        existing_date_info = soup.find("p", {"class": "date-info-last-verified"})
        if existing_date_info:
            print(
                f"Warning: <p> tag with class 'date-info-last-verified' already exists in {html_file_path}"
            )
            continue

        h1_tag = soup.find("h1")  # Find the <h1> tag to insert the dates after
        if h1_tag:
            date_info_tag = soup.new_tag("p", **{"class": "date-info-last-verified"})
            date_info_tag["style"] = "color: #6c6c6d; font-size: small;"
            # Add the "Created On", "Last Updated", and "Last Verified" information
            date_info_tag.string = (
                f"Created On: {created_on} | "
                f"Last Updated: {last_updated} | "
                f"Last Verified: {formatted_last_verified}"
            )
            # Insert the new tag after the <h1> tag
            h1_tag.insert_after(date_info_tag)
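            # The inserted markup looks roughly like this (dates illustrative):
            #   <p class="date-info-last-verified" style="color: #6c6c6d; font-size: small;">
            #     Created On: Aug 15, 2023 | Last Updated: Jun 10, 2024 | Last Verified: Jun 10, 2024
            #   </p>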
            # Save the modified HTML back to disk
            with open(html_file_path, "w", encoding="utf-8") as file:
                file.write(str(soup))
        else:
            print(f"Warning: <h1> tag not found in {html_file_path}")


def main():
    if len(sys.argv) < 2:
        print("Error: Build directory not provided. Exiting.")
        sys.exit(1)
    build_dir = sys.argv[1]
    print(f"Build directory: {build_dir}")
    process_json_file(build_dir, json_file_path)
    print(
        "Finished processing JSON file. Please check the output for any warnings. "
        "Pages like `nlp/index.html` are generated only during the full `make docs` "
        "or `make html` build. Warnings about these files when you run `make html-noplot` "
        "can be ignored."
    )


if __name__ == "__main__":
    main()