-
Notifications
You must be signed in to change notification settings - Fork 249
/
Copy pathpush-pkg-info.py
135 lines (112 loc) · 4.58 KB
/
push-pkg-info.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
import os, glob, sys, re, json
from datetime import datetime
from es_utils import send_payload
from cmsutils import cmsswIB2Week
from hashlib import sha1
def get_current_time():
    """Return the current UTC time as integer milliseconds since the Unix epoch.

    A timezone-aware ``datetime`` is used instead of ``datetime.utcnow()``,
    which is deprecated since Python 3.12 and yields a naive object.

    Returns:
        int: milliseconds elapsed since 1970-01-01T00:00:00Z, rounded to the
        nearest millisecond.
    """
    # Local import: the file-level import only brings in ``datetime`` itself.
    from datetime import timezone

    now = datetime.now(timezone.utc)
    # timestamp() is float seconds since the epoch; scale to milliseconds.
    return round(now.timestamp() * 1000)
def extract_packages(package_file):
    """Load a package JSON file and map each package name to its version.

    The JSON document is expected to be a mapping whose values each carry a
    "name" plus "version" / "realversion" fields.  When the module-level
    ``process_type`` is "release", the "version" field is used with everything
    after its last "-" removed; for any other process type the "realversion"
    field is used unchanged.

    Returns:
        dict mapping package name -> version string.
    """
    with open(package_file, "r") as handle:
        entries = json.load(handle)

    versions = {}
    for key in entries:
        entry = entries[key]
        name = entry["name"]
        if process_type != "release":
            versions[name] = entry["realversion"]
            continue
        # Release versions look like "<version>-<suffix>"; keep only the part
        # before the last dash.
        versions[name] = entry["version"].rsplit("-", 1)[0]
    return versions
def parse_ib_folder_name(folder_name):
    """Split an IB folder name into ``(version, flavor, date)``.

    The name must match
    ``CMSSW_<n>_<n>[_<flavor>][_X]_<YYYY-MM-DD-HHMM>``.  When no flavor segment
    is present the flavor is reported as "X"; when the flavor segment is
    literally "X" it is reported as "DEFAULT".

    Returns:
        A ``(version, flavor, date)`` tuple, or None (after printing a
        message) when the name does not match the pattern.
    """
    parsed = re.match(r"^(CMSSW_\d+_\d+)(_?[^_]+)?(_X)?_(\d{4}-\d{2}-\d{2}-\d{4})$", folder_name)
    if parsed is None:
        print(f"Folder name '{folder_name}' doesn't match the expected pattern")
        return None

    version, raw_flavor, _, date = parsed.groups()
    flavor = (raw_flavor or "").lstrip("_")
    # Remap the two special cases; every other flavor passes through untouched.
    flavor = {"": "X", "X": "DEFAULT"}.get(flavor, flavor)
    return version, flavor, date
def parse_releases_path(path):
    """Derive release metadata from the path of a release JSON file.

    The architecture is the path component that follows ``/cms/``.  The release
    fields come from the ``CMSSW_<n>_<n>[_<n>...]`` part of the file name
    (without the ``.json`` extension); whatever follows that part after an
    underscore is the flavor.

    Returns:
        ``(architecture, release_name, release_cycle, flavor, date)``.
        Values that cannot be determined are "Not found" (flavor is ""), and
        date is always the empty string.
    """
    arch_hit = re.search(r"/cms/([^/]+)/", path)
    architecture = arch_hit.group(1) if arch_hit else "Not found"

    # Defaults for when the file name or the CMSSW pattern is missing.
    release_name = release_cycle = "Not found"
    flavor = ""

    stem_hit = re.search(r"/([^/]+)\.json$", path)
    release_hit = None
    if stem_hit:
        release_hit = re.search(r"(CMSSW_\d+_\d+(_\d+)*)(_(.*))?", stem_hit.group(1))

    if release_hit:
        release_cycle = release_hit.group(1)
        flavor = release_hit.group(4) or ""
        release_name = f"{release_cycle}_{flavor}" if flavor else release_cycle

    return architecture, release_name, release_cycle, flavor, ""
def extract_and_upload(directory):
    """Upload one document per package for every JSON file matching a glob.

    For each matching file the release/IB metadata is derived from the file
    path, the package versions are read from the file, and one payload per
    package is sent with ``send_payload``.  Which parsing rules apply depends
    on the module-level ``process_type`` ("release" or anything else for IBs).
    Files whose path cannot be parsed are reported and skipped.

    Args:
        directory: glob pattern selecting the package JSON files to process.

    Returns:
        dict: always empty; nothing is collected, the value is kept so existing
        callers that use the return value keep working.
    """
    result = {}
    document = "cmssw-pkginfo"

    for package_file in glob.glob(directory):
        print("--> Processing file: ", package_file)

        if process_type == "release":
            architecture, name, release_cycle, flavor, date = parse_releases_path(package_file)
            if architecture == "Not found" or name == "Not found":
                # Without a usable architecture/release name there is nothing
                # meaningful to index; skip rather than upload with stale or
                # missing package data.
                print("Skipping file with unrecognized release path: ", package_file)
                continue
            index = "cmssw-pkginfo"
        else:
            # Path components 6 and 7 are the IB folder name and architecture
            # for the IB glob used by this script.
            path_parts = package_file.split("/")
            name = path_parts[6]
            architecture = path_parts[7]
            parsed_name = parse_ib_folder_name(name)
            if parsed_name is None:
                # parse_ib_folder_name already printed why the name was rejected.
                continue
            release_cycle, flavor, date = parsed_name
            weeknum, _ = cmsswIB2Week(name)
            index = "cmssw-pkginfo-" + str(weeknum)

        packages = extract_packages(package_file)
        for package, version in packages.items():
            payload = {
                "name": name,
                "release_cycle": release_cycle,
                "flavor": flavor,
                "date": date,
                "architecture": architecture,
                "@timestamp": get_current_time(),
                package: version,
            }
            # Hash of the identifying fields gives a stable document id, so
            # re-uploading the same package entry targets the same document.
            unique_id = f"{release_cycle}_{flavor}_{date}_{architecture}_{package}"
            doc_id = sha1(unique_id.encode("utf-8")).hexdigest()
            # Upload one entry per package
            print(payload)
            send_payload(index, document, doc_id, json.dumps(payload))

    return result
# The first command-line argument selects what to process: "release" for
# releases, anything else for integration builds.
process_type = sys.argv[1]
if process_type != "release":
    # Integration builds (cmssdt path)
    print("Processing IBs...")
    directory = "/data/sdt/SDT/jenkins-artifacts/build-any-ib/*/*_*_*/*/DEPS/cmssw-ib.json"
else:
    # Releases (cmsrep path)
    print("Processing Releases...")
    directory = "/data/cmssw/repos/cms/*_*_*/*/WEB/*/cms+cmssw+CMSSW*.json"
extract_and_upload(directory)
# TODO: Check if data is already in OpenSearch before pushing it (get_payload_wscroll)
# TODO: Check if IB data in cmssdt goes first to cmsrep (I didn't find it)