-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathupsert.py
252 lines (227 loc) · 8.8 KB
/
upsert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
"""
Inserts / updates README.md based on README_template.md.
Also prints out some colored statistics in the terminal.
Use without parameters.
Assumes AoC formatted ranks within the input/personal-times-ranks-scores folder.
Currently, GitHub is not on mermaid version required for xycharts (10.6.1). Hence,
the template contains a reference to the `cumulative-rank-frequency.svg` which needs to
be kept up to date. Use an online tool to convert the xychart mermaid to svg and remove
the mermaid part from the README.md before committing.
(`https://mermaid.live/` does not offer svg export so take it directly from the DOM
and format it with `https://jsonformatter.org/xml-formatter`)
"""
import itertools as it
import math
import os
import re
def colored(text, color='w'):
    """Wrap *text* in an ANSI color escape sequence for terminal output.

    ``'r'`` maps to SGR code 91 and ``'g'`` to SGR code 92. Any other
    *color* (including the default ``'w'``) returns *text* unchanged, without
    any escape sequence.
    """
    if color == 'r':
        sgr_code = '91m'
    elif color == 'g':
        sgr_code = '92m'
    else:
        # Unknown / neutral color: leave the text untouched.
        return text
    # Escape-open, payload, then reset-all so later output is unaffected.
    return ''.join(('\033[', sgr_code, text, '\033[0m'))
def get_color_within_gradient(gradient_by_color_pairs: list[tuple[str, str]],
                              relative_position: float) -> str:
    """Pick a color from a piecewise-linear gradient.

    The gradient is a list of ``(start, end)`` color pairs, each color a
    six-digit hex string without ``#``. All pairs cover equally wide slices
    of the position range ``[0, 1)``.

    Args:
        gradient_by_color_pairs: the gradient segments, in order.
        relative_position: position within the whole gradient.

    Returns:
        The interpolated color as a six-digit lowercase hex string. Positions
        ``>= 1`` return the end color of the last segment and positions
        ``< 0`` the start color of the first segment, both exactly as given.
    """
    if relative_position >= 1:
        return gradient_by_color_pairs[-1][1]
    if relative_position < 0:
        return gradient_by_color_pairs[0][0]

    def to_channels(hex_color):
        # 'rrggbb' -> (r, g, b) as integers
        return tuple(
            int(part, 16)
            for part in (hex_color[:2], hex_color[2:4], hex_color[4:])
        )

    segments = [
        tuple(to_channels(color) for color in pair)
        for pair in gradient_by_color_pairs
    ]

    # Integer part selects the segment, fractional part is the blend weight.
    scaled_position = relative_position * len(segments)
    segment_idx = math.floor(scaled_position)
    upper_weight = scaled_position - segment_idx
    lower_weight = 1 - upper_weight

    packed = 0
    for lo, hi in zip(*segments[segment_idx]):
        # Shift the channels gathered so far and append the next one.
        packed = packed * 256 + math.floor(lower_weight * lo + upper_weight * hi)
    return f'{packed:06x}'
def color_for_value(value, minimum, maximum, small_is_good):
    """Return the highlight color key for *value* within a column.

    The best extreme yields ``'g'``, the worst extreme ``'r'`` and everything
    else ``'w'``. With *small_is_good* the minimum is the best value,
    otherwise the maximum is. When both extremes coincide the best one wins.
    """
    best, worst = (minimum, maximum) if small_is_good else (maximum, minimum)
    if value == best:
        return 'g'
    return 'r' if value == worst else 'w'
def get_formatted_history_line(line, col_widths, colors=None):
    """Format one table row as right-aligned, ``' | '``-separated cells.

    Args:
        line: the cell strings of the row.
        col_widths: the target width of each column.
        colors: optional color key per cell as understood by ``colored``;
            defaults to ``'w'`` for every column.

    Returns:
        The row as a single string.
    """
    if colors is None:
        colors = ['w'] * len(col_widths)
    cells = []
    for width, entry, color in zip(col_widths, line, colors):
        # Left-pad with spaces, then keep the rightmost `width` characters.
        padded = ' ' * width + entry
        cells.append(colored(padded[-width:], color))
    return ' | '.join(cells)
def get_formatted_history_tab(table, small_is_good=True, in_color=True):
    """Render a history table as a list of text lines.

    Args:
        table: list of rows of strings; the first row is the header. Every
            cell of the remaining rows must parse as an integer once ``%``
            characters are removed.
        small_is_good: whether a column's minimum (instead of its maximum) is
            highlighted as the best value.
        in_color: whether to highlight each column's extremes.

    Returns:
        The header line, a ``=`` separator line and one line per data row.
    """
    body = table[1:]
    numeric_rows = [
        [int(cell.replace('%', '')) for cell in row]
        for row in body
    ]
    col_widths = [max(len(cell) for cell in column) for column in zip(*table)]
    col_extremes = [(min(column), max(column)) for column in zip(*numeric_rows)]

    # The separator spans all columns plus the 3-character ' | ' joints.
    separator = '=' * (sum(col_widths) + 3 * (len(col_widths) - 1))
    result = [get_formatted_history_line(table[0], col_widths), separator]

    for row, values in zip(body, numeric_rows):
        if in_color:
            colors = [
                color_for_value(value, lowest, highest, small_is_good)
                for (lowest, highest), value in zip(col_extremes, values)
            ]
            # The first column is never highlighted.
            colors[0] = 'w'
            result.append(get_formatted_history_line(row, col_widths, colors))
        else:
            result.append(get_formatted_history_line(row, col_widths))
    return result
class Participation:
    """One star (puzzle part) parsed from a line of personal statistics.

    The line is expected to hold, in order: the day, then for part 1 and for
    part 2 each a time as ``HH:MM:SS``, a rank and a score.
    """

    def __init__(self, year, line, star_num: int):
        """Parse the values of one star out of *line*.

        Args:
            year: stored unchanged as ``self.year``.
            line: one data line of the statistics file.
            star_num: ``1`` selects the first part's columns; any other value
                selects the second part's columns.

        Raises:
            IndexError: if the line holds fewer integers than required for
                the selected star.
        """
        self.year = year
        # AoC prints `>24h` instead of HH:MM:SS for solves later than a day.
        # That placeholder contributes one integer instead of three and would
        # shift every following index, so expand it to a full clock time
        # (the time then is a lower bound of 24 hours).
        normalized = line.replace('>24h', '24:00:00')
        ints = [int(e) for e in re.findall(r'(\d+)', normalized)]
        self.day = ints[0]
        # Layout: day, then (h, m, s, rank, score) for each of the two parts.
        base_idx = 1 if star_num == 1 else 6
        hours, minutes, seconds = ints[base_idx:base_idx + 3]
        # Solve time in seconds.
        self.time = (hours * 60 + minutes) * 60 + seconds
        self.rank = ints[base_idx + 3]
        self.score = ints[base_idx + 4]
def get_participations():
    """Load all participations from the personal statistics folder.

    Every file in ``input/personal-times-ranks-scores/`` is read; its name
    without ``.txt`` is used as the year. The first two lines of each file
    are skipped and every remaining line yields two participations (star 1
    and star 2).

    Returns:
        A list of ``Participation`` objects.
    """
    dir_path = 'input/personal-times-ranks-scores/'
    result = []
    for input_file in os.listdir(dir_path):
        year = input_file.split('/')[-1].replace('.txt', '')
        with open(dir_path + input_file, 'r') as f:
            content = f.read()
        # Drop the two leading lines before the data rows.
        rows = content.strip().split('\n')[2:]
        result.extend(
            Participation(year, row, star_num)
            for row in rows
            for star_num in (1, 2)
        )
    return result
def get_years(participations):
    """Return the distinct ``year`` values of *participations*, sorted ascending."""
    distinct_years = {entry.year for entry in participations}
    return sorted(distinct_years)
def get_participations_by_year(participations, year):
    """Return the participations whose ``year`` equals *year*, in input order."""
    matching = []
    for entry in participations:
        if entry.year == year:
            matching.append(entry)
    return matching
def get_rank_quantile(participations, quantile_in_percent):
    """Return the rank found at the given quantile of the sorted ranks.

    ``0`` yields the smallest rank and ``100`` the largest one.

    Raises:
        IndexError: if *participations* is empty.
    """
    ranks = sorted(entry.rank for entry in participations)
    # 1-based position of the quantile, rounded up, converted to an index.
    position = math.ceil(len(participations) * quantile_in_percent / 100) - 1
    return ranks[max(0, position)]
def get_rank_frequency(participations, rank, digits=0) -> float | int:
f_in_perc = sum(p.rank <= rank for p in participations) * 100
if digits:
return f_in_perc * 10**digits // len(participations) / 10**digits
return f_in_perc // len(participations)
def get_rank_buckets():
    """Return a fresh list of the rank thresholds used for the statistics."""
    thresholds = (100, 150, 200, 300, 500, 700, 1000, 1500, 2000, 3000, 5000)
    return list(thresholds)
def get_color_series():
    """Return a fresh list of three-digit hex colors (without ``#``)."""
    return 'f84 1d6 77f a4a aa3 3dd'.split()
def get_history(participations):
    """Build the per-year history tables.

    Returns:
        A pair ``(quantile_tab, rank_tab)``. Both are lists of rows of
        strings with a header row first and one row per year. The quantile
        table holds the rank at fixed quantiles, the rank table the
        percentage of participations within each rank bucket.
    """
    quantile_buckets = [0, 5, 10, 20, 30, 50, 75, 90, 100]
    rank_buckets = get_rank_buckets()

    quantile_labels = [f'{q}%' for q in quantile_buckets]
    # Give the extremes and the middle quantile descriptive names.
    quantile_labels[0] = 'Best'
    quantile_labels[quantile_buckets.index(50)] = 'Median'
    quantile_labels[-1] = 'Worst'

    quantile_tab = [['Year', *quantile_labels]]
    rank_tab = [['Year', *(str(r) for r in rank_buckets)]]
    for year in get_years(participations):
        of_year = get_participations_by_year(participations, year)
        quantile_tab.append(
            [year, *(f'{get_rank_quantile(of_year, q)}' for q in quantile_buckets)]
        )
        rank_tab.append(
            [year, *(f'{get_rank_frequency(of_year, r)}%' for r in rank_buckets)]
        )
    return quantile_tab, rank_tab
def print_history():
    """Print both history tables, with colored extremes, to the terminal."""
    quantile_tab, rank_tab = get_history(get_participations())
    banner = '\n###################\n# RANKS\n###################\n'
    print(banner)
    # Quantile table: small ranks are good (the default).
    print(*get_formatted_history_tab(quantile_tab), sep='\n')
    print()
    # Frequency table: large percentages are good.
    print(*get_formatted_history_tab(rank_tab, small_is_good=False), sep='\n')
def dashboard_overview(participations):
    """Render the overall rank distribution as a mermaid pie chart block.

    The ranks are split into the intervals between consecutive rank buckets
    (starting at 0 and ending at the largest rank present). Each interval
    becomes one pie slice labelled with its occurrence count and bounds.

    Returns:
        A fenced ``mermaid`` code block as a string.
    """
    bounds = [0, *get_rank_buckets(), max(p.rank for p in participations)]
    segments = []
    for lower, upper in it.pairwise(bounds):
        hits = sum(1 for p in participations if lower < p.rank <= upper)
        segments.append((hits, lower, upper))

    # mermaid always sorts pie charts -> use colors to make gradual semantics clear
    # sort colors descending for occurrence / ascending for ranks (as mermaid does)
    # also adjust series order because old mermaid versions do _not_ sort the legend
    col_gradient = [
        # extra long green [1, 500] due to multiple tiny intervals
        ('80ff80', '40a040'), ('40a040', '004000'),
        ('00a0ff', '003060'),  # blue/cyan for [501, 1000]
        ('ffff00', '606000'),  # yellow [1001, 2000]
        ('ff4040', '600000'),  # red the rest
    ]
    last_idx = len(segments) - 1
    ranked = sorted(
        (
            (
                (hits, -upper),
                get_color_within_gradient(col_gradient, idx / last_idx),
                (hits, lower, upper),
            )
            for idx, (hits, lower, upper) in enumerate(segments)
        ),
        reverse=True,
    )

    theme_vars = ', '.join(
        f'"pie{slot}": "#{color}"'
        for slot, (_, color, _) in enumerate(ranked, start=1)
    )
    out = [
        '```mermaid',
        '%%{init: {"themeVariables": {' + theme_vars + '}}}%%',
        'pie',
        'title Ranking within Top-k Segment',
    ]
    for *_, (hits, lower, upper) in ranked:
        out.append(f'"{hits} * [{lower + 1}, {upper}]": {hits}')
    out.append('```')
    return '\n'.join(out)
def dashboard_history_tables(participations):
    """Render both history tables, uncolored, as fenced code blocks.

    Returns:
        A string holding the "Rank by Quantile" table followed by the
        "Frequency by Rank" table.
    """
    quantile_tab, rank_tab = get_history(participations)
    quantile_text = '\n'.join(get_formatted_history_tab(quantile_tab, in_color=False))
    rank_text = '\n'.join(get_formatted_history_tab(rank_tab, in_color=False))
    return (
        f'Rank by Quantile\n```\n{quantile_text}\n```'
        f'\nFrequency by Rank\n```\n{rank_text}\n```'
    )
def dashboard_history_charts(participations):
    """Render the per-year rank frequencies as mermaid blocks.

    The result holds two fenced ``mermaid`` blocks: a flowchart that only
    carries one ``style`` line per year (pairing each year with its series
    color) and an xychart with one line per year showing the frequency of
    ranking within each rank bucket, plus constant reference lines.

    Only as many years get a color as ``get_color_series`` provides.

    Returns:
        Both blocks as a single string.
    """
    years = get_years(participations)
    color_series = get_color_series()
    rank_buckets = get_rank_buckets()
    chart_quantiles = [0, 25, 50, 75, 100]

    res = '\n```mermaid'
    res += '\nflowchart TD '
    for y, c in zip(years, color_series):
        res += f'\nstyle {y} fill:#{c},stroke:#333,stroke-width:2px,color:#fff'
    res += '\n```'

    # Grey for the reference lines first, then one color per year, matching
    # the order in which the lines are emitted below.
    plot_color_palette = ['#808080'] * len(chart_quantiles) \
        + [f'#{c}' for _, c in zip(years, color_series)]
    # Joined outside the f-string: reusing the enclosing quote character
    # inside a replacement field is a syntax error before Python 3.12.
    palette = ', '.join(plot_color_palette)

    res += '\n\n```mermaid'
    # YAML front-matter: each level must be indented deeper than its parent,
    # otherwise plotColorPalette does not end up below themeVariables.xyChart.
    res += '\n---'
    res += '\nconfig:'
    res += '\n  themeVariables:'
    res += '\n    xyChart:'
    res += f'\n      plotColorPalette: "{palette}"'
    res += '\n---'
    res += '\nxychart-beta'
    res += '\ntitle "Frequency of Ranking within Top-k"'
    res += '\nx-axis ' + str(rank_buckets)
    res += '\ny-axis "Frequency (in %)" 0 --> 100'
    for q in chart_quantiles:
        # Horizontal reference line at q percent.
        res += f'\nline "{q}%" {str([q] * len(rank_buckets))}'
    for y in years:
        participations_of_year = get_participations_by_year(participations, y)
        res += f'\nline "{y}" ' + str([
            get_rank_frequency(participations_of_year, r)
            for r in rank_buckets
        ])
    res += '\n```'
    return res
def export_dashboard():
    """Generate ``README.md`` from ``README_template.md``.

    The placeholders ``{overall}``, ``{history_charts}`` and
    ``{history_tables}`` of the template are replaced by the corresponding
    rendered dashboard sections. A confirmation is printed afterwards.
    """
    with open('README_template.md', 'r') as template:
        result = template.read()

    participations = get_participations()
    # Placeholder -> renderer, applied in this order.
    sections = {
        '{overall}': dashboard_overview,
        '{history_charts}': dashboard_history_charts,
        '{history_tables}': dashboard_history_tables,
    }
    for placeholder, render in sections.items():
        result = result.replace(placeholder, render(participations))

    with open('README.md', 'w') as readme:
        readme.write(result)
    print('\nREADME.md inserted / updated!')
if __name__ == '__main__':
    # Only run when executed as a script, so importing this module does not
    # print statistics or overwrite README.md as a side effect.
    print_history()
    export_dashboard()