#!/usr/bin/env python3
#
## Licensed to the .NET Foundation under one or more agreements.
## The .NET Foundation licenses this file to you under the MIT license.
#
##
# Title : superpmi.py
#
# Notes:
#
# Script to orchestrate SuperPMI collections, replays, asm diffs, and SuperPMI
# data management. Note that some of the options provided by this script are
# also provided in our SuperPMI collect test. The test can be found here:
# https://github.com/dotnet/runtime/blob/main/src/tests/JIT/superpmi/superpmicollect.cs.
#
################################################################################
################################################################################
import argparse
import asyncio
import csv
import datetime
import locale
import logging
import os
import multiprocessing
import platform
import shutil
import subprocess
import sys
import tempfile
import queue
import re
import urllib
import urllib.request
import zipfile
from coreclr_arguments import *
from jitutil import TempDir, ChangeDir, remove_prefix, is_zero_length_file, is_nonzero_length_file, \
make_safe_filename, find_file, download_one_url, download_files, report_azure_error, \
require_azure_storage_libraries, authenticate_using_azure, \
create_unique_directory_name, create_unique_file_name, get_files_from_path
locale.setlocale(locale.LC_ALL, '') # Use '' for auto, or force e.g. to 'en_US.UTF-8'
################################################################################
# Azure Storage information
################################################################################
# We store several things in Azure Blob Storage:
# 1. SuperPMI collections
# 2. A copy of PMI.dll, as a fallback in case we need it but can't find it locally,
# so we don't need to download dotnet/jitutils and build it ourselves.
# (Note: if PMI is ever published as a package, we could just download that instead.)
# 3. A copy of coredistools. If, when doing asm diffs, a copy of the coredistools
# library is not found in the Core_Root directory, we download a cached copy.
# Note: it would be better to download and use the official coredistools
# NuGet packages (like the setup-stress-dependencies scripts do).
az_account_name = "clrjit2"
az_superpmi_container_name = "superpmi"
az_collections_root_folder = "collections"
az_blob_storage_account_uri = "https://" + az_account_name + ".blob.core.windows.net/"
az_blob_storage_superpmi_container_uri = az_blob_storage_account_uri + az_superpmi_container_name
az_jitrollingbuild_container_name = "jitrollingbuild"
az_builds_root_folder = "builds"
az_blob_storage_jitrollingbuild_container_uri = az_blob_storage_account_uri + az_jitrollingbuild_container_name
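# With the constants above, the SuperPMI collections container URI resolves to:
#   https://clrjit2.blob.core.windows.net/superpmi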
################################################################################
# Argument Parser
################################################################################
description = """\
Script to run SuperPMI replay, ASM diffs, and collections.
The script also manages the Azure store of pre-created SuperPMI collection files.
Help for each command can be shown by asking for help on that command, for example
`superpmi.py collect --help`.
"""
collect_description = """\
Automate a SuperPMI collection.
"""
replay_description = """\
Run SuperPMI replay on one or more collections.
"""
asm_diff_description = """\
Run SuperPMI ASM diffs on one or more collections.
"""
upload_description = """\
Upload a collection to SuperPMI Azure storage.
"""
upload_private_description = """\
Upload a collection to a local file system path.
"""
download_description = """\
Download collections from SuperPMI Azure storage.
Normally, collections are automatically downloaded to a local cache
as part of doing a 'replay' operation. This command allows you to
download without doing a 'replay'.
"""
list_collections_description = """\
List the existing collections in the SuperPMI Azure storage.
"""
merge_mch_description = """\
Utility command to merge MCH files. This is a thin wrapper around
'mcs -merge -recursive -dedup -thin' followed by 'mcs -toc'.
"""
spmi_log_file_help = "Write SuperPMI tool output to a log file. Requires --sequential."
jit_ee_version_help = """\
JIT/EE interface version (the JITEEVersionIdentifier GUID from jiteeversionguid.h in the format
'a5eec3a4-4176-43a7-8c2b-a05b551d4f49'). Default: if the mcs tool is found, assume it
was built with the same JIT/EE version as the JIT we are using, and run "mcs -printJITEEVersion"
to get that version. Otherwise, use "unknown-jit-ee-version".
"""
host_os_help = "OS (windows, OSX, Linux). Default: current OS."
arch_help = "Architecture (x64, x86, arm, arm64). Default: current architecture."
target_os_help = "Target OS, for use with cross-compilation JIT (windows, OSX, Linux). Default: current OS."
target_arch_help = "Target architecture, for use with cross-compilation JIT (x64, x86, arm, arm64). Passed as asm diffs target to SuperPMI. Default: current architecture."
mch_arch_help = "Architecture of MCH files to download, used for cross-compilation altjit (x64, x86, arm, arm64). Default: target architecture."
build_type_help = "Build type (Debug, Checked, Release). Default: Checked."
core_root_help = "Core_Root location. Optional; it will be deduced if possible from runtime repo root."
log_level_help = """\
Console log level (output verbosity level).
One of: critical, error, warning, info, debug.
Output from this level and higher is output to the console.
All output is always written to the log file.
Default: warning.
"""
log_file_help = "Output log file path. If not specified, a default location is chosen."
product_location_help = "Built Product directory location. Optional; it will be deduced if possible from runtime repo root."
spmi_location_help = """\
Directory in which to put SuperPMI files, such as downloaded MCH files, asm diffs, and repro .MC files.
Optional. Default is 'spmi' within the repo 'artifacts' directory.
If the 'SUPERPMI_CACHE_DIRECTORY' environment variable is set to a path, that directory is used.
"""
superpmi_collect_help = """\
Command to run SuperPMI collect over. Note that there cannot be any dotnet CLI commands
invoked inside this command, as they will fail due to the shim JIT being set.
"""
replay_mch_files_help = """\
MCH files, or directories containing MCH files, to use for replay. For each directory passed,
all recursively found MCH files in that directory root will be used. Files may either be a path
on disk or a URI to an MCH file to download. Use these MCH files instead of a collection from
the Azure Storage MCH file store. UNC paths will be downloaded and cached locally.
"""
private_store_help = """\
Specify the path to one or more private SuperPMI data stores. Default: use the semicolon-separated
value of the SUPERPMI_PRIVATE_STORE environment variable, if it exists.
"""
filter_help = """\
Specify one or more filters to restrict the set of MCH files to download or use from the local cache.
A filter is a simple case-insensitive substring search against the MCH file path. If multiple filter
strings are specified, any matching path is accepted (it is "or", not "and").
"""
upload_mch_files_help = """\
MCH files, or directories containing MCH files, to upload. For each directory passed,
all recursively found MCH files in that directory root will be uploaded. MCT files are also uploaded.
"""
skip_cleanup_help = "Skip intermediate file removal."
break_on_assert_help = "Enable break on assert during SuperPMI replay."
break_on_error_help = "Enable break on error during SuperPMI replay."
force_download_help = """\
If downloading an MCH file, always download it. Don't use an existing file in the download location.
Normally, we don't download if the target directory exists. This forces download even if the
target directory already exists.
"""
download_no_progress_help = """\
If specified, then download progress will not be shown.
"""
merge_mch_pattern_help = """\
A pattern describing the files to merge, passed directly to `mcs -merge`.
Acceptable patterns include `*.mch`, `file*.mch`, and `c:\\my\\directory\\*.mch`.
Only the final component can contain a `*` wildcard; the directory path cannot.
"""
error_limit_help = """
Specify a failure `limit`; replay and asmdiffs will exit after seeing more than
`limit` failures.
"""
# Start of parser object creation.
parser = argparse.ArgumentParser(description=description)
subparsers = parser.add_subparsers(dest='mode', help="Command to invoke")
subparsers.required = True
# Create a parser for core_root. It can be specified directly,
# or computed from the script location and host OS, architecture, and build type:
#
# script location implies repo root,
# implies artifacts location,
# implies test location from host OS, architecture, build type,
# implies Core_Root path
#
# You normally use the default host OS, but for Azure Storage upload and other
# operations, it can be useful to allow it to be specified.
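# For example, on a typical runtime repo checkout the deduced Core_Root looks like
# (layout illustrative):
#   <repo_root>/artifacts/tests/coreclr/windows.x64.Checked/Tests/Core_Root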
core_root_parser = argparse.ArgumentParser(add_help=False)
core_root_parser.add_argument("-arch", help=arch_help)
core_root_parser.add_argument("-build_type", default="Checked", help=build_type_help)
core_root_parser.add_argument("-host_os", help=host_os_help)
core_root_parser.add_argument("-core_root", help=core_root_help)
core_root_parser.add_argument("-log_level", help=log_level_help)
core_root_parser.add_argument("-log_file", help=log_file_help)
core_root_parser.add_argument("-spmi_location", help=spmi_location_help)
core_root_parser.add_argument("--no_progress", action="store_true", help=download_no_progress_help)
# Create a set of arguments common to target specification. Used for collect, replay, asmdiffs, upload, upload-private, download, list-collections.
target_parser = argparse.ArgumentParser(add_help=False)
target_parser.add_argument("-target_arch", help=target_arch_help)
target_parser.add_argument("-target_os", help=target_os_help)
target_parser.add_argument("-mch_arch", help=mch_arch_help)
# Create a set of arguments common to all commands that run SuperPMI.
superpmi_common_parser = argparse.ArgumentParser(add_help=False)
superpmi_common_parser.add_argument("--break_on_assert", action="store_true", help=break_on_assert_help)
superpmi_common_parser.add_argument("--break_on_error", action="store_true", help=break_on_error_help)
superpmi_common_parser.add_argument("--skip_cleanup", action="store_true", help=skip_cleanup_help)
superpmi_common_parser.add_argument("--sequential", action="store_true", help="Run SuperPMI in sequential mode. Default is to run in parallel for faster runs.")
superpmi_common_parser.add_argument("-spmi_log_file", help=spmi_log_file_help)
superpmi_common_parser.add_argument("-jit_name", help="Specify the filename of the jit to use, e.g., 'clrjit_universal_arm64_x64.dll'. Default is clrjit.dll/libclrjit.so")
superpmi_common_parser.add_argument("--altjit", action="store_true", help="Set the altjit variables on replay.")
superpmi_common_parser.add_argument("-error_limit", help=error_limit_help)
# subparser for collect
collect_parser = subparsers.add_parser("collect", description=collect_description, parents=[core_root_parser, target_parser, superpmi_common_parser])
# Add required arguments
collect_parser.add_argument("collection_command", nargs='?', help=superpmi_collect_help)
collect_parser.add_argument("collection_args", nargs='?', help="Arguments to pass to the SuperPMI collect command. This is a single string; quote it if necessary if the arguments contain spaces.")
collect_parser.add_argument("--pmi", action="store_true", help="Run PMI on a set of directories or assemblies.")
collect_parser.add_argument("--crossgen2", action="store_true", help="Run crossgen2 on a set of directories or assemblies.")
collect_parser.add_argument("-assemblies", dest="assemblies", nargs="+", default=[], help="A list of managed dlls or directories to recursively use while collecting with PMI or crossgen2. Required if --pmi or --crossgen2 is specified.")
collect_parser.add_argument("-exclude", dest="exclude", nargs="+", default=[], help="A list of files or directories to exclude from the files and directories specified by `-assemblies`.")
collect_parser.add_argument("-pmi_location", help="Path to pmi.dll to use during PMI run. Optional; pmi.dll will be downloaded from Azure Storage if necessary.")
collect_parser.add_argument("-output_mch_path", help="Location to place the final MCH file.")
collect_parser.add_argument("--merge_mch_files", action="store_true", help="Merge multiple MCH files. Use the -mch_files flag to pass a list of MCH files to merge.")
collect_parser.add_argument("-mch_files", metavar="MCH_FILE", nargs='+', help="Pass a sequence of MCH files which will be merged. Required by --merge_mch_files.")
collect_parser.add_argument("--use_zapdisable", action="store_true", help="Sets COMPlus_ZapDisable=1 and COMPlus_ReadyToRun=0 when doing collection to cause NGEN/ReadyToRun images to not be used, and thus causes JIT compilation and SuperPMI collection of these methods.")
collect_parser.add_argument("--tiered_compilation", action="store_true", help="Sets COMPlus_TieredCompilation=1 when doing collections.")
# Allow for continuing a collection in progress
collect_parser.add_argument("-temp_dir", help="Specify an existing temporary directory to use. Useful if continuing an ongoing collection process, or forcing a temporary directory to a particular hard drive. Optional; default is to create a temporary directory in the usual TEMP location.")
collect_parser.add_argument("--skip_collection_step", action="store_true", help="Do not run the collection step.")
collect_parser.add_argument("--skip_merge_step", action="store_true", help="Do not run the merge step.")
collect_parser.add_argument("--skip_clean_and_verify_step", action="store_true", help="Do not run the collection cleaning, TOC creation, and verifying step.")
collect_parser.add_argument("--skip_collect_mc_files", action="store_true", help="Do not collect .MC files")
# Create a set of arguments common to all SuperPMI replay commands, namely basic replay and ASM diffs.
# Note that SuperPMI collection also runs a replay to verify the final MCH file, so many arguments
# common to replay are also applicable to that replay as well.
replay_common_parser = argparse.ArgumentParser(add_help=False)
replay_common_parser.add_argument("-mch_files", metavar="MCH_FILE", nargs='+', help=replay_mch_files_help)
replay_common_parser.add_argument("-filter", nargs='+', help=filter_help)
replay_common_parser.add_argument("-product_location", help=product_location_help)
replay_common_parser.add_argument("--force_download", action="store_true", help=force_download_help)
replay_common_parser.add_argument("-jit_ee_version", help=jit_ee_version_help)
replay_common_parser.add_argument("-private_store", action="append", help=private_store_help)
# subparser for replay
replay_parser = subparsers.add_parser("replay", description=replay_description, parents=[core_root_parser, target_parser, superpmi_common_parser, replay_common_parser])
replay_parser.add_argument("-jit_path", help="Path to clrjit. Defaults to Core_Root JIT.")
replay_parser.add_argument("-jitoption", action="append", help="Pass option through to the jit. Format is key=value, where key is the option name without leading COMPlus_")
# subparser for asmdiffs
asm_diff_parser = subparsers.add_parser("asmdiffs", description=asm_diff_description, parents=[core_root_parser, target_parser, superpmi_common_parser, replay_common_parser])
asm_diff_parser.add_argument("-base_jit_path", help="Path to baseline clrjit. Defaults to baseline JIT from rolling build, by computing baseline git hash.")
asm_diff_parser.add_argument("-diff_jit_path", help="Path to diff clrjit. Defaults to Core_Root JIT.")
asm_diff_parser.add_argument("-git_hash", help="Use this git hash as the current hash for use to find a baseline JIT. Defaults to current git hash of source tree.")
asm_diff_parser.add_argument("-base_git_hash", help="Use this git hash as the baseline JIT hash. Default: search for the baseline hash.")
asm_diff_parser.add_argument("--diff_jit_dump", action="store_true", help="Generate JitDump output for diffs. Default: only generate asm, not JitDump.")
asm_diff_parser.add_argument("--gcinfo", action="store_true", help="Include GC info in disassembly (sets COMPlus_JitGCDump/COMPlus_NgenGCDump; requires instructions to be prefixed by offsets).")
asm_diff_parser.add_argument("--debuginfo", action="store_true", help="Include debug info after disassembly (sets COMPlus_JitDebugDump/COMPlus_NgenDebugDump).")
asm_diff_parser.add_argument("-base_jit_option", action="append", help="Option to pass to the baseline JIT. Format is key=value, where key is the option name without leading COMPlus_...")
asm_diff_parser.add_argument("-diff_jit_option", action="append", help="Option to pass to the diff JIT. Format is key=value, where key is the option name without leading COMPlus_...")
asm_diff_parser.add_argument("-tag", help="Specify a word to add to the directory name where the asm diffs will be placed")
asm_diff_parser.add_argument("-metrics", action="append", help="Metrics option to pass to jit-analyze. Can be specified multiple times, or pass comma-separated values.")
# subparser for upload
upload_parser = subparsers.add_parser("upload", description=upload_description, parents=[core_root_parser, target_parser])
upload_parser.add_argument("-mch_files", metavar="MCH_FILE", required=True, nargs='+', help=upload_mch_files_help)
upload_parser.add_argument("-az_storage_key", help="Key for the clrjit Azure Storage location. Default: use the value of the CLRJIT_AZ_KEY environment variable.")
upload_parser.add_argument("-jit_ee_version", help=jit_ee_version_help)
upload_parser.add_argument("--skip_cleanup", action="store_true", help=skip_cleanup_help)
# subparser for upload-private
upload_private_parser = subparsers.add_parser("upload-private", description=upload_private_description, parents=[core_root_parser, target_parser])
upload_private_parser.add_argument("-mch_files", metavar="MCH_FILE", required=True, nargs='+', help=upload_mch_files_help)
upload_private_parser.add_argument("-private_store", required=True, help="Target directory root of the private store in which to place the files.")
upload_private_parser.add_argument("-jit_ee_version", help=jit_ee_version_help)
upload_private_parser.add_argument("--skip_cleanup", action="store_true", help=skip_cleanup_help)
# subparser for download
download_parser = subparsers.add_parser("download", description=download_description, parents=[core_root_parser, target_parser])
download_parser.add_argument("-filter", nargs='+', help=filter_help)
download_parser.add_argument("-jit_ee_version", help=jit_ee_version_help)
download_parser.add_argument("--skip_cleanup", action="store_true", help=skip_cleanup_help)
download_parser.add_argument("--force_download", action="store_true", help=force_download_help)
download_parser.add_argument("-mch_files", metavar="MCH_FILE", nargs='+', help=replay_mch_files_help)
download_parser.add_argument("-private_store", action="append", help=private_store_help)
# subparser for list-collections
list_collections_parser = subparsers.add_parser("list-collections", description=list_collections_description, parents=[core_root_parser, target_parser])
list_collections_parser.add_argument("-jit_ee_version", help=jit_ee_version_help)
list_collections_parser.add_argument("--all", action="store_true", help="Show all MCH files, not just those for the specified (or default) JIT-EE version, OS, and architecture")
list_collections_parser.add_argument("--local", action="store_true", help="Show the local MCH download cache")
# subparser for merge-mch
merge_mch_parser = subparsers.add_parser("merge-mch", description=merge_mch_description, parents=[core_root_parser])
merge_mch_parser.add_argument("-output_mch_path", required=True, help="Location to place the final MCH file.")
merge_mch_parser.add_argument("-pattern", required=True, help=merge_mch_pattern_help)
################################################################################
# Helper functions
################################################################################
def run_and_log(command, log_level=logging.DEBUG):
""" Return a command and log its output to the debug logger
Args:
command (list) : Command to run
log_level (int) : log level to use for logging output (but not the "Invoking" text)
Returns:
Process return code
"""
logging.log(log_level, "Invoking: %s", " ".join(command))
proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
stdout_output, _ = proc.communicate()
for line in stdout_output.decode('utf-8', errors='replace').splitlines(): # There won't be any stderr output since it was piped to stdout
logging.log(log_level, line)
return proc.returncode
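# A minimal usage sketch (the command is illustrative; `mcs -printJITEEVersion` is
# the query mentioned in jit_ee_version_help above):
#   return_code = run_and_log(["mcs", "-printJITEEVersion"], logging.INFO)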
def write_file_to_log(filepath, log_level=logging.DEBUG):
""" Read the text of a file and write it to the logger. If the file doesn't exist, don't output anything.
Args:
filepath (string) : file to log
log_level (int) : log level to use for logging output
Returns:
Nothing
"""
if not os.path.exists(filepath):
return
logging.log(log_level, "============== Contents of %s", filepath)
with open(filepath) as file_handle:
lines = file_handle.readlines()
lines = [item.strip() for item in lines]
for line in lines:
logging.log(log_level, line)
logging.log(log_level, "============== End contents of %s", filepath)
# Functions to verify the OS and architecture. They take an instance of CoreclrArguments,
# which is used to find the list of legal OS and architectures
def check_host_os(coreclr_args, host_os):
return (host_os is not None) and (host_os in coreclr_args.valid_host_os)
def check_target_os(coreclr_args, target_os):
return (target_os is not None) and (target_os in coreclr_args.valid_host_os)
def check_arch(coreclr_args, arch):
return (arch is not None) and (arch in coreclr_args.valid_arches)
def check_target_arch(coreclr_args, target_arch):
return (target_arch is not None) and (target_arch in coreclr_args.valid_arches)
def check_mch_arch(coreclr_args, mch_arch):
return (mch_arch is not None) and (mch_arch in coreclr_args.valid_arches)
def create_artifacts_base_name(coreclr_args, mch_file):
""" Create an appropriate "base" name for use creating a directory name related to MCH file playback.
This will later be prepended by "asm." or "jitdump.", for example, and
create_unique_directory_name() should be called on the final name to ensure it is unique.
Use the MCH file base name as the main part of the directory name, removing
the trailing ".mch", if any.
If there is a tag specified (for asm diffs), prepend the tag.
Args:
coreclr_args : the parsed arguments
mch_file (str) : the MCH file name that is being replayed.
Returns:
A directory name to be used.
"""
artifacts_base_name = os.path.basename(mch_file)
if artifacts_base_name.lower().endswith(".mch"):
artifacts_base_name = artifacts_base_name[:-4]
if hasattr(coreclr_args, "tag") and coreclr_args.tag is not None:
artifacts_base_name = "{}.{}".format(coreclr_args.tag, artifacts_base_name)
return artifacts_base_name
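# For example, replaying a (hypothetical) "benchmarks.windows.x64.Checked.mch" with
# tag "mytag" produces the base name "mytag.benchmarks.windows.x64.Checked".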
def read_csv_metrics(path):
""" Read a metrics summary file produced by superpmi, and return the single row containing the information as a dictionary.
Args:
path (str) : path to .csv file
Returns:
A dictionary with each metric
"""
with open(path) as csv_file:
reader = csv.DictReader(csv_file)
for row in reader:
return row
return None
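# Shape sketch (the header and values are illustrative, not the actual superpmi
# metric columns): a file containing "Successful compiles,Failing compiles\n100,2"
# is returned as {"Successful compiles": "100", "Failing compiles": "2"}.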
################################################################################
# Helper classes
################################################################################
class AsyncSubprocessHelper:
""" Class to help with async multiprocessing tasks.
"""
def __init__(self, items, subproc_count=multiprocessing.cpu_count(), verbose=False):
self.items = items
self.subproc_count = subproc_count
self.verbose = verbose
self.subproc_count_queue = None
if 'win32' in sys.platform:
# Windows specific event-loop policy & cmd
asyncio.set_event_loop(asyncio.ProactorEventLoop())
async def __get_item__(self, item, index, size, async_callback, *extra_args):
""" Wrapper to the async callback which will schedule based on the queue
"""
# Wait until the subproc_id queue yields an entry, meaning we have an available
# processor to run a task (specifically, we are below our maximum allowed
# parallelism). Then start running the subprocess.
subproc_id = await self.subproc_count_queue.get()
print_prefix = ""
if self.verbose:
print_prefix = "[{}:{}]: ".format(index, size)
await async_callback(print_prefix, item, *extra_args)
# Add back to the queue, in case another process wants to run.
self.subproc_count_queue.put_nowait(subproc_id)
async def __run_to_completion__(self, async_callback, *extra_args):
""" async wrapper for run_to_completion
"""
# Create a queue with one entry for each of the threads we're
# going to allow. By default, this will be one entry per CPU.
# Using subproc_count_queue.get() will block when we're running
# a task on every CPU.
chunk_size = self.subproc_count
self.subproc_count_queue = asyncio.Queue(chunk_size)
for item in range(chunk_size):
self.subproc_count_queue.put_nowait(item)
# Create a 'tasks' list of async function calls, one for each item.
# When all these calls complete, we're done.
size = len(self.items)
count = 1
tasks = []
for item in self.items:
tasks.append(self.__get_item__(item, count, size, async_callback, *extra_args))
count += 1
# Invoke all the calls to __get_item__ concurrently and wait for them all to finish.
await asyncio.gather(*tasks)
def run_to_completion(self, async_callback, *extra_args):
""" Run until the item queue has been depleted
Notes:
Acts as a wrapper to abstract the async calls to
async_callback. Note that this allows up to subproc_count
subprocesses to run at once. Each time a queue slot is
freed, another subprocess can start. The Python code itself
is single-threaded; it relies on async/await to keep
subproc_count subprocesses running.
"""
reset_env = os.environ.copy()
loop = asyncio.get_event_loop()
loop.run_until_complete(self.__run_to_completion__(async_callback, *extra_args))
os.environ.clear()
os.environ.update(reset_env)
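# A minimal usage sketch of AsyncSubprocessHelper (the callback and items are
# hypothetical):
#
#   async def run_echo(print_prefix, item):
#       proc = await asyncio.create_subprocess_shell("echo " + item)
#       await proc.communicate()
#
#   helper = AsyncSubprocessHelper(["a", "b", "c"], verbose=True)
#   helper.run_to_completion(run_echo)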
################################################################################
# SuperPMI Collect
################################################################################
class SuperPMICollect:
""" SuperPMI Collect class
Notes:
The object is responsible for setting up a SuperPMI collection given
the arguments passed into the script.
"""
def __init__(self, coreclr_args):
""" Constructor
Args:
coreclr_args (CoreclrArguments) : parsed args
"""
if coreclr_args.host_os == "OSX":
self.collection_shim_name = "libsuperpmi-shim-collector.dylib"
self.corerun_tool_name = "corerun"
elif coreclr_args.host_os == "Linux":
self.collection_shim_name = "libsuperpmi-shim-collector.so"
self.corerun_tool_name = "corerun"
elif coreclr_args.host_os == "windows":
self.collection_shim_name = "superpmi-shim-collector.dll"
self.corerun_tool_name = "corerun.exe"
else:
raise RuntimeError("Unsupported OS.")
self.jit_path = os.path.join(coreclr_args.core_root, determine_jit_name(coreclr_args))
self.superpmi_path = determine_superpmi_tool_path(coreclr_args)
self.mcs_path = determine_mcs_tool_path(coreclr_args)
self.core_root = coreclr_args.core_root
self.collection_command = coreclr_args.collection_command
self.collection_args = coreclr_args.collection_args
if coreclr_args.pmi:
self.pmi_location = determine_pmi_location(coreclr_args)
self.corerun = os.path.join(self.core_root, self.corerun_tool_name)
if coreclr_args.crossgen2:
self.corerun = os.path.join(self.core_root, self.corerun_tool_name)
if coreclr_args.dotnet_tool_path is None:
self.crossgen2_driver_tool = self.corerun
else:
self.crossgen2_driver_tool = coreclr_args.dotnet_tool_path
logging.debug("Using crossgen2 driver tool %s", self.crossgen2_driver_tool)
if coreclr_args.pmi or coreclr_args.crossgen2:
self.assemblies = coreclr_args.assemblies
self.exclude = coreclr_args.exclude
self.coreclr_args = coreclr_args
# Pathname for a temporary .MCL file used for noticing SuperPMI replay failures against base MCH.
self.base_fail_mcl_file = None
# The base .MCH file path
self.base_mch_file = None
# Final .MCH file path
self.final_mch_file = None
# The .TOC file path for the clean thin unique .MCH file
self.toc_file = None
self.temp_location = None
############################################################################
# Instance Methods
############################################################################
def collect(self):
""" Do the SuperPMI Collection.
"""
# Do a basic SuperPMI collect and validation:
# 1. Collect MC files by running a set of sample apps.
# 2. Create a merged thin unique MCH by using "mcs -merge -recursive -dedup -thin base.mch *.mc".
# 3. Create a clean MCH by running SuperPMI over the MCH, and using "mcs -strip" to filter
# out failures (if any).
# 4. Create a TOC using "mcs -toc".
# 5. Verify the resulting MCH file is error-free when running SuperPMI against it with the
# same JIT used for collection.
#
# MCH files are big. If we don't need them anymore, clean them up right away to avoid
# running out of disk space in disk-constrained situations.
passed = False
try:
with TempDir(self.coreclr_args.temp_dir, self.coreclr_args.skip_cleanup) as temp_location:
# Setup all of the temp locations
self.base_fail_mcl_file = os.path.join(temp_location, "basefail.mcl")
self.base_mch_file = os.path.join(temp_location, "base.mch")
self.temp_location = temp_location
if self.coreclr_args.output_mch_path is not None:
self.final_mch_file = os.path.abspath(self.coreclr_args.output_mch_path)
final_mch_dir = os.path.dirname(self.final_mch_file)
if not os.path.isdir(final_mch_dir):
os.makedirs(final_mch_dir)
else:
default_coreclr_bin_mch_location = os.path.join(self.coreclr_args.spmi_location, "mch", "{}.{}.{}".format(self.coreclr_args.host_os, self.coreclr_args.arch, self.coreclr_args.build_type))
if not os.path.isdir(default_coreclr_bin_mch_location):
os.makedirs(default_coreclr_bin_mch_location)
self.final_mch_file = os.path.abspath(os.path.join(default_coreclr_bin_mch_location, "{}.{}.{}.mch".format(self.coreclr_args.host_os, self.coreclr_args.arch, self.coreclr_args.build_type)))
self.toc_file = "{}.mct".format(self.final_mch_file)
# If a temp_dir was passed, check a few flags to see where we are in the
# collection process. This functionality exists to avoid losing progress
# during a SuperPMI collection: it is not unreasonable for a collection to
# take many hours, so we allow re-use of a collection in progress.
if not self.coreclr_args.skip_collection_step:
self.__collect_mc_files__()
if not self.coreclr_args.skip_merge_step:
if not self.coreclr_args.merge_mch_files:
self.__merge_mc_files__()
else:
self.__merge_mch_files__()
if not self.coreclr_args.skip_clean_and_verify_step:
self.__create_clean_mch_file__()
self.__create_toc__()
self.__verify_final_mch__()
passed = True
except Exception as exception:
logging.critical(exception)
return passed
############################################################################
# Helper Methods
############################################################################
def __collect_mc_files__(self):
""" Do the actual SuperPMI collection for a command
Returns:
None
"""
if not self.coreclr_args.skip_collect_mc_files:
assert os.path.isdir(self.temp_location)
# Set environment variables. For crossgen2, we need to pass the COMPlus variables as arguments to the JIT using
# the `-codegenopt` argument.
env_copy = os.environ.copy()
root_env = {}
root_env["SuperPMIShimLogPath"] = self.temp_location
root_env["SuperPMIShimPath"] = self.jit_path
complus_env = {}
complus_env["EnableExtraSuperPmiQueries"] = "1"
if not self.coreclr_args.tiered_compilation:
complus_env["TieredCompilation"] = "0"
if self.coreclr_args.use_zapdisable:
complus_env["ZapDisable"] = "1"
complus_env["ReadyToRun"] = "0"
logging.debug("Starting collection.")
logging.debug("")
def set_and_report_env(env, root_env, complus_env = None):
for var, value in root_env.items():
env[var] = value
print_platform_specific_environment_vars(logging.DEBUG, self.coreclr_args, var, value)
if complus_env is not None:
for var, value in complus_env.items():
complus_var = "COMPlus_" + var
env[complus_var] = value
print_platform_specific_environment_vars(logging.DEBUG, self.coreclr_args, complus_var, value)
# If we need them, collect all the assemblies we're going to use for the collection(s).
# Remove the files matching the `-exclude` arguments (case-insensitive) from the list.
if self.coreclr_args.pmi or self.coreclr_args.crossgen2:
assemblies = []
for item in self.assemblies:
assemblies += get_files_from_path(item, match_func=lambda file: any(file.endswith(extension) for extension in [".dll", ".exe"]) and (self.exclude is None or not any(e.lower() in file.lower() for e in self.exclude)))
if len(assemblies) == 0:
logging.error("No assemblies found using `-assemblies` and `-exclude` arguments!")
else:
logging.debug("Using assemblies:")
for item in assemblies:
logging.debug(" %s", item)
logging.debug("") # add trailing empty line
################################################################################################ Do collection using given collection command (e.g., script)
if self.collection_command is not None:
logging.debug("Starting collection using command")
collection_command_env = env_copy.copy()
collection_complus_env = complus_env.copy()
collection_complus_env["JitName"] = self.collection_shim_name
set_and_report_env(collection_command_env, root_env, collection_complus_env)
logging.info("Collecting using command:")
logging.info(" %s %s", self.collection_command, " ".join(self.collection_args))
assert isinstance(self.collection_command, str)
assert isinstance(self.collection_args, list)
command = [self.collection_command, ] + self.collection_args
proc = subprocess.Popen(command, env=collection_command_env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
stdout_output, _ = proc.communicate()
for line in stdout_output.decode('utf-8', errors='replace').splitlines(): # There won't be any stderr output since it was piped to stdout
logging.debug(line)
################################################################################################ end of "self.collection_command is not None"
################################################################################################ Do collection using PMI
if self.coreclr_args.pmi is True:
logging.debug("Starting collection using PMI")
async def run_pmi(print_prefix, assembly, self):
""" Run pmi over all dlls
"""
command = [self.corerun, self.pmi_location, "DRIVEALL", assembly]
command_string = " ".join(command)
logging.debug("%s%s", print_prefix, command_string)
# Save the stdout and stderr to files, so we can see if PMI wrote any interesting messages.
# Use the name of the assembly as the basename of the file. mkstemp() will ensure the file
# is unique.
root_output_filename = make_safe_filename("pmi_" + assembly + "_")
try:
stdout_file_handle, stdout_filepath = tempfile.mkstemp(suffix=".stdout", prefix=root_output_filename, dir=self.temp_location)
stderr_file_handle, stderr_filepath = tempfile.mkstemp(suffix=".stderr", prefix=root_output_filename, dir=self.temp_location)
proc = await asyncio.create_subprocess_shell(
command_string,
stdout=stdout_file_handle,
stderr=stderr_file_handle)
await proc.communicate()
os.close(stdout_file_handle)
os.close(stderr_file_handle)
# No need to keep zero-length files
if is_zero_length_file(stdout_filepath):
os.remove(stdout_filepath)
if is_zero_length_file(stderr_filepath):
os.remove(stderr_filepath)
return_code = proc.returncode
if return_code != 0:
logging.debug("'%s': Error return code: %s", command_string, return_code)
write_file_to_log(stdout_filepath, log_level=logging.DEBUG)
write_file_to_log(stderr_filepath, log_level=logging.DEBUG)
except OSError as ose:
if "[WinError 32] The process cannot access the file because it is being used by another " \
"process:" in format(ose):
logging.warning("Skipping file %s. Got error: %s", root_output_filename, ose)
else:
raise ose
# Set environment variables.
pmi_command_env = env_copy.copy()
pmi_complus_env = complus_env.copy()
pmi_complus_env["JitName"] = self.collection_shim_name
set_and_report_env(pmi_command_env, root_env, pmi_complus_env)
old_env = os.environ.copy()
os.environ.update(pmi_command_env)
helper = AsyncSubprocessHelper(assemblies, verbose=True)
helper.run_to_completion(run_pmi, self)
os.environ.clear()
os.environ.update(old_env)
################################################################################################ end of "self.coreclr_args.pmi is True"
################################################################################################ Do collection using crossgen2
if self.coreclr_args.crossgen2 is True:
logging.debug("Starting collection using crossgen2")
async def run_crossgen2(print_prefix, assembly, self):
""" Run crossgen2 over all dlls
"""
root_crossgen2_output_filename = make_safe_filename("crossgen2_" + assembly) + ".out.dll"
crossgen2_output_assembly_filename = os.path.join(self.temp_location, root_crossgen2_output_filename)
try:
if os.path.exists(crossgen2_output_assembly_filename):
os.remove(crossgen2_output_assembly_filename)
except OSError as ose:
if "[WinError 32] The process cannot access the file because it is being used by another " \
"process:" in format(ose):
logging.warning("Skipping file %s. Got error: %s", crossgen2_output_assembly_filename, ose)
return
else:
raise ose
root_output_filename = make_safe_filename("crossgen2_" + assembly + "_")
# Create a temporary response file to hold all the crossgen2 arguments (otherwise the command-line length limit could be exceeded):
#
# <dll to compile>
# -o:<output dll>
# -r:<Core_Root>\System.*.dll
# -r:<Core_Root>\Microsoft.*.dll
# -r:<Core_Root>\mscorlib.dll
# -r:<Core_Root>\netstandard.dll
# --jitpath:<self.collection_shim_name>
# --codegenopt:<option>=<value> /// for each member of complus_env
#
# invoke with:
#
# dotnet <Core_Root>\crossgen2\crossgen2.dll @<temp.rsp>
#
# where "dotnet" is one of:
# 1. <runtime_root>\dotnet.cmd/sh
# 2. "dotnet" on PATH
# 3. corerun in Core_Root
rsp_file_handle, rsp_filepath = tempfile.mkstemp(suffix=".rsp", prefix=root_output_filename, dir=self.temp_location)
with open(rsp_file_handle, "w") as rsp_write_handle:
rsp_write_handle.write(assembly + "\n")
rsp_write_handle.write("-o:" + crossgen2_output_assembly_filename + "\n")
rsp_write_handle.write("-r:" + os.path.join(self.core_root, "System.*.dll") + "\n")
rsp_write_handle.write("-r:" + os.path.join(self.core_root, "Microsoft.*.dll") + "\n")
rsp_write_handle.write("-r:" + os.path.join(self.core_root, "mscorlib.dll") + "\n")
rsp_write_handle.write("-r:" + os.path.join(self.core_root, "netstandard.dll") + "\n")
rsp_write_handle.write("--parallelism:1" + "\n")
rsp_write_handle.write("--jitpath:" + os.path.join(self.core_root, self.collection_shim_name) + "\n")
for var, value in complus_env.items():
rsp_write_handle.write("--codegenopt:" + var + "=" + value + "\n")
# Log what is in the response file
write_file_to_log(rsp_filepath)
command = [self.crossgen2_driver_tool, self.coreclr_args.crossgen2_tool_path, "@" + rsp_filepath]
command_string = " ".join(command)
logging.debug("%s%s", print_prefix, command_string)
# Save the stdout and stderr to files, so we can see if crossgen2 wrote any interesting messages.
# Use the name of the assembly as the basename of the file. mkstemp() will ensure the file
# is unique.
try:
stdout_file_handle, stdout_filepath = tempfile.mkstemp(suffix=".stdout", prefix=root_output_filename, dir=self.temp_location)
stderr_file_handle, stderr_filepath = tempfile.mkstemp(suffix=".stderr", prefix=root_output_filename, dir=self.temp_location)
proc = await asyncio.create_subprocess_shell(
command_string,
stdout=stdout_file_handle,
stderr=stderr_file_handle)
await proc.communicate()
os.close(stdout_file_handle)
os.close(stderr_file_handle)
# No need to keep zero-length files
if is_zero_length_file(stdout_filepath):
os.remove(stdout_filepath)
if is_zero_length_file(stderr_filepath):
os.remove(stderr_filepath)
return_code = proc.returncode
if return_code != 0:
logging.debug("'%s': Error return code: %s", command_string, return_code)
write_file_to_log(stdout_filepath, log_level=logging.DEBUG)
write_file_to_log(stderr_filepath, log_level=logging.DEBUG)
except OSError as ose:
if "[WinError 32] The process cannot access the file because it is being used by another " \
"process:" in format(ose):
logging.warning("Skipping file %s. Got error: %s", root_output_filename, ose)
else:
raise ose
# Delete the response file unless we are skipping cleanup
if not self.coreclr_args.skip_cleanup:
os.remove(rsp_filepath)
# Set environment variables.
crossgen2_command_env = env_copy.copy()
set_and_report_env(crossgen2_command_env, root_env)
old_env = os.environ.copy()
os.environ.update(crossgen2_command_env)
# Note: crossgen2 compiles in parallel by default. However, it seems to lead to sharing violations
# in SuperPMI collection, accessing the MC file. So, disable crossgen2 parallelism by using
# the "--parallelism:1" switch, and allowing coarse-grained (per-assembly) parallelism here.
# It turns out this works better anyway, as there is a lot of non-parallel time between
# crossgen2 parallel compilations.
helper = AsyncSubprocessHelper(assemblies, verbose=True)
helper.run_to_completion(run_crossgen2, self)
os.environ.clear()
os.environ.update(old_env)
################################################################################################ end of "self.coreclr_args.crossgen2 is True"
mc_files = [os.path.join(self.temp_location, item) for item in os.listdir(self.temp_location) if item.endswith(".mc")]
if len(mc_files) == 0:
raise RuntimeError("No .mc files generated.")
def __merge_mc_files__(self):
""" Merge the mc files that were generated
Notes:
mcs -merge <s_baseMchFile> <s_tempDir>\\*.mc -recursive -dedup -thin
"""
logging.info("Merging MC files")
pattern = os.path.join(self.temp_location, "*.mc")
command = [self.mcs_path, "-merge", self.base_mch_file, pattern, "-recursive", "-dedup", "-thin"]
run_and_log(command)
if not os.path.isfile(self.base_mch_file):
raise RuntimeError("MCH file failed to be generated at: %s" % self.base_mch_file)
# All the individual MC files are no longer necessary, now that we have
# merged them into the base.mch. Delete them.
if not self.coreclr_args.skip_cleanup:
mc_files = [os.path.join(self.temp_location, item) for item in os.listdir(self.temp_location) if item.endswith(".mc")]
for item in mc_files:
os.remove(item)
def __merge_mch_files__(self):
""" Merge MCH files in the mch_files list. This is only used with the `--merge_mch_files` argument.
Notes:
mcs -concat <s_baseMchFile> [self.coreclr_args.mch_files]
"""
logging.info("Merging MCH files")