# Copyright (C) 2022 Jae-Won Chung <[email protected]>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example script for running the Zeus trace-driven simulator."""
from __future__ import annotations
import argparse
from pprint import pprint
from typing import Literal
import pandas as pd
from zeus.job import Job
from zeus.policy.optimizer import JITPowerLimitOptimizer, PruningGTSBatchSizeOptimizer
from zeus.simulate import Simulator
from zeus.analyze import HistoryEntry
def parse_args() -> argparse.Namespace:
    """Parse commandline arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="librispeech")
    parser.add_argument("--model", default="deepspeech2")
    parser.add_argument("--optimizer", default="adamw")
    parser.add_argument(
        "--target_metric", type=float, default=40.0, help="Target validation metric."
    )
    parser.add_argument(
        "--max_epochs", type=int, default=16, help="Maximum number of training epochs."
    )
    parser.add_argument("--b_0", type=int, default=192, help="Initial batch size.")
    parser.add_argument(
        "--gpu", default="v100", choices=["a40", "v100", "p100", "rtx6000"]
    )
    parser.add_argument(
        "--eta_knob",
        type=float,
        default=0.5,
        help="Knob in [0, 1] that trades off energy and time in the cost metric.",
    )
    parser.add_argument(
        "--beta_knob",
        type=float,
        default=2.0,
        help="Early-stop when cost exceeds beta_knob times the minimum observed cost.",
    )
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument(
        "--num_recurrence",
        type=int,
        default=None,
        help="If None, 2*|B|*|P| will be used as in the paper.",
    )
    return parser.parse_args()


def read_trace(
    gpu: Literal["a40", "v100", "p100", "rtx6000"]
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Read the train and power trace files as Pandas DataFrames."""
    # pd.read_csv already returns a DataFrame, so no extra wrapping is needed.
    train_df = pd.read_csv("../../trace/summary_train.csv")
    power_df = pd.read_csv(f"../../trace/summary_power_{gpu}.csv")
    return train_df, power_df


def run_simulator(
    job: Job,
    gpu: Literal["a40", "v100", "p100", "rtx6000"],
    eta_knob: float,
    beta_knob: float,
    num_recurrence: int | None,
    seed: int = 1,
) -> list[HistoryEntry]:
"""Run the simulator on the given job."""
# Read in the train and power traces.
train_df, power_df = read_trace(gpu)
    # Instantiate optimizers.
    plo = JITPowerLimitOptimizer(verbose=True)
    bso = PruningGTSBatchSizeOptimizer(seed=seed, verbose=True)

    # Instantiate the simulator.
    simulator = Simulator(train_df, power_df, bso, plo, seed=seed, verbose=True)

    # Use 2 * |B| * |P| if num_recurrence is None.
    if num_recurrence is None:
        job_df = job.filter_df(train_df.merge(power_df, how="inner"))
        num_recurrence = (
            2 * len(job_df.batch_size.unique()) * len(job_df.power_limit.unique())
        )
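        # Illustrative example (counts are hypothetical): if the trace explored
        # 7 batch sizes and 4 power limits, num_recurrence = 2 * 7 * 4 = 56.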
    print(f"Number of recurrences: {num_recurrence}")

    # Run the simulator.
    return simulator.simulate_one_job(job, num_recurrence, beta_knob, eta_knob)


def main(args: argparse.Namespace) -> None:
    """Run the main routine."""
    # Instantiate the job specification dataclass.
    job = Job(
        args.dataset,
        args.model,
        args.optimizer,
        args.target_metric,
        args.max_epochs,
        args.b_0,
    )

    # Run the simulator.
    history = run_simulator(
        job, args.gpu, args.eta_knob, args.beta_knob, args.num_recurrence, args.seed
    )

    # Print out the list of HistoryEntry's.
    pprint(history)


if __name__ == "__main__":
    main(parse_args())
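

# Example invocation (values are illustrative; the trace CSVs must exist under
# ../../trace/ relative to the working directory, as assumed in read_trace()):
#
#   python run_single.py --dataset librispeech --model deepspeech2 \
#       --optimizer adamw --gpu v100 --eta_knob 0.5 --seed 1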