-
Notifications
You must be signed in to change notification settings - Fork 718
/
Copy pathprepare_dataset.py
93 lines (73 loc) · 3.09 KB
/
prepare_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import distutils.util
import json
import logging
import os
import sys
from monai.apps.deepgrow.dataset import create_dataset
def prepare_datalist(args):
    """Create ``dataset.json`` inside ``args.output``, or reuse the one already there.

    When ``<args.output>/dataset.json`` is absent, the source datalist is read
    from ``args.dataset_json``, the entries stored under ``args.datalist_key``
    are handed to ``create_dataset`` and whatever it returns is written to
    ``dataset.json``. When the file is already present it is only loaded, so a
    previous run is not repeated.

    Args:
        args: namespace providing ``dimensions``, ``output``, ``dataset_json``,
            ``datalist_key``, ``dataset_root``, ``limit`` and ``relative_path``.

    Raises:
        KeyError: if ``args.datalist_key`` is not a key of the source datalist.
    """
    dimensions = args.dimensions
    dataset_json = os.path.join(args.output, "dataset.json")

    if not os.path.exists(dataset_json):
        logging.info("Processing dataset...")
        with open(args.dataset_json, encoding="utf-8") as f:
            source = json.load(f)

        # Look the key up before any processing starts so a wrong key fails
        # immediately and with a message that names the offending file.
        try:
            records = source[args.datalist_key]
        except KeyError:
            raise KeyError(
                "datalist key {!r} not found in {}".format(args.datalist_key, args.dataset_json)
            ) from None

        datalist = create_dataset(
            datalist=records,
            base_dir=args.dataset_root,
            output_dir=args.output,
            dimension=dimensions,
            pixdim=[1.0] * dimensions,
            limit=args.limit,
            relative_path=args.relative_path,
        )

        # The existence of dataset.json is what marks the work as done, so it
        # must never be left half-written: dump to a sibling file first and
        # rename it into place only once the dump has succeeded.
        partial_json = dataset_json + ".tmp"
        with open(partial_json, "w", encoding="utf-8") as fp:
            json.dump(datalist, fp, indent=2)
        os.replace(partial_json, dataset_json)
    else:
        logging.info("Pre-load existing dataset.json")
        with open(dataset_json, encoding="utf-8") as f:
            datalist = json.load(f)

    logging.info("+++ Dataset File: {}".format(dataset_json))
    logging.info("+++ Total Records: {}".format(len(datalist)))
    logging.info("")
def run(args):
    """Log every parsed option, make sure the output directory exists, then build the datalist.

    Args:
        args: parsed command-line namespace; ``args.output`` is the directory
            that is created when missing, and the whole namespace is passed on
            to ``prepare_datalist``.
    """
    # Echo the effective configuration so a run can be reproduced from its log.
    for name in vars(args):
        value = getattr(args, name)
        logging.info("USING:: {} = {}".format(name, value))
    logging.info("")

    output_dir = args.output
    output_missing = not os.path.exists(output_dir)
    if output_missing:
        logging.info("output path [{}] does not exist. creating it now.".format(output_dir))
        os.makedirs(output_dir, exist_ok=True)

    prepare_datalist(args)
def strtobool(val):
    """Convert a truth-value string to ``bool`` (used as an argparse ``type``).

    Recognises, case-insensitively, ``y``, ``yes``, ``t``, ``true``, ``on`` and
    ``1`` as true, and ``n``, ``no``, ``f``, ``false``, ``off`` and ``0`` as
    false. This is the vocabulary of ``distutils.util.strtobool``; it is
    implemented here directly because ``distutils`` was removed from the
    standard library in Python 3.12.

    Args:
        val: the string to interpret.

    Returns:
        ``True`` or ``False``.

    Raises:
        ValueError: if ``val`` is not one of the recognised spellings.
    """
    normalized = val.lower()
    if normalized in ("y", "yes", "t", "true", "on", "1"):
        return True
    if normalized in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError("invalid truth value {!r}".format(normalized))
def main():
    """Parse the command-line options and pass the resulting namespace to ``run``."""
    # One (flags, keyword-arguments) entry per option, listed in the order in
    # which they are registered with the parser.
    option_specs = [
        (("-s", "--seed"), {"type": int, "default": 42}),
        (("-dims", "--dimensions"), {"type": int, "default": 2}),
        (("-d", "--dataset_root"), {"default": "/workspace/data/MSD_Task09_Spleen"}),
        (("-j", "--dataset_json"), {"default": "/workspace/data/MSD_Task09_Spleen/dataset.json"}),
        (("-k", "--datalist_key"), {"default": "training"}),
        (("-o", "--output"), {"default": "/workspace/data/deepgrow/2D/MSD_Task09_Spleen"}),
        (("-t", "--limit"), {"type": int, "default": 0}),
        # argparse runs string defaults through ``type``, so "false" becomes False.
        (("-r", "--relative_path"), {"type": strtobool, "default": "false"}),
    ]

    parser = argparse.ArgumentParser()
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)

    run(parser.parse_args())
if __name__ == "__main__":
    # Script entry point: send INFO-and-above records to stdout, stamped with
    # a millisecond-resolution time and the level name, then start the CLI.
    log_settings = dict(
        stream=sys.stdout,
        level=logging.INFO,
        format="[%(asctime)s.%(msecs)03d][%(levelname)5s] - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    logging.basicConfig(**log_settings)
    main()