Skip to content

Main forked #35

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,9 @@ task_info
*.data
*.png
*.gnuplot
table.txt
table.txt
training_molkgnn/best_model_metric_epoch=*.ckpt
training_molkgnn/lightning_logs


.vscode
3 changes: 1 addition & 2 deletions dataset_multigenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ def attach_exp_id(input_tuple, tuple_id):
input_list.append(data_pair)

with Pool(processes = 9) as pool:
pool.starmap(run, input_list)
pool.starmap(run, input_list)

pool.join()
print(f'all tasks finish')

20 changes: 15 additions & 5 deletions entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def add_args(gnn_type):
parser = DataLoaderModule.add_argparse_args(parser)

# Custom arguments
parser.add_argument("--enable_pretraining", default=False) # TODO: \
parser.add_argument("--enable_pretraining", default=False)
parser.add_argument('--task_name', type=str, default='Unnamed')
# Pretraining

Expand Down Expand Up @@ -150,16 +150,17 @@ def load_best_model(trainer, data_module, metric=None, args=None):
best_result = trainer.test(model, datamodule=data_module)
new_name = f'logs/best_{metric}_sample_scores.log'
os.rename('logs/test_sample_scores.log', new_name)
return best_result
return best_result, model


def testing_procedure(trainer, data_module, args):
print(f'In Testing Mode:')
print(f'default_root_dir:{args.default_root_dir}')

model_dict = {}
# Load last model
last_path = osp.join(args.default_root_dir, 'last.ckpt')
model = GNNModel.load_from_checkpoint(last_path, gnn_type=gnn_type, args=args)
model_dict['last'] = model
print('====last_result====:\n')
last_result = trainer.test(model, datamodule=data_module)
os.rename('logs/test_sample_scores.log',
Expand All @@ -174,12 +175,14 @@ def testing_procedure(trainer, data_module, args):
out_file.write(f'{str(last_result)}\n')

for metric in data_module.dataset["metrics"]:
best_result = load_best_model(trainer=trainer, data_module=data_module, metric=metric, args=args)
best_result, best_model = load_best_model(trainer=trainer, data_module=data_module, metric=metric, args=args)
model_dict[f'{metric}_best'] = best_model
if best_result is not False:
out_file.write(f'best_{metric}:\n')
out_file.write(f'{str(best_result)}\n')
out_file.write(f'args:\n')
out_file.write(f'{args}')
return model_dict


def actual_training(model, data_module, use_clearml, gnn_type, args):
Expand Down Expand Up @@ -297,7 +300,14 @@ def actual_training(model, data_module, use_clearml, gnn_type, args):
continue

if args.test:
testing_procedure(trainer, data_module, args)
import pickle
model_dict = testing_procedure(trainer, data_module, args)
with open('logs/model_dict.pickle', 'wb') as f:
pickle.dump(model_dict, f)
# if gnn_type=='kgnn':
# model.save_kernels(dir='analyses/atom_encoder/', file_name='kernels.pt')
# model.print_graph_embedding()
# model.save_graph_embedding('analyses/atom_encoder/graph_embedding')
elif args.validate:
print(f'In Validation Mode:')
result = trainer.validate(model, datamodule=data_module)
Expand Down
9 changes: 9 additions & 0 deletions environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
name: molkgnn
channels:
  - conda-forge
dependencies:
  - python=3.7
  - pip
  - pip:
    - -r requirements.txt

6 changes: 6 additions & 0 deletions mlruns/0/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
artifact_location: mlflow-artifacts:/0
creation_time: 1681345630255
experiment_id: '0'
last_update_time: 1681345630255
lifecycle_stage: active
name: Default
5 changes: 3 additions & 2 deletions models/MolKGNN/MolKGNNNet.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ def forward(self, *argv, save_score=False):
# print(self.dropout)

graph_representation = self.pool(
self.graph_embedding_lin2(self.dropout(self.act(self.graph_embedding_lin1(node_representation)))),
self.graph_embedding_lin2(
self.dropout(self.act(self.graph_embedding_lin1(node_representation)))
),
batch)


Expand Down Expand Up @@ -174,4 +176,3 @@ def add_model_specific_args(parent_parser):
parser.add_argument('--dropout_ratio', type=float, default=0)

return parent_parser

1,819 changes: 1,819 additions & 0 deletions notebooks/get_feature_from_molkgnn.ipynb

Large diffs are not rendered by default.

31 changes: 19 additions & 12 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,24 +116,31 @@ tensorflow-io-gcs-filesystem==0.24.0
termcolor==1.1.0
tf-estimator-nightly==2.8.0.dev2021122109
threadpoolctl==3.1.0
--extra-index-url https://download.pytorch.org/whl/cu113
torch==1.11.0+cu113
torch-cluster==1.6.0
torch-geometric==2.0.4
torchvision==0.12.0+cu113
torchaudio==0.11.0+cu113

-f https://data.pyg.org/whl/torch-1.11.0+cu113.html

torch-scatter==2.0.9
torch-sparse==0.6.13
torchaudio==0.11.0
torchmetrics==0.7.2
torchvision==0.12.0
tqdm==4.63.0
traitlets==5.3.0
typed-argument-parser==1.7.2
typing-extensions==4.1.1
typing-inspect==0.7.1
torch-cluster==1.6.0
torch-spline-conv==1.2.1
torch-geometric==2.0.4

clearml==1.4.1
dive-into-graphs==1.0.0
pytorch-lightning==1.6.3
rdkit
sympy==1.10.1
scikit-learn

update==0.0.1
urllib3==1.26.9
virtualenv==20.14.1
wcwidth==0.2.5
Werkzeug==2.0.3
wrapt==1.14.0
yarl==1.7.2
zipp==3.7.0

networkx==2.6.3
136 changes: 136 additions & 0 deletions requirements.txt.old
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
absl-py==1.0.0
aiohttp==3.8.1
aiosignal==1.2.0
antlr4-python3-runtime==4.9.3
ase==3.22.1
asttokens==2.0.5
astunparse==1.6.3
async-timeout==4.0.2
attrs==21.4.0
backcall==0.2.0
cachetools==5.0.0
captum==0.2.0
certifi==2021.10.8
charset-normalizer==2.0.12
cilog==1.2.3
clearml==1.4.1
click==8.1.3
cloudpickle==2.1.0
cycler==0.11.0
decorator==5.1.1
distlib==0.3.4
dive-into-graphs==1.0.0
et-xmlfile==1.1.0
executing==0.8.3
filelock==3.7.0
flatbuffers==2.0
fonttools==4.31.1
frozenlist==1.3.0
fsspec==2022.2.0
furl==2.1.3
future==0.18.2
gast==0.5.3
google-auth==2.6.2
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
grpcio==1.43.0
h5py==3.6.0
hydra-core==1.2.0
idna==3.3
importlib-metadata==4.11.3
importlib-resources==5.4.0
ipython==8.4.0
jedi==0.18.1
Jinja2==3.0.3
joblib==1.1.0
jsonschema==4.4.0
keras==2.8.0
Keras-Preprocessing==1.1.2
kiwisolver==1.4.0
libclang==13.0.0
llvmlite==0.38.1
Markdown==3.3.6
MarkupSafe==2.1.1
matplotlib==3.5.1
matplotlib-inline==0.1.3
mpmath==1.2.1
msgpack==1.0.3
multidict==6.0.2
mypy-extensions==0.4.3
numba==0.55.2
nvidia-ml-py3==7.352.0
oauthlib==3.2.0
omegaconf==2.2.2
openpyxl==3.0.10
opt-einsum==3.3.0
orderedmultidict==1.0.1
packaging==21.3
parso==0.8.3
pathlib2==2.3.7.post1
pexpect==4.8.0
pickle5==0.0.11
pickleshare==0.7.5
Pillow==9.0.1
platformdirs==2.5.2
prompt-toolkit==3.0.30
protobuf==3.19.4
psutil==5.9.0
ptyprocess==0.7.0
pure-eval==0.2.2
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyDeprecate==0.3.1
pyg-nightly==2.0.5.dev20220723
Pygments==2.12.0
PyJWT==2.1.0
pyparsing==3.0.7
pyrsistent==0.18.1
pyscf==1.7.6.post1
python-dateutil==2.8.2
pytorch-lightning==1.6.3
pytz==2022.1
PyYAML==6.0
ray==1.12.1
rdkit-pypi==2021.9.5.1
requests==2.27.1
requests-oauthlib==1.3.1
rsa==4.8
scikit-learn==1.0.2
scipy==1.8.0
shap==0.41.0
six==1.16.0
sklearn==0.0
slicer==0.0.7
stack-data==0.3.0
style==1.1.0
sympy==1.10.1
tabulate==0.8.9
tensorboard==2.8.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.1
tensorboardX==2.5
tensorflow-io-gcs-filesystem==0.24.0
termcolor==1.1.0
tf-estimator-nightly==2.8.0.dev2021122109
threadpoolctl==3.1.0
torch==1.11.0+cu113
torch-cluster==1.6.0
torch-geometric==2.0.4
torch-scatter==2.0.9
torch-sparse==0.6.13
torchaudio==0.11.0
torchmetrics==0.7.2
torchvision==0.12.0
tqdm==4.63.0
traitlets==5.3.0
typed-argument-parser==1.7.2
typing-extensions==4.1.1
typing-inspect==0.7.1
update==0.0.1
urllib3==1.26.9
virtualenv==20.14.1
wcwidth==0.2.5
Werkzeug==2.0.3
wrapt==1.14.0
yarl==1.7.2
zipp==3.7.0
14 changes: 14 additions & 0 deletions scripts/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Run entry.py in --test mode for one or more datasets, using
# training_molkgnn as the default root directory.
#
# Usage: scripts/test.sh [DATASET_NAME ...]
#
# With no arguments only dataset 1798 is tested. The full set of dataset
# names previously listed in this script is:
#   1798 1843 2258 2689 9999 435008 435034 463087 485290 488997
#
# NOTE: the loop variable is called "seed" for historical reasons, but it is
# passed as --dataset_name; the random seed itself is fixed via --seed 1.
set -e

# Dataset names come from the command line; fall back to the single default.
if [ "$#" -gt 0 ]; then
    seeds=("$@")
else
    seeds=(1798)
fi

for seed in "${seeds[@]}"
do
    # set -e aborts the whole script as soon as one test run fails.
    CUDA_VISIBLE_DEVICES=1 PYTHONPATH=. python entry.py --test --dataset_name "${seed}" --gnn_type "kgnn" --dataset_path dataset/ \
        --num_workers 0 --accelerator gpu --devices 1 \
        --enable_oversampling_with_replacement --warmup_iterations 300 --max_epochs 40 --peak_lr 5e-3 \
        --end_lr 1e-10 --batch_size 16 --default_root_dir training_molkgnn --num_layers 3 \
        --num_kernel1_1hop 10 --num_kernel2_1hop 20 --num_kernel3_1hop 30 --num_kernel4_1hop 50 \
        --num_kernel1_Nhop 10 --num_kernel2_Nhop 20 --num_kernel3_Nhop 30 --num_kernel4_Nhop 50 \
        --node_feature_dim 28 --edge_feature_dim 7 --hidden_dim 32 --seed 1 --task_comment "this is a test on ${seed}"
done
12 changes: 12 additions & 0 deletions scripts/train.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Launch one entry.py training run per dataset name, sequentially, with
# training_molkgnn as the default root directory.
set -e

# Dataset names to train on, in the order the runs are launched.
dataset_names=(1798 1843 2258 2689 9999 435008 435034 463087 485290 488997)

for name in "${dataset_names[@]}"; do
    # Arguments are collected in an array so the command line stays readable;
    # their order is the order entry.py receives them in.
    train_args=(
        --dataset_name "${name}" --gnn_type "kgnn" --dataset_path dataset/ --num_workers 0
        --enable_oversampling_with_replacement --warmup_iterations 300 --max_epochs 40 --peak_lr 5e-3
        --end_lr 1e-10 --batch_size 16 --default_root_dir training_molkgnn --num_layers 3
        --num_kernel1_1hop 10 --num_kernel2_1hop 20 --num_kernel3_1hop 30 --num_kernel4_1hop 50
        --num_kernel1_Nhop 10 --num_kernel2_Nhop 20 --num_kernel3_Nhop 30 --num_kernel4_Nhop 50
        --node_feature_dim 28 --edge_feature_dim 7 --hidden_dim 32 --seed 1
        --task_comment "this is a train on ${name}"
    )

    # set -e stops the remaining runs if this one exits non-zero.
    CUDA_VISIBLE_DEVICES=1 PYTHONPATH=. python entry.py "${train_args[@]}"
done
Binary file added training_molkgnn/last.ckpt
Binary file not shown.
1 change: 1 addition & 0 deletions wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ def process(self):
invalid_id_series.to_csv(os.path.join(self.processed_dir, f'{self.gnn_type}-{self.dataset}-invalid_id.csv'),
index=False,
header=False)
# TODO: use following lines for collate and save data_list
data, slices = self.collate(data_list)
torch.save((data, slices), self.processed_paths[0])

Expand Down