Skip to content

Commit 3c8a915

Browse files
author
Balázs Hidasi
committed
Model variant selection added to run.py
1 parent 87971fe commit 3c8a915

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__pycache__

run.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def __init__(self, *args, **kwargs):
1919
parser.add_argument('-ss', '--sample_store_size', metavar='SS', type=int, default=10000000, help='GRU4Rec uses a buffer for negative samples during training to maximize GPU utilization. This parameter sets the buffer length. Lower values require more frequent recomputation, higher values use more (GPU) memory. Unless you know what you are doing, you shouldn\'t mess with this parameter. (Default: 10000000)')
2020
parser.add_argument('--sample_store_on_cpu', action='store_true', help='If provided, the sample store will be stored in the RAM instead of the GPU memory. This is not advised in most cases, because it significantly lowers the GPU utilization. This option is provided if for some reason you want to train the model on the CPU (NOT advised).')
2121
parser.add_argument('--test_against_items', metavar='N_TEST_ITEMS', type=int, help='It is NOT advised to evaluate recommender algorithms by ranking a single positive item against a set of sampled negatives. It overestimates recommendation performance and also skews comparisons, as it affects algorithms differently (and if a different sequence of random samples is used, the results are downright incomparable). If testing takes too much time, it is advised to sample test sessions to create a smaller test set. However, if the number of items is very high (i.e. ABOVE FEW MILLIONS), it might be impossible to evaluate the model within a reasonable time, even on a smaller (but still representative) test set. In this case, and this case only, one can sample items to evaluate against. This option allows to rank the positive item against the N_TEST_ITEMS most popular items. This has a lesser effect on comparison and it is a much stronger criterion than ranking against randomly sampled items. Keep in mind that the real performance of the algorithm will still be overestimated by the results, but comparison will be mostly fair. If used, you should NEVER SET THIS PARAMETER BELOW 50000 and try to set it as high as possible (for your required evaluation time). (Default: all items are used as negatives for evaluation)')
22+
parser.add_argument('-g', '--gru4rec_model', metavar='GRFILE', type=str, default='gru4rec', help='Name of the file containing the GRU4Rec class. Can be used to select different variants. (Default: gru4rec)')
2223
args = parser.parse_args()
2324

2425
import os.path
@@ -30,7 +31,8 @@ def __init__(self, *args, **kwargs):
3031
import sys
3132
import time
3233
from collections import OrderedDict
33-
from gru4rec import GRU4Rec
34+
import importlib
35+
GRU4Rec = importlib.import_module(args.gru4rec_model).GRU4Rec
3436
import evaluation
3537
import importlib.util
3638
import joblib
@@ -68,7 +70,7 @@ def load_data(fname, gru):
6870
print('The default column name is "Time", but you can specify otherwise by setting the `time_key` parameter of the model.')
6971
sys.exit(1)
7072
print('Loading data from TAB separated file: {}'.format(fname))
71-
data = pd.read_csv(fname, sep='\t', usecols=[gru.session_key, gru.item_key, gru.time_key], dtype={gru.session_key:'int32', gru.item_key:np.str})
73+
data = pd.read_csv(fname, sep='\t', usecols=[gru.session_key, gru.item_key, gru.time_key], dtype={gru.session_key:'int32', gru.item_key:'str'})
7274
return data
7375

7476
if (args.parameter_string is not None) + (args.parameter_file is not None) + (args.load_model) != 1:

0 commit comments

Comments
 (0)