
Commit 7a7d0e7

Data Feed Update 2.3.9
1 parent 3b7aea6 commit 7a7d0e7

19 files changed, +504 −190 lines changed

.gitignore (+4)

@@ -17,3 +17,7 @@
 alphapy/examples/Trading System/.ipynb_checkpoints/A Trading System-checkpoint.ipynb
 *.pkl
 *.png
+*.code-workspace
+alphapy/.vscode/launch.json
+alphapy/.vscode/settings.json
+*.log

alphapy/__main__.py (+14 −9)

@@ -66,6 +66,7 @@
 import numpy as np
 import os
 import pandas as pd
+from sklearn.model_selection import train_test_split
 import sys
 import warnings
 warnings.simplefilter(action='ignore', category=DeprecationWarning)
@@ -115,18 +116,27 @@ def training_pipeline(model):
     feature_selection = model.specs['feature_selection']
     grid_search = model.specs['grid_search']
     model_type = model.specs['model_type']
-    predict_mode = model.specs['predict_mode']
     rfe = model.specs['rfe']
     sampling = model.specs['sampling']
     scorer = model.specs['scorer']
+    seed = model.specs['seed']
     separator = model.specs['separator']
+    split = model.specs['split']
     target = model.specs['target']

     # Get train and test data

     X_train, y_train = get_data(model, Partition.train)
     X_test, y_test = get_data(model, Partition.test)

+    # If there is no test partition, then we will split the train partition
+
+    if X_test.empty:
+        logger.info("No Test Data Found")
+        logger.info("Splitting Training Data")
+        X_train, X_test, y_train, y_test = train_test_split(
+            X_train, y_train, test_size=split, random_state=seed)
+
     # Determine if there are any test labels

     if y_test.any():
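The hunk above introduces a fallback in training_pipeline: when get_data returns an empty test partition, the training data is carved into train and test sets with scikit-learn's train_test_split, driven by the new 'split' and 'seed' specs. A minimal sketch of that pattern follows; the toy DataFrame and spec values are illustrative only, not taken from the commit.

import pandas as pd
from sklearn.model_selection import train_test_split

# Illustrative spec values; in AlphaPy these come from model.specs
split, seed = 0.4, 42

# Toy training partition standing in for get_data(model, Partition.train)
X_train = pd.DataFrame({'f1': range(10), 'f2': range(10, 20)})
y_train = pd.Series([0, 1] * 5)
X_test = pd.DataFrame()   # empty test partition, as detected by X_test.empty

if X_test.empty:
    # No test data found, so split the training partition instead
    X_train, X_test, y_train, y_test = train_test_split(
        X_train, y_train, test_size=split, random_state=seed)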
@@ -311,11 +321,9 @@ def prediction_pipeline(model):

     directory = model.specs['directory']
     drop = model.specs['drop']
-    extension = model.specs['extension']
     feature_selection = model.specs['feature_selection']
     model_type = model.specs['model_type']
     rfe = model.specs['rfe']
-    separator = model.specs['separator']

     # Get all data. We need original train and test for interactions.

@@ -379,15 +387,12 @@ def prediction_pipeline(model):
     if model_type == ModelType.classification:
         model.probas[(tag, partition)] = predictor.predict_proba(all_features)[:, 1]

-    # Get date stamp to record file creation
-
-    d = datetime.now()
-    f = "%Y%m%d"
-    timestamp = d.strftime(f)
-
     # Save predictions
     save_predictions(model, tag, partition)

+    # Return the model
+    return model
+

 #
 # Function main_pipeline

alphapy/analysis.py (+7 −8)

@@ -4,7 +4,7 @@
 # Module    : analysis
 # Created   : July 11, 2013
 #
-# Copyright 2017 ScottFree Analytics LLC
+# Copyright 2019 ScottFree Analytics LLC
 # Mark Conway & Robert D. Scott II
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -95,7 +95,7 @@ class Analysis(object):
     analyses = {}

     # __new__
-
+
     def __new__(cls,
                 model,
                 group):
@@ -123,7 +123,7 @@ def __init__(self,
         self.group = group
         # add analysis to analyses list
         Analysis.analyses[an] = self
-
+
     # __str__

     def __str__(self):
@@ -192,9 +192,6 @@ def run_analysis(analysis, lag_period, forecast_period, leaders,
     # Calculate split date
     logger.info("Analysis Dates")
     split_date = subtract_days(predict_date, predict_history)
-    logger.info("Train Date: %s", train_date)
-    logger.info("Split Date: %s", split_date)
-    logger.info("Test Date: %s", predict_date)

     # Load the data frames
     data_frames = load_frames(group, directory, extension, separator, splits)
@@ -203,9 +200,11 @@ def run_analysis(analysis, lag_period, forecast_period, leaders,

     if predict_mode:
         # create predict frame
+        logger.info("Split Date for Prediction Mode: %s", split_date)
         predict_frame = pd.DataFrame()
     else:
         # create train and test frames
+        logger.info("Split Date for Training Mode: %s", predict_date)
         train_frame = pd.DataFrame()
         test_frame = pd.DataFrame()

@@ -232,11 +231,11 @@ def run_analysis(analysis, lag_period, forecast_period, leaders,
                                 tag)
         else:
             # split data into train and test
-            new_train = df.loc[(df.index >= train_date) & (df.index < split_date)]
+            new_train = df.loc[(df.index >= train_date) & (df.index < predict_date)]
             if len(new_train) > 0:
                 new_train = new_train.dropna()
                 train_frame = train_frame.append(new_train)
-            new_test = df.loc[(df.index >= split_date) & (df.index <= last_date)]
+            new_test = df.loc[(df.index >= predict_date) & (df.index <= last_date)]
             if len(new_test) > 0:
                 # check if target column has NaN values
                 nan_count = df[target].isnull().sum()
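The final hunk moves the train/test boundary from split_date to predict_date: each symbol's rows strictly before predict_date now go to the training frame, and rows from predict_date through last_date go to the test frame. A short sketch of that date-indexed split; the frame and dates below are illustrative, not from the commit.

import pandas as pd

# Illustrative date-indexed frame standing in for one symbol's data
df = pd.DataFrame({'close': range(10)},
                  index=pd.date_range('2019-01-01', periods=10))

train_date = pd.Timestamp('2019-01-01')
predict_date = pd.Timestamp('2019-01-08')
last_date = df.index[-1]

# Train on rows before predict_date, test from predict_date onward
new_train = df.loc[(df.index >= train_date) & (df.index < predict_date)]
new_test = df.loc[(df.index >= predict_date) & (df.index <= last_date)]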
