31
31
from alphapy .frame import read_frame
32
32
from alphapy .globals import ModelType
33
33
from alphapy .globals import Partition , datasets
34
- from alphapy .globals import PSEP , SSEP
34
+ from alphapy .globals import PD_INTRADAY_OFFSETS
35
+ from alphapy .globals import PD_WEB_DATA_FEEDS
36
+ from alphapy .globals import PSEP , SSEP , USEP
35
37
from alphapy .globals import SamplingMethod
36
38
from alphapy .globals import WILDCARD
37
39
@@ -282,6 +284,53 @@ def sample_data(model):
282
284
return model
283
285
284
286
287
#
# Function enhance_intraday_data
#

def enhance_intraday_data(df):
    r"""Add bar number and end-of-day columns to the intraday dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        The intraday dataframe with 'date', 'time', 'open', 'high',
        'low', 'close', and 'volume' columns. The frame is modified
        in place.

    Returns
    -------
    df : pandas.DataFrame
        The dataframe, indexed by bar datetime, with ``bar_number``
        and ``end_of_day`` columns added and the 'date' and 'time'
        columns removed.

    """

    # Convert the columns to proper data types

    index_column = 'datetime'
    dt_column = df['date'] + ' ' + df['time']
    df[index_column] = pd.to_datetime(dt_column)
    cols_float = ['open', 'high', 'low', 'close', 'volume']
    df[cols_float] = df[cols_float].astype(float)

    # Number the intraday bars within each trading day

    date_group = df.groupby('date')
    df['bar_number'] = date_group.cumcount()

    # Mark the last bar of each trading day

    df['end_of_day'] = False
    df.loc[date_group.tail(1).index, 'end_of_day'] = True

    # Set the data frame's index to the bar datetime.
    # NOTE: set_index ignores drop=True when passed an Index object
    # instead of a column name, so the redundant 'datetime' column
    # must be removed explicitly.
    df.set_index(pd.DatetimeIndex(df[index_column]), inplace=True)
    del df[index_column]

    # Remove the source columns and return the enhanced frame

    del df['date']
    del df['time']
    return df
332
+
333
+
285
334
#
286
335
# Function get_google_data
287
336
#
@@ -345,25 +394,12 @@ def get_google_data(symbol, lookback_period, fractal):
345
394
dt = datetime .fromtimestamp (day_item + (interval * offset ))
346
395
dt = pd .to_datetime (dt )
347
396
dt_date = dt .strftime ('%Y-%m-%d' )
348
- record = (dt , dt_date , open_item , high_item , low_item , close_item , volume_item )
397
+ dt_time = dt .strftime ('%H:%M:%S' )
398
+ record = (dt_date , dt_time , open_item , high_item , low_item , close_item , volume_item )
349
399
records .append (record )
350
400
# create data frame
351
- cols = ['datetime ' , 'date ' , 'open' , 'high' , 'low' , 'close' , 'volume' ]
401
+ cols = ['date ' , 'time ' , 'open' , 'high' , 'low' , 'close' , 'volume' ]
352
402
df = pd .DataFrame .from_records (records , columns = cols )
353
- # convert to proper data types
354
- cols_float = ['open' , 'high' , 'low' , 'close' ]
355
- df [cols_float ] = df [cols_float ].astype (float )
356
- df ['volume' ] = df ['volume' ].astype (int )
357
- # number the intraday bars
358
- date_group = df .groupby ('date' )
359
- df ['bar_number' ] = date_group .cumcount ()
360
- # mark the end of the trading day
361
- df ['end_of_day' ] = False
362
- del df ['date' ]
363
- df .loc [date_group .tail (1 ).index , 'end_of_day' ] = True
364
- # set the index to datetime
365
- df .index = df ['datetime' ]
366
- del df ['datetime' ]
367
403
# return the dataframe
368
404
return df
369
405
@@ -373,7 +409,7 @@ def get_google_data(symbol, lookback_period, fractal):
373
409
#
374
410
375
411
def get_pandas_data(schema, symbol, lookback_period):
    r"""Get Pandas Web Reader data.

    Parameters
    ----------
    schema : str
        The data feed for the Pandas Web Reader (e.g., 'yahoo').
        Quandl schemas have the form ``quandl<USEP><database>``.
    symbol : str
        The symbol for which to retrieve data.
    lookback_period : int
        The number of days of historical data to retrieve.

    Returns
    -------
    df : pandas.DataFrame
        The dataframe returned by the Pandas Web Reader, or None if
        the data could not be retrieved.

    """

    # Quandl is a special case: the schema carries the database name,
    # which is joined with the symbol (uppercased) to form the
    # Quandl dataset code.

    if 'quandl' in schema:
        schema, symbol_prefix = schema.split(USEP)
        symbol = SSEP.join([symbol_prefix, symbol]).upper()

    # Calculate the start and end dates of the lookback window.

    start = datetime.now() - timedelta(lookback_period)
    end = datetime.now()

    # Call the Pandas Web data reader. Catch only genuine errors:
    # a bare except would also swallow SystemExit and
    # KeyboardInterrupt.

    try:
        df = web.DataReader(symbol, schema, start, end)
    except Exception:
        df = None
        logger.info("Could not retrieve data for: %s", symbol)

    return df
409
450
410
451
411
452
#
# Function get_market_data
#

def get_market_data(model, group, lookback_period, resample_data):
    r"""Get data from an external feed.

    Parameters
    ----------
    model : alphapy.Model
        The model object describing the data.
    group : alphapy.Group
        The group of symbols.
    lookback_period : int
        The number of days of data to retrieve.
    resample_data : bool
        If True, resample the data to the group's fractal.

    Returns
    -------
    n_periods : int
        The maximum number of periods retrieved for any member
        of the group.

    """

    # Unpack model specifications

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Unpack group elements

    gspace = group.space
    schema = gspace.schema
    fractal = gspace.fractal

    # Determine the feed source

    if any(substring in fractal for substring in PD_INTRADAY_OFFSETS):
        # intraday data (date and time)
        logger.info("Getting Intraday Data [%s] from %s", fractal, schema)
        intraday_data = True
        index_column = 'datetime'
    else:
        # daily data or higher (date only)
        logger.info("Getting Daily Data [%s] from %s", fractal, schema)
        intraday_data = False
        index_column = 'date'

    # Get the data from the relevant feed

    data_dir = SSEP.join([directory, 'data'])
    pandas_data = any(substring in schema for substring in PD_WEB_DATA_FEEDS)
    n_periods = 0

    for item in group.members:
        logger.info("Getting %s data for last %d days", item, lookback_period)
        # Locate the data source. Initialize df so an unsupported
        # schema does not raise NameError (or silently reuse the
        # previous symbol's frame) below.
        df = None
        if schema == 'data':
            fname = frame_name(item.lower(), gspace)
            df = read_frame(data_dir, fname, extension, separator)
            if not intraday_data:
                df.set_index(pd.DatetimeIndex(df[index_column]),
                             drop=True, inplace=True)
        elif schema == 'google' and intraday_data:
            df = get_google_data(item, lookback_period, fractal)
        elif pandas_data:
            df = get_pandas_data(schema, item, lookback_period)
        else:
            logger.error("Unsupported Data Source: %s", schema)
        # Now that we have content, standardize the data
        if df is not None and not df.empty:
            logger.info("Rows: %d", len(df))
            # standardize column names
            df = df.rename(columns=lambda x: x.lower().replace(' ', ''))
            # add intraday columns if necessary
            if intraday_data:
                df = enhance_intraday_data(df)
            # order by increasing date if necessary
            df = df.sort_index()
            # resample data
            if resample_data:
                df = df.resample(fractal).agg({'open'   : 'first',
                                               'high'   : 'max',
                                               'low'    : 'min',
                                               'close'  : 'last',
                                               'volume' : 'sum'})
                logger.info("Rows after Resampling at %s: %d",
                            fractal, len(df))
            # allocate global Frame
            newf = Frame(item.lower(), gspace, df)
            if newf is None:
                logger.error("Could not allocate Frame for: %s", item)
            # track the maximum number of periods retrieved
            df_len = len(df)
            if df_len > n_periods:
                n_periods = df_len
        else:
            logger.info("No DataFrame for %s", item)

    # The number of periods actually retrieved
    return n_periods
0 commit comments