Skip to content

Commit 85caf8e

Browse files
author
rob
committed
more pd refactoring
1 parent 43d5d22 commit 85caf8e

File tree

3 files changed

+89
-12
lines changed

3 files changed

+89
-12
lines changed

syscore/pandas/pdutils.py

Lines changed: 85 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,15 @@ def closing_date_rows_in_pd_object(
622622
pd_object: Union[pd.DataFrame, pd.Series],
623623
closing_time: pd.DateOffset = NOTIONAL_CLOSING_TIME_AS_PD_OFFSET,
624624
) -> Union[pd.DataFrame, pd.Series]:
625+
"""
626+
>>> d = datetime.datetime
627+
>>> date_index = [d(2000,1,1,15),d(2000,1,1,23), d(2000,1,2,15)]
628+
>>> df = pd.DataFrame(dict(a=[1, 2, 3], b=[4 , 6, 5]), index=date_index)
629+
>>> closing_date_rows_in_pd_object(df)
630+
a b
631+
2000-01-01 23:00:00 2 6
632+
633+
"""
625634
return pd_object[
626635
[
627636
check_time_matches_closing_time_to_second(
@@ -632,26 +641,71 @@ def closing_date_rows_in_pd_object(
632641
]
633642

634643

635-
def intraday_date_rows_in_pd_object(
    pd_object: Union[pd.DataFrame, pd.Series],
    closing_time: pd.DateOffset = NOTIONAL_CLOSING_TIME_AS_PD_OFFSET,
) -> Union[pd.DataFrame, pd.Series]:
    """
    Keep only the rows whose index timestamp is NOT the (notional) closing time.

    The complement of closing_date_rows_in_pd_object: closing rows are dropped,
    everything else is returned unchanged and in original order.

    :param pd_object: Series or DataFrame indexed by timestamp
    :param closing_time: offset identifying the daily close (default notional close)
    :return: same type as pd_object, restricted to intraday rows

    >>> d = datetime.datetime
    >>> date_index = [d(2000,1,1,15),d(2000,1,1,23), d(2000,1,2,15)]
    >>> df = pd.DataFrame(dict(a=[1, 2, 3], b=[4 , 6, 5]), index=date_index)
    >>> intraday_date_rows_in_pd_object(df)
                         a  b
    2000-01-01 15:00:00  1  4
    2000-01-02 15:00:00  3  5
    """
    # Build a boolean mask, one entry per row: True when the timestamp does
    # not fall on the closing time (compared to second precision)
    is_intraday_row = []
    for timestamp in pd_object.index:
        at_close = check_time_matches_closing_time_to_second(
            index_entry=timestamp, closing_time=closing_time
        )
        is_intraday_row.append(not at_close)

    return pd_object[is_intraday_row]
644666

645667

646-
def get_intraday_pdf_at_frequency(
    pd_object: Union[pd.DataFrame, pd.Series],
    frequency: str = "H",
    closing_time: pd.DateOffset = NOTIONAL_CLOSING_TIME_AS_PD_OFFSET,
) -> Union[pd.Series, pd.DataFrame]:
    """
    Drop closing-time rows, then downsample the remaining intraday rows to the
    given pandas frequency, keeping the last observation in each bin.

    Empty bins are dropped. NOTE: resampling introduces NaN for empty bins
    before the dropna, so integer columns come back upcast to float.

    :param pd_object: Series or DataFrame indexed by timestamp
    :param frequency: pandas resample frequency string, e.g. "H", "2H"
    :param closing_time: offset identifying the daily close (default notional close)
    :return: resampled intraday data, same type as pd_object

    >>> d = datetime.datetime
    >>> date_index = [d(2000,1,1,15),d(2000,1,1,16),d(2000,1,1,23), d(2000,1,2,15)]
    >>> df = pd.DataFrame(dict(a=[1, 2, 3,4], b=[4,5,6,7]), index=date_index)
    >>> get_intraday_pdf_at_frequency(df,"2H").values.tolist()
    [[1.0, 4.0], [2.0, 5.0], [4.0, 7.0]]
    """
    # Remove the daily closing rows first so a close doesn't leak into the
    # last() of the bin that contains it
    intraday_only_df = intraday_date_rows_in_pd_object(
        pd_object, closing_time=closing_time
    )
    intraday_df = intraday_only_df.resample(frequency).last()
    # Bins with no intraday observations resample to NaN; discard them
    intraday_df_clean = intraday_df.dropna()

    return intraday_df_clean
652689

653690

654-
def merge_data_with_different_freq(list_of_data: list):
691+
def merge_data_with_different_freq(
692+
list_of_data: List[Union[pd.DataFrame, pd.Series]]
693+
) -> Union[pd.Series, pd.DataFrame]:
694+
"""
695+
>>> d = datetime.datetime
696+
>>> date_index1 = [d(2000,1,1,23),d(2000,1,2,23),d(2000,1,3,23)]
697+
>>> date_index2 = [d(2000,1,1,15),d(2000,1,1,16),d(2000,1,2,15)]
698+
>>> s1 = pd.Series([3,5,6], index=date_index1)
699+
>>> s2 = pd.Series([1,2,4], index=date_index2)
700+
>>> merge_data_with_different_freq([s1,s2])
701+
2000-01-01 15:00:00 1
702+
2000-01-01 16:00:00 2
703+
2000-01-01 23:00:00 3
704+
2000-01-02 15:00:00 4
705+
2000-01-02 23:00:00 5
706+
2000-01-03 23:00:00 6
707+
"""
708+
655709
list_as_concat_pd = pd.concat(list_of_data, axis=0)
656710
sorted_pd = list_as_concat_pd.sort_index()
657711
unique_pd = uniquets(sorted_pd)
@@ -660,9 +714,30 @@ def merge_data_with_different_freq(list_of_data: list):
660714

661715

662716
def sumup_business_days_over_pd_series_without_double_counting_of_closing_data(
663-
pd_series,
664-
):
665-
intraday_data = intraday_date_rows_in_pd_object(pd_series)
717+
pd_series: pd.Series,
718+
closing_time: pd.DateOffset = NOTIONAL_CLOSING_TIME_AS_PD_OFFSET,
719+
) -> pd.Series:
720+
"""
721+
Used for volume data - adds up a series over a day to get a daily total
722+
723+
Uses closing values when available, otherwise sums up intraday values
724+
725+
>>> d = datetime.datetime
726+
>>> date_index1 = [d(2000,2,1,15),d(2000,2,1,16), d(2000,2,1,23), ]
727+
>>> s1 = pd.Series([10,5,17], index=date_index1)
728+
>>> sumup_business_days_over_pd_series_without_double_counting_of_closing_data(s1)
729+
2000-02-01 17
730+
Freq: B, Name: 0, dtype: int64
731+
>>> date_index1 = [d(2000,2,1,15),d(2000,2,1,16), d(2000,2,2,23) ]
732+
>>> s1 = pd.Series([10,5,2], index=date_index1)
733+
>>> sumup_business_days_over_pd_series_without_double_counting_of_closing_data(s1)
734+
2000-02-01 15.0
735+
2000-02-02 2.0
736+
Freq: B, Name: 0, dtype: float64
737+
"""
738+
intraday_data = intraday_date_rows_in_pd_object(
739+
pd_series, closing_time=closing_time
740+
)
666741
if len(intraday_data) == 0:
667742
return pd_series
668743

@@ -683,6 +758,8 @@ def sumup_business_days_over_pd_series_without_double_counting_of_closing_data(
683758
def replace_all_zeros_with_nan(result: pd.Series) -> pd.Series:
684759
check_result = copy(result)
685760
check_result[check_result == 0.0] = np.nan
761+
762+
##
686763
if all(check_result.isna()):
687764
result[:] = np.nan
688765

sysdata/sim/sim_data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from syscore.dateutils import ARBITRARY_START
66
from syscore.pandas.pdutils import (
77
resample_prices_to_business_day_index,
8-
get_intraday_df_at_frequency,
8+
get_intraday_pdf_at_frequency,
99
)
1010
from sysdata.base_data import baseData
1111

@@ -135,7 +135,7 @@ def _get_hourly_prices_for_directional_instrument(
135135
raise Exception("No adjusted hourly prices for %s" % instrument_code)
136136

137137
# ignore type warning - series or data frame both work
138-
hourly_prices = get_intraday_df_at_frequency(instrprice)
138+
hourly_prices = get_intraday_pdf_at_frequency(instrprice)
139139

140140
return hourly_prices
141141

sysinit/futures/create_hourly_and_daily.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from syscore.dateutils import DAILY_PRICE_FREQ, HOURLY_FREQ
22
from syscore.pandas.pdutils import (
3-
get_intraday_df_at_frequency,
3+
get_intraday_pdf_at_frequency,
44
closing_date_rows_in_pd_object,
55
)
66
from sysdata.arctic.arctic_futures_per_contract_prices import (
@@ -19,7 +19,7 @@ def write_split_data_for_instrument(instrument_code):
1919
if len(merged_data) == 0:
2020
continue
2121
daily_data = closing_date_rows_in_pd_object(merged_data)
22-
hourly_data = get_intraday_df_at_frequency(merged_data, frequency="H")
22+
hourly_data = get_intraday_pdf_at_frequency(merged_data, frequency="H")
2323
if len(daily_data) > 0:
2424
a.write_prices_at_frequency_for_contract_object(
2525
contract,

0 commit comments

Comments
 (0)