@@ -622,6 +622,15 @@ def closing_date_rows_in_pd_object(
622
622
pd_object : Union [pd .DataFrame , pd .Series ],
623
623
closing_time : pd .DateOffset = NOTIONAL_CLOSING_TIME_AS_PD_OFFSET ,
624
624
) -> Union [pd .DataFrame , pd .Series ]:
625
+ """
626
+ >>> d = datetime.datetime
627
+ >>> date_index = [d(2000,1,1,15),d(2000,1,1,23), d(2000,1,2,15)]
628
+ >>> df = pd.DataFrame(dict(a=[1, 2, 3], b=[4 , 6, 5]), index=date_index)
629
+ >>> closing_date_rows_in_pd_object(df)
630
+ a b
631
+ 2000-01-01 23:00:00 2 6
632
+
633
+ """
625
634
return pd_object [
626
635
[
627
636
check_time_matches_closing_time_to_second (
@@ -632,26 +641,71 @@ def closing_date_rows_in_pd_object(
632
641
]
633
642
634
643
635
def intraday_date_rows_in_pd_object(
    pd_object: Union[pd.DataFrame, pd.Series],
    closing_time: pd.DateOffset = NOTIONAL_CLOSING_TIME_AS_PD_OFFSET,
) -> Union[pd.DataFrame, pd.Series]:
    """
    Keep only the rows whose timestamp is NOT at the (notional) closing time.

    Complement of closing_date_rows_in_pd_object: rows matching closing_time
    (to the second) are dropped, everything else is returned unchanged.

    >>> d = datetime.datetime
    >>> date_index = [d(2000,1,1,15),d(2000,1,1,23), d(2000,1,2,15)]
    >>> df = pd.DataFrame(dict(a=[1, 2, 3], b=[4 , 6, 5]), index=date_index)
    >>> intraday_date_rows_in_pd_object(df)
                         a  b
    2000-01-01 15:00:00  1  4
    2000-01-02 15:00:00  3  5
    """
    # Boolean mask, one entry per index timestamp: True for intraday rows
    is_intraday_row = [
        not check_time_matches_closing_time_to_second(
            index_entry=timestamp, closing_time=closing_time
        )
        for timestamp in pd_object.index
    ]

    return pd_object[is_intraday_row]
644
666
645
667
646
def get_intraday_pdf_at_frequency(
    pd_object: Union[pd.DataFrame, pd.Series],
    frequency: str = "H",
    closing_time: pd.DateOffset = NOTIONAL_CLOSING_TIME_AS_PD_OFFSET,
) -> Union[pd.Series, pd.DataFrame]:
    """
    Drop rows stamped at the closing time, then downsample the remaining
    intraday data to the given pandas frequency, keeping the last observation
    in each bin.

    Note: resampling introduces all-NaN rows for empty bins (eg overnight);
    these are dropped, but their presence upcasts integer columns to float.

    >>> d = datetime.datetime
    >>> date_index = [d(2000,1,1,15),d(2000,1,1,16),d(2000,1,1,23), d(2000,1,2,15)]
    >>> df = pd.DataFrame(dict(a=[1, 2, 3,4], b=[4,5,6,7]), index=date_index)
    >>> get_intraday_pdf_at_frequency(df,"2H")
                           a    b
    2000-01-01 14:00:00  1.0  4.0
    2000-01-01 16:00:00  2.0  5.0
    2000-01-02 14:00:00  4.0  7.0
    """
    # Strip closing-time rows first so they can't leak into an intraday bin
    intraday_only_pdf = intraday_date_rows_in_pd_object(
        pd_object, closing_time=closing_time
    )
    intraday_pdf = intraday_only_pdf.resample(frequency).last()

    # Empty bins come back as all-NaN rows: remove them
    intraday_pdf_clean = intraday_pdf.dropna()

    return intraday_pdf_clean
652
689
653
690
654
- def merge_data_with_different_freq (list_of_data : list ):
691
+ def merge_data_with_different_freq (
692
+ list_of_data : List [Union [pd .DataFrame , pd .Series ]]
693
+ ) -> Union [pd .Series , pd .DataFrame ]:
694
+ """
695
+ >>> d = datetime.datetime
696
+ >>> date_index1 = [d(2000,1,1,23),d(2000,1,2,23),d(2000,1,3,23)]
697
+ >>> date_index2 = [d(2000,1,1,15),d(2000,1,1,16),d(2000,1,2,15)]
698
+ >>> s1 = pd.Series([3,5,6], index=date_index1)
699
+ >>> s2 = pd.Series([1,2,4], index=date_index2)
700
+ >>> merge_data_with_different_freq([s1,s2])
701
+ 2000-01-01 15:00:00 1
702
+ 2000-01-01 16:00:00 2
703
+ 2000-01-01 23:00:00 3
704
+ 2000-01-02 15:00:00 4
705
+ 2000-01-02 23:00:00 5
706
+ 2000-01-03 23:00:00 6
707
+ """
708
+
655
709
list_as_concat_pd = pd .concat (list_of_data , axis = 0 )
656
710
sorted_pd = list_as_concat_pd .sort_index ()
657
711
unique_pd = uniquets (sorted_pd )
@@ -660,9 +714,30 @@ def merge_data_with_different_freq(list_of_data: list):
660
714
661
715
662
716
def sumup_business_days_over_pd_series_without_double_counting_of_closing_data (
663
- pd_series ,
664
- ):
665
- intraday_data = intraday_date_rows_in_pd_object (pd_series )
717
+ pd_series : pd .Series ,
718
+ closing_time : pd .DateOffset = NOTIONAL_CLOSING_TIME_AS_PD_OFFSET ,
719
+ ) -> pd .Series :
720
+ """
721
+ Used for volume data - adds up a series over a day to get a daily total
722
+
723
+ Uses closing values when available, otherwise sums up intraday values
724
+
725
+ >>> d = datetime.datetime
726
+ >>> date_index1 = [d(2000,2,1,15),d(2000,2,1,16), d(2000,2,1,23), ]
727
+ >>> s1 = pd.Series([10,5,17], index=date_index1)
728
+ >>> sumup_business_days_over_pd_series_without_double_counting_of_closing_data(s1)
729
+ 2000-02-01 17
730
+ Freq: B, Name: 0, dtype: int64
731
+ >>> date_index1 = [d(2000,2,1,15),d(2000,2,1,16), d(2000,2,2,23) ]
732
+ >>> s1 = pd.Series([10,5,2], index=date_index1)
733
+ >>> sumup_business_days_over_pd_series_without_double_counting_of_closing_data(s1)
734
+ 2000-02-01 15.0
735
+ 2000-02-02 2.0
736
+ Freq: B, Name: 0, dtype: float64
737
+ """
738
+ intraday_data = intraday_date_rows_in_pd_object (
739
+ pd_series , closing_time = closing_time
740
+ )
666
741
if len (intraday_data ) == 0 :
667
742
return pd_series
668
743
@@ -683,6 +758,8 @@ def sumup_business_days_over_pd_series_without_double_counting_of_closing_data(
683
758
def replace_all_zeros_with_nan (result : pd .Series ) -> pd .Series :
684
759
check_result = copy (result )
685
760
check_result [check_result == 0.0 ] = np .nan
761
+
762
+ ##
686
763
if all (check_result .isna ()):
687
764
result [:] = np .nan
688
765
0 commit comments