Skip to content

Commit f1aac43

Browse files
mroeschke authored and jreback committed
CLN: ASV frame_ctor benchmark (#18499)
1 parent c44a063 commit f1aac43

File tree

2 files changed

+43
-41
lines changed

2 files changed

+43
-41
lines changed

asv_bench/benchmarks/frame_ctor.py

+29 -41
Original file line number | Diff line number | Diff line change
@@ -1,32 +1,33 @@
1-
from .pandas_vb_common import *
1+
import numpy as np
2+
import pandas.util.testing as tm
3+
from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range
24
try:
3-
from pandas.tseries.offsets import *
5+
from pandas.tseries import offsets
46
except:
57
from pandas.core.datetools import *
68

79

8-
#----------------------------------------------------------------------
10+
# ----------------------------------------------------------------------
911
# Creation from nested dict
1012

1113
class FromDicts(object):
14+
1215
goal_time = 0.2
1316

1417
def setup(self):
15-
(N, K) = (5000, 50)
18+
np.random.seed(1234)
19+
N, K = 5000, 50
1620
self.index = tm.makeStringIndex(N)
1721
self.columns = tm.makeStringIndex(K)
18-
self.frame = DataFrame(np.random.randn(N, K), index=self.index, columns=self.columns)
19-
try:
20-
self.data = self.frame.to_dict()
21-
except:
22-
self.data = self.frame.toDict()
22+
self.frame = DataFrame(np.random.randn(N, K),
23+
index=self.index,
24+
columns=self.columns)
25+
self.data = self.frame.to_dict()
2326
self.some_dict = list(self.data.values())[0]
24-
self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values]
25-
27+
self.dict_list = self.frame.to_dict(orient='records')
2628
self.data2 = {i: {j: float(j) for j in range(100)}
2729
for i in range(2000)}
2830

29-
3031
def time_frame_ctor_list_of_dict(self):
3132
DataFrame(self.dict_list)
3233

@@ -38,38 +39,21 @@ def time_series_ctor_from_dict(self):
3839

3940
def time_frame_ctor_nested_dict_int64(self):
4041
# nested dict, integer indexes, regression described in #621
41-
DataFrame(self.data)
42+
DataFrame(self.data2)
4243

4344

4445
# from a mi-series
4546

46-
class frame_from_series(object):
47+
class FromSeries(object):
4748
goal_time = 0.2
4849

4950
def setup(self):
50-
self.mi = MultiIndex.from_tuples([(x, y) for x in range(100) for y in range(100)])
51-
self.s = Series(randn(10000), index=self.mi)
51+
self.mi = MultiIndex.from_product([range(100), range(100)])
52+
self.s = Series(np.random.randn(10000), index=self.mi)
5253

5354
def time_frame_from_mi_series(self):
5455
DataFrame(self.s)
5556

56-
57-
#----------------------------------------------------------------------
58-
# get_numeric_data
59-
60-
class frame_get_numeric_data(object):
61-
goal_time = 0.2
62-
63-
def setup(self):
64-
self.df = DataFrame(randn(10000, 25))
65-
self.df['foo'] = 'bar'
66-
self.df['bar'] = 'baz'
67-
self.df = self.df.consolidate()
68-
69-
def time_frame_get_numeric_data(self):
70-
self.df._get_numeric_data()
71-
72-
7357
# ----------------------------------------------------------------------
7458
# From dict with DatetimeIndex with all offsets
7559

@@ -84,13 +68,15 @@ def get_period_count(start_date, off):
8468
if (ten_offsets_in_days == 0):
8569
return 1000
8670
else:
87-
return min((9 * ((Timestamp.max - start_date).days // ten_offsets_in_days)), 1000)
71+
periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days
72+
return min(periods, 1000)
8873

8974

9075
def get_index_for_offset(off):
9176
start_date = Timestamp('1/1/1900')
92-
return date_range(start_date, periods=min(1000, get_period_count(
93-
start_date, off)), freq=off)
77+
return date_range(start_date,
78+
periods=get_period_count(start_date, off),
79+
freq=off)
9480

9581

9682
all_offsets = offsets.__all__
@@ -100,21 +86,23 @@ def get_index_for_offset(off):
10086
all_offsets.extend([off + '_1', off + '_2'])
10187

10288

103-
class FrameConstructorDTIndexFromOffsets(object):
89+
class FromDictwithTimestampOffsets(object):
10490

10591
params = [all_offsets, [1, 2]]
10692
param_names = ['offset', 'n_steps']
10793

10894
offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1},
10995
'LastWeekOfMonth': {'weekday': 1, 'week': 1},
11096
'FY5253': {'startingMonth': 1, 'weekday': 1},
111-
'FY5253Quarter': {'qtr_with_extra_week': 1, 'startingMonth': 1, 'weekday': 1}}
97+
'FY5253Quarter': {'qtr_with_extra_week': 1,
98+
'startingMonth': 1,
99+
'weekday': 1}}
112100

113101
offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']},
114102
'FY5253Quarter': {'variation': ['nearest', 'last']}}
115103

116104
def setup(self, offset, n_steps):
117-
105+
np.random.seed(1234)
118106
extra = False
119107
if offset.endswith("_", None, -1):
120108
extra = int(offset[-1])
@@ -127,12 +115,12 @@ def setup(self, offset, n_steps):
127115
if extra:
128116
extras = self.offset_extra_cases[offset]
129117
for extra_arg in extras:
130-
kwargs[extra_arg] = extras[extra_arg][extra -1]
118+
kwargs[extra_arg] = extras[extra_arg][extra - 1]
131119

132120
offset = getattr(offsets, offset)
133121
self.idx = get_index_for_offset(offset(n_steps, **kwargs))
134122
self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
135-
self.d = dict(self.df.items())
123+
self.d = self.df.to_dict()
136124

137125
def time_frame_ctor(self, offset, n_steps):
138126
DataFrame(self.d)

asv_bench/benchmarks/frame_methods.py

+14
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,20 @@
11
from .pandas_vb_common import *
22
import string
33

4+
#----------------------------------------------------------------------
5+
# get_numeric_data
6+
7+
class frame_get_numeric_data(object):
8+
goal_time = 0.2
9+
10+
def setup(self):
11+
self.df = DataFrame(np.random.randn(10000, 25))
12+
self.df['foo'] = 'bar'
13+
self.df['bar'] = 'baz'
14+
self.df = self.df.consolidate()
15+
16+
def time_frame_get_numeric_data(self):
17+
self.df._get_numeric_data()
418

519
#----------------------------------------------------------------------
620
# lookup

0 commit comments

Comments
 (0)