18
18
import sys
19
19
20
20
import numpy
21
- from setuptools import Command , Extension , find_packages , setup
21
+ from setuptools import Command , Extension , setup
22
22
from setuptools .command .build_ext import build_ext as _build_ext
23
23
24
24
import versioneer
@@ -34,7 +34,6 @@ def is_platform_mac():
34
34
return sys .platform == "darwin"
35
35
36
36
37
- min_numpy_ver = "1.16.5"
38
37
min_cython_ver = "0.29.21" # note: sync with pyproject.toml
39
38
40
39
try :
@@ -99,96 +98,6 @@ def build_extensions(self):
99
98
super ().build_extensions ()
100
99
101
100
102
- DESCRIPTION = "Powerful data structures for data analysis, time series, and statistics"
103
- LONG_DESCRIPTION = """
104
- **pandas** is a Python package that provides fast, flexible, and expressive data
105
- structures designed to make working with structured (tabular, multidimensional,
106
- potentially heterogeneous) and time series data both easy and intuitive. It
107
- aims to be the fundamental high-level building block for doing practical,
108
- **real world** data analysis in Python. Additionally, it has the broader goal
109
- of becoming **the most powerful and flexible open source data analysis /
110
- manipulation tool available in any language**. It is already well on its way
111
- toward this goal.
112
-
113
- pandas is well suited for many different kinds of data:
114
-
115
- - Tabular data with heterogeneously-typed columns, as in an SQL table or
116
- Excel spreadsheet
117
- - Ordered and unordered (not necessarily fixed-frequency) time series data.
118
- - Arbitrary matrix data (homogeneously typed or heterogeneous) with row and
119
- column labels
120
- - Any other form of observational / statistical data sets. The data actually
121
- need not be labeled at all to be placed into a pandas data structure
122
-
123
- The two primary data structures of pandas, Series (1-dimensional) and DataFrame
124
- (2-dimensional), handle the vast majority of typical use cases in finance,
125
- statistics, social science, and many areas of engineering. For R users,
126
- DataFrame provides everything that R's ``data.frame`` provides and much
127
- more. pandas is built on top of `NumPy <https://www.numpy.org>`__ and is
128
- intended to integrate well within a scientific computing environment with many
129
- other 3rd party libraries.
130
-
131
- Here are just a few of the things that pandas does well:
132
-
133
- - Easy handling of **missing data** (represented as NaN) in floating point as
134
- well as non-floating point data
135
- - Size mutability: columns can be **inserted and deleted** from DataFrame and
136
- higher dimensional objects
137
- - Automatic and explicit **data alignment**: objects can be explicitly
138
- aligned to a set of labels, or the user can simply ignore the labels and
139
- let `Series`, `DataFrame`, etc. automatically align the data for you in
140
- computations
141
- - Powerful, flexible **group by** functionality to perform
142
- split-apply-combine operations on data sets, for both aggregating and
143
- transforming data
144
- - Make it **easy to convert** ragged, differently-indexed data in other
145
- Python and NumPy data structures into DataFrame objects
146
- - Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
147
- of large data sets
148
- - Intuitive **merging** and **joining** data sets
149
- - Flexible **reshaping** and pivoting of data sets
150
- - **Hierarchical** labeling of axes (possible to have multiple labels per
151
- tick)
152
- - Robust IO tools for loading data from **flat files** (CSV and delimited),
153
- Excel files, databases, and saving / loading data from the ultrafast **HDF5
154
- format**
155
- - **Time series**-specific functionality: date range generation and frequency
156
- conversion, moving window statistics, date shifting and lagging.
157
-
158
- Many of these principles are here to address the shortcomings frequently
159
- experienced using other languages / scientific research environments. For data
160
- scientists, working with data is typically divided into multiple stages:
161
- munging and cleaning data, analyzing / modeling it, then organizing the results
162
- of the analysis into a form suitable for plotting or tabular display. pandas is
163
- the ideal tool for all of these tasks.
164
- """
165
-
166
- DISTNAME = "pandas"
167
- LICENSE = "BSD"
168
- AUTHOR = "The PyData Development Team"
169
-
170
- URL = "https://pandas.pydata.org"
171
- DOWNLOAD_URL = ""
172
- PROJECT_URLS = {
173
- "Bug Tracker" : "https://github.com/pandas-dev/pandas/issues" ,
174
- "Documentation" : "https://pandas.pydata.org/pandas-docs/stable/" ,
175
- "Source Code" : "https://github.com/pandas-dev/pandas" ,
176
- }
177
- CLASSIFIERS = [
178
- "Development Status :: 5 - Production/Stable" ,
179
- "Environment :: Console" ,
180
- "Operating System :: OS Independent" ,
181
- "Intended Audience :: Science/Research" ,
182
- "Programming Language :: Python" ,
183
- "Programming Language :: Python :: 3" ,
184
- "Programming Language :: Python :: 3.7" ,
185
- "Programming Language :: Python :: 3.8" ,
186
- "Programming Language :: Python :: 3.9" ,
187
- "Programming Language :: Cython" ,
188
- "Topic :: Scientific/Engineering" ,
189
- ]
190
-
191
-
192
101
class CleanCommand (Command ):
193
102
"""Custom distutils command to clean the .so and .pyc files."""
194
103
@@ -711,51 +620,11 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
711
620
# ----------------------------------------------------------------------
712
621
713
622
714
- def setup_package ():
715
- setuptools_kwargs = {
716
- "install_requires" : [
717
- "python-dateutil >= 2.7.3" ,
718
- "pytz >= 2017.3" ,
719
- f"numpy >= { min_numpy_ver } " ,
720
- ],
721
- "setup_requires" : [f"numpy >= { min_numpy_ver } " ],
722
- "zip_safe" : False ,
723
- }
724
-
623
+ if __name__ == "__main__" :
624
+ # Freeze to support parallel compilation when using spawn instead of fork
625
+ multiprocessing .freeze_support ()
725
626
setup (
726
- name = DISTNAME ,
727
- maintainer = AUTHOR ,
728
627
version = versioneer .get_version (),
729
- packages = find_packages (include = ["pandas" , "pandas.*" ]),
730
- package_data = {"" : ["templates/*" , "_libs/**/*.dll" ]},
731
628
ext_modules = maybe_cythonize (extensions , compiler_directives = directives ),
732
- maintainer_email = EMAIL ,
733
- description = DESCRIPTION ,
734
- license = LICENSE ,
735
629
cmdclass = cmdclass ,
736
- url = URL ,
737
- download_url = DOWNLOAD_URL ,
738
- project_urls = PROJECT_URLS ,
739
- long_description = LONG_DESCRIPTION ,
740
- classifiers = CLASSIFIERS ,
741
- platforms = "any" ,
742
- python_requires = ">=3.7.1" ,
743
- extras_require = {
744
- "test" : [
745
- # sync with setup.cfg minversion & install.rst
746
- "pytest>=5.0.1" ,
747
- "pytest-xdist" ,
748
- "hypothesis>=3.58" ,
749
- ]
750
- },
751
- entry_points = {
752
- "pandas_plotting_backends" : ["matplotlib = pandas:plotting._matplotlib" ]
753
- },
754
- ** setuptools_kwargs ,
755
630
)
756
-
757
-
758
- if __name__ == "__main__" :
759
- # Freeze to support parallel compilation when using spawn instead of fork
760
- multiprocessing .freeze_support ()
761
- setup_package ()
0 commit comments