pandas-dev · simonjayhawkins · Feb 16, 2021 · Dec 31, 2020 · Dec 31, 2020 · Dec 31, 2020
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,9 +1,4 @@
-include MANIFEST.in
-include LICENSE
 include RELEASE.md
-include README.md
-include setup.py
-include pyproject.toml
 
 graft doc
 prune doc/build
@@ -16,10 +11,12 @@ global-exclude *.bz2
 global-exclude *.csv
 global-exclude *.dta
 global-exclude *.feather
+global-exclude *.tar
 global-exclude *.gz
 global-exclude *.h5
 global-exclude *.html
 global-exclude *.json
+global-exclude *.jsonl
 global-exclude *.pickle
 global-exclude *.png
 global-exclude *.pyc
@@ -40,6 +37,11 @@ global-exclude .DS_Store
 global-exclude .git*
 global-exclude \#*
 
+# GH 39321
+# csv_dir_path fixture checks the existence of the directory
+# exclude the whole directory to avoid running related tests in sdist
+prune pandas/tests/io/parser/data
+
 include versioneer.py
 include pandas/_version.py
 include pandas/io/formats/templates/*.tpl
diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml
@@ -19,7 +19,7 @@ requirements:
     - pip
     - cython
     - numpy
-    - setuptools >=3.3
+    - setuptools >=38.6.0
     - python-dateutil >=2.7.3
     - pytz
   run:

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
@@ -219,7 +219,7 @@ Dependencies
 ================================================================ ==========================
 Package                                                          Minimum supported version
 ================================================================ ==========================
-`setuptools <https://setuptools.readthedocs.io/en/latest/>`__    24.2.0
+`setuptools <https://setuptools.readthedocs.io/en/latest/>`__    38.6.0
 `NumPy <https://numpy.org>`__                                    1.16.5
 `python-dateutil <https://dateutil.readthedocs.io/en/stable/>`__ 2.7.3
 `pytz <https://pypi.org/project/pytz/>`__                        2017.3

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -164,6 +164,8 @@ If installed, we now require:
 +-----------------+-----------------+----------+---------+
 | mypy (dev)      | 0.800           |          |    X    |
 +-----------------+-----------------+----------+---------+
+| setuptools      | 38.6.0          |          |    X    |
++-----------------+-----------------+----------+---------+
 
 For `optional libraries <https://pandas.pydata.org/docs/getting_started/install.html>`_ the general recommendation is to use the latest version.
 The following table lists the lowest version per library that is currently being tested throughout the development of pandas.

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,16 +1,17 @@
 [build-system]
 # Minimum requirements for the build system to execute.
-# See https://github.com/scipy/scipy/pull/10431 for the AIX issue.
+# See https://github.com/scipy/scipy/pull/12940 for the AIX issue.
 requires = [
-    "setuptools",
+    "setuptools>=38.6.0",
     "wheel",
     "Cython>=0.29.21,<3",  # Note: sync with setup.py
-    "numpy==1.16.5; python_version=='3.7' and platform_system!='AIX'",
-    "numpy==1.17.3; python_version=='3.8' and platform_system!='AIX'",
-    "numpy==1.16.5; python_version=='3.7' and platform_system=='AIX'",
-    "numpy==1.17.3; python_version=='3.8' and platform_system=='AIX'",
+    "numpy==1.16.5; python_version=='3.7'",
+    "numpy==1.17.3; python_version=='3.8'",
     "numpy; python_version>='3.9'",
 ]
+# uncomment to enable pep517 after versioneer problem is fixed.
+# https://github.com/python-versioneer/python-versioneer/issues/193
+# build-backend = "setuptools.build_meta"
 
 [tool.black]
 target-version = ['py37', 'py38']

diff --git a/setup.cfg b/setup.cfg
@@ -1,11 +1,65 @@
+[metadata]
+name = pandas
+description = Powerful data structures for data analysis, time series, and statistics
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://pandas.pydata.org
+author = The Pandas Development Team
+author_email = pydata@googlegroups.com
+license = BSD-3-Clause
+license_file = LICENSE
+platforms = any
+classifiers =
+    Development Status :: 5 - Production/Stable
+    Environment :: Console
+    Intended Audience :: Science/Research
+    License :: OSI Approved :: BSD License
+    Operating System :: OS Independent
+    Programming Language :: Cython
+    Programming Language :: Python
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3 :: Only
+    Programming Language :: Python :: 3.7
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Topic :: Scientific/Engineering
+project_urls =
+    Bug Tracker = https://github.com/pandas-dev/pandas/issues
+    Documentation = https://pandas.pydata.org/pandas-docs/stable
+    Source Code = https://github.com/pandas-dev/pandas
+
+[options]
+packages = find:
+install_requires =
+    numpy>=1.16.5
+    python-dateutil>=2.7.3
+    pytz>=2017.3
+python_requires = >=3.7.1
+include_package_data = True
+zip_safe = False
+
+[options.entry_points]
+pandas_plotting_backends =
+    matplotlib = pandas:plotting._matplotlib
+
+[options.extras_require]
+test =
+    hypothesis>=3.58
+    pytest>=5.0.1
+    pytest-xdist
+
+[options.package_data]
+* = templates/*, _libs/**/*.dll
 
 [build_ext]
-inplace = 1
+inplace = True
+
+[options.packages.find]
+include = pandas, pandas.*
 
 # See the docstring in versioneer.py for instructions. Note that you must
 # re-run 'versioneer.py setup' after changing this section, and commit the
 # resulting files.
-
 [versioneer]
 VCS = git
 style = pep440
@@ -38,16 +92,16 @@ bootstrap =
     import pandas as pd
     np  # avoiding error when importing again numpy or pandas
     pd  # (in some cases we want to do it to show users)
-ignore = E203,  # space before : (needed for how black formats slicing)
-         E402,  # module level import not at top of file
-         W503,  # line break before binary operator
-         # Classes/functions in different blocks can generate those errors
-         E302,  # expected 2 blank lines, found 0
-         E305,  # expected 2 blank lines after class or function definition, found 0
-         # We use semicolon at the end to avoid displaying plot objects
-         E703,  # statement ends with a semicolon
-         E711,  # comparison to none should be 'if cond is none:'
-
+ignore =
+    E203,  # space before : (needed for how black formats slicing)
+    E402,  # module level import not at top of file
+    W503,  # line break before binary operator
+    # Classes/functions in different blocks can generate those errors
+    E302,  # expected 2 blank lines, found 0
+    E305,  # expected 2 blank lines after class or function definition, found 0
+    # We use semicolon at the end to avoid displaying plot objects
+    E703,  # statement ends with a semicolon
+    E711,  # comparison to none should be 'if cond is none:'
 exclude =
     doc/source/development/contributing_docstring.rst,
     # work around issue of undefined variable warnings
@@ -64,18 +118,18 @@ xfail_strict = True
 filterwarnings =
     error:Sparse:FutureWarning
     error:The SparseArray:FutureWarning
-junit_family=xunit2
+junit_family = xunit2
 
 [codespell]
-ignore-words-list=ba,blocs,coo,hist,nd,ser
-ignore-regex=https://(\w+\.)+
+ignore-words-list = ba,blocs,coo,hist,nd,ser
+ignore-regex = https://(\w+\.)+
 
 [coverage:run]
 branch = False
 omit =
-     */tests/*
-     pandas/_typing.py
-     pandas/_version.py
+    */tests/*
+    pandas/_typing.py
+    pandas/_version.py
 plugins = Cython.Coverage
 
 [coverage:report]
@@ -130,10 +184,10 @@ warn_unused_ignores = True
 show_error_codes = True
 
 [mypy-pandas.tests.*]
-check_untyped_defs=False
+check_untyped_defs = False
 
 [mypy-pandas._version]
-check_untyped_defs=False
+check_untyped_defs = False
 
 [mypy-pandas.io.clipboard]
-check_untyped_defs=False
+check_untyped_defs = False
diff --git a/setup.py b/setup.py
@@ -18,7 +18,7 @@
 import sys
 
 import numpy
-from setuptools import Command, Extension, find_packages, setup
+from setuptools import Command, Extension, setup
 from setuptools.command.build_ext import build_ext as _build_ext
 
 import versioneer
@@ -34,7 +34,6 @@ def is_platform_mac():
     return sys.platform == "darwin"
 
 
-min_numpy_ver = "1.16.5"
 min_cython_ver = "0.29.21"  # note: sync with pyproject.toml
 
 try:
@@ -99,96 +98,6 @@ def build_extensions(self):
         super().build_extensions()
 
 
-DESCRIPTION = "Powerful data structures for data analysis, time series, and statistics"
-LONG_DESCRIPTION = """
-**pandas** is a Python package that provides fast, flexible, and expressive data
-structures designed to make working with structured (tabular, multidimensional,
-potentially heterogeneous) and time series data both easy and intuitive. It
-aims to be the fundamental high-level building block for doing practical,
-**real world** data analysis in Python. Additionally, it has the broader goal
-of becoming **the most powerful and flexible open source data analysis /
-manipulation tool available in any language**. It is already well on its way
-toward this goal.
-
-pandas is well suited for many different kinds of data:
-
-  - Tabular data with heterogeneously-typed columns, as in an SQL table or
-    Excel spreadsheet
-  - Ordered and unordered (not necessarily fixed-frequency) time series data.
-  - Arbitrary matrix data (homogeneously typed or heterogeneous) with row and
-    column labels
-  - Any other form of observational / statistical data sets. The data actually
-    need not be labeled at all to be placed into a pandas data structure
-
-The two primary data structures of pandas, Series (1-dimensional) and DataFrame
-(2-dimensional), handle the vast majority of typical use cases in finance,
-statistics, social science, and many areas of engineering. For R users,
-DataFrame provides everything that R's ``data.frame`` provides and much
-more. pandas is built on top of `NumPy <https://www.numpy.org>`__ and is
-intended to integrate well within a scientific computing environment with many
-other 3rd party libraries.
-
-Here are just a few of the things that pandas does well:
-
-  - Easy handling of **missing data** (represented as NaN) in floating point as
-    well as non-floating point data
-  - Size mutability: columns can be **inserted and deleted** from DataFrame and
-    higher dimensional objects
-  - Automatic and explicit **data alignment**: objects can be explicitly
-    aligned to a set of labels, or the user can simply ignore the labels and
-    let `Series`, `DataFrame`, etc. automatically align the data for you in
-    computations
-  - Powerful, flexible **group by** functionality to perform
-    split-apply-combine operations on data sets, for both aggregating and
-    transforming data
-  - Make it **easy to convert** ragged, differently-indexed data in other
-    Python and NumPy data structures into DataFrame objects
-  - Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
-    of large data sets
-  - Intuitive **merging** and **joining** data sets
-  - Flexible **reshaping** and pivoting of data sets
-  - **Hierarchical** labeling of axes (possible to have multiple labels per
-    tick)
-  - Robust IO tools for loading data from **flat files** (CSV and delimited),
-    Excel files, databases, and saving / loading data from the ultrafast **HDF5
-    format**
-  - **Time series**-specific functionality: date range generation and frequency
-    conversion, moving window statistics, date shifting and lagging.
-
-Many of these principles are here to address the shortcomings frequently
-experienced using other languages / scientific research environments. For data
-scientists, working with data is typically divided into multiple stages:
-munging and cleaning data, analyzing / modeling it, then organizing the results
-of the analysis into a form suitable for plotting or tabular display. pandas is
-the ideal tool for all of these tasks.
-"""
-
-DISTNAME = "pandas"
-LICENSE = "BSD"
-AUTHOR = "The PyData Development Team"
-EMAIL = "pydata@googlegroups.com"
-URL = "https://pandas.pydata.org"
-DOWNLOAD_URL = ""
-PROJECT_URLS = {
-    "Bug Tracker": "https://github.com/pandas-dev/pandas/issues",
-    "Documentation": "https://pandas.pydata.org/pandas-docs/stable/",
-    "Source Code": "https://github.com/pandas-dev/pandas",
-}
-CLASSIFIERS = [
-    "Development Status :: 5 - Production/Stable",
-    "Environment :: Console",
-    "Operating System :: OS Independent",
-    "Intended Audience :: Science/Research",
-    "Programming Language :: Python",
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.7",
-    "Programming Language :: Python :: 3.8",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Cython",
-    "Topic :: Scientific/Engineering",
-]
-
-
 class CleanCommand(Command):
     """Custom distutils command to clean the .so and .pyc files."""
 
@@ -711,51 +620,11 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
 # ----------------------------------------------------------------------
 
 
-def setup_package():
-    setuptools_kwargs = {
-        "install_requires": [
-            "python-dateutil >= 2.7.3",
-            "pytz >= 2017.3",
-            f"numpy >= {min_numpy_ver}",
-        ],
-        "setup_requires": [f"numpy >= {min_numpy_ver}"],
-        "zip_safe": False,
-    }
-
+if __name__ == "__main__":
+    # Freeze to support parallel compilation when using spawn instead of fork
+    multiprocessing.freeze_support()
     setup(
-        name=DISTNAME,
-        maintainer=AUTHOR,
         version=versioneer.get_version(),
-        packages=find_packages(include=["pandas", "pandas.*"]),
-        package_data={"": ["templates/*", "_libs/**/*.dll"]},
         ext_modules=maybe_cythonize(extensions, compiler_directives=directives),
-        maintainer_email=EMAIL,
-        description=DESCRIPTION,
-        license=LICENSE,
         cmdclass=cmdclass,
-        url=URL,
-        download_url=DOWNLOAD_URL,
-        project_urls=PROJECT_URLS,
-        long_description=LONG_DESCRIPTION,
-        classifiers=CLASSIFIERS,
-        platforms="any",
-        python_requires=">=3.7.1",
-        extras_require={
-            "test": [
-                # sync with setup.cfg minversion & install.rst
-                "pytest>=5.0.1",
-                "pytest-xdist",
-                "hypothesis>=3.58",
-            ]
-        },
-        entry_points={
-            "pandas_plotting_backends": ["matplotlib = pandas:plotting._matplotlib"]
-        },
-        **setuptools_kwargs,
     )
-
-
-if __name__ == "__main__":
-    # Freeze to support parallel compilation when using spawn instead of fork
-    multiprocessing.freeze_support()
-    setup_package()
-Original file line number
+Diff line change
@@ Expand Up / @@ -19,7 +19,7 @@ requirements: @@
         - pip
         - cython
         - numpy
-        - setuptools >=3.3
+        - setuptools >=38.6.0
         - python-dateutil >=2.7.3
         - pytz
       run:
@@ Expand Down @@