diff --git a/.travis.yml b/.travis.yml index 5198c5a8..150d62dd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,8 @@ notifications: env: global: - - NUMPY_VERSION=1.9 + - NUMPY_VERSION=1.12 + - DOCTEST=TRUE addons: apt: @@ -32,12 +33,6 @@ matrix: # - brew update # - brew tap homebrew/versions && brew reinstall gcc49 --without-multilib # - wget http://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh -O miniconda.sh - # - bash miniconda.sh -b -p $HOME/miniconda - # - export PATH="$HOME/miniconda/bin:$PATH" - # - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - # - source activate test - # - conda install -q --yes -c asmeurer gsl - # - os: osx # compiler: clang @@ -47,62 +42,35 @@ matrix: # - brew outdated xctool || brew upgrade xctool # - brew tap homebrew/versions && brew install clang-omp # - wget http://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh -O miniconda.sh - # - bash miniconda.sh -b -p $HOME/miniconda - # - export PATH="$HOME/miniconda/bin:$PATH" - # - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - # - source activate test - # - conda install -q --yes -c asmeurer gsl - os: osx - osx_image: xcode8 + osx_image: xcode9 compiler: clang - env: COMPILER=clang FAMILY=clang V='Apple LLVM 8.0.0' PYTHON_VERSION=3.5 + env: COMPILER=clang FAMILY=clang V='Apple LLVM 7.0.0' PYTHON_VERSION=3.6 DOCTEST=FALSE before_install: - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - - source activate test - - conda install -q --yes -c asmeurer gsl + - os: osx - osx_image: xcode7.3 + osx_image: xcode8 compiler: clang - env: COMPILER=clang FAMILY=clang V='Apple LLVM 7.0.0' PYTHON_VERSION=3.5 + env: COMPILER=clang FAMILY=clang V='Apple LLVM 7.0.0' PYTHON_VERSION=3.5 
DOCTEST=FALSE before_install: - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - - source activate test - - conda install -q --yes -c asmeurer gsl - - os: osx - osx_image: xcode9 + osx_image: xcode7.3 compiler: clang - env: COMPILER=clang FAMILY=clang V='Apple LLVM 9.0.0' PYTHON_VERSION=3.5 + env: COMPILER=clang FAMILY=clang V='Apple LLVM 7.0.0' PYTHON_VERSION=2.7 DOCTEST=FALSE before_install: - - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - - source activate test - - conda install -q --yes -c asmeurer gsl - + - wget http://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh -O miniconda.sh - os: osx osx_image: xcode6.4 compiler: clang - env: COMPILER=clang FAMILY=clang V='Apple LLVM 7.0.0' PYTHON_VERSION=3.5 + env: COMPILER=clang FAMILY=clang V='Apple LLVM 7.0.0' PYTHON_VERSION=2.6 NUMPY_VERSION=1.7 DOCTEST=FALSE before_install: - - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - - source activate test - - conda install -q --yes -c asmeurer gsl - + - wget http://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh -O miniconda.sh # - os: osx # compiler: gcc @@ -110,11 +78,6 @@ matrix: # before_install: # - brew update && brew tap homebrew/versions && brew install gcc48 --without-multilib # - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh - # - bash 
miniconda.sh -b -p $HOME/miniconda - # - export PATH="$HOME/miniconda/bin:$PATH" - # - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - # - source activate test - # - conda install -q --yes -c asmeurer gsl # - os: linux # dist: trusty @@ -127,10 +90,6 @@ matrix: # env: COMPILER=clang-3.6 V=3.6 PYTHON_VERSION=2.7 # before_install: # - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh - # - bash miniconda.sh -b -p $HOME/miniconda - # - export PATH="$HOME/miniconda/bin:$PATH" - # - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - # - source activate test # - os: linux # dist: trusty @@ -143,22 +102,14 @@ matrix: # env: COMPILER=clang-3.6 V=3.6 PYTHON_VERSION=3.5 # before_install: # - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - # - bash miniconda.sh -b -p $HOME/miniconda - # - export PATH="$HOME/miniconda/bin:$PATH" - # - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - # - source activate test - os: linux dist: trusty sudo: required compiler: gcc - env: COMPILER=gcc PYTHON_VERSION=2.6 NUMPY_VERSION=1.7 + env: COMPILER=gcc PYTHON_VERSION=2.6 NUMPY_VERSION=1.7 DOCTEST=FALSE before_install: - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - - source activate test - os: linux dist: trusty @@ -167,22 +118,14 @@ matrix: env: COMPILER=gcc PYTHON_VERSION=2.7 before_install: - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - - source activate test - os: linux dist: 
trusty sudo: required compiler: gcc - env: COMPILER=gcc PYTHON_VERSION=3.3 + env: COMPILER=gcc PYTHON_VERSION=3.4 before_install: - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - - source activate test - os: linux dist: trusty @@ -191,22 +134,28 @@ matrix: env: COMPILER=gcc PYTHON_VERSION=3.5 before_install: - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx - - source activate test - + + - os: linux + dist: trusty + sudo: required + compiler: gcc + env: COMPILER=gcc PYTHON_VERSION=3.6 + before_install: + - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh install: + - bash miniconda.sh -b -p $HOME/miniconda + - export PATH="$HOME/miniconda/bin:$PATH" + - conda create -q --yes -n test python=$PYTHON_VERSION numpy=$NUMPY_VERSION sphinx + - source activate test + - conda install -q --yes -c asmeurer gsl - make -r CC=$COMPILER - make install CC=$COMPILER - python setup.py install script: - make tests CC=$COMPILER - -after_success: - - cd docs && make html && cd ../ - - if [[ "$TRAVIS_OS_NAME" != "osx" ]]; then cd docs && make doctest && cd ../; fi + - make -C docs html + - if [[ "${DOCTEST}" == "TRUE" ]]; then make -C docs doctest ; fi diff --git a/Corrfunc/__init__.py b/Corrfunc/__init__.py index 237e0521..91baba19 100644 --- a/Corrfunc/__init__.py +++ b/Corrfunc/__init__.py @@ -10,7 +10,7 @@ unicode_literals) import os -__version__ = "2.0.1" +__version__ = "2.1.0" __author__ = "Manodeep Sinha " diff --git a/Corrfunc/call_correlation_functions.py b/Corrfunc/call_correlation_functions.py index 
5e92838d..c041f613 100644 --- a/Corrfunc/call_correlation_functions.py +++ b/Corrfunc/call_correlation_functions.py @@ -22,7 +22,8 @@ countpairs_rp_pi as DDrppi_extn,\ countpairs_wp as wp_extn,\ countpairs_xi as xi_extn,\ - countspheres_vpf as vpf_extn + countspheres_vpf as vpf_extn,\ + countpairs_s_mu as DDsmu_extn def main(): @@ -74,6 +75,26 @@ def main(): .format(items[0], items[1], items[2], items[3], items[4], items[5])) print("------------------------------------------------------------------------") + mu_max = 0.5 + nmu_bins = 10 + + print("\nRunning 2-D correlation function DD(s,mu)") + results_DDsmu, _ = DDsmu_extn(autocorr, nthreads, binfile, + mu_max, nmu_bins, + x, y, z, + weights1=np.ones_like(x), weight_type='pair_product', + verbose=True, periodic=periodic, + boxsize=boxsize, output_savg=True) + print("\n# ****** DD(s,mu): first {0} bins ******* " + .format(numbins_to_print)) + print("# smin smax savg mu_max npairs weightavg") + print("########################################################################") + for ibin in range(numbins_to_print): + items = results_DDsmu[ibin] + print("{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d} {5:10.4f}" + .format(items[0], items[1], items[2], items[3], items[4], items[5])) + print("------------------------------------------------------------------------") + print("\nRunning 2-D projected correlation function wp(rp)") results_wp, _, _ = wp_extn(boxsize, pimax, nthreads, binfile, x, y, z, diff --git a/Corrfunc/call_correlation_functions_mocks.py b/Corrfunc/call_correlation_functions_mocks.py index 5dae9112..e73bc565 100644 --- a/Corrfunc/call_correlation_functions_mocks.py +++ b/Corrfunc/call_correlation_functions_mocks.py @@ -19,6 +19,7 @@ def main(): from Corrfunc.io import read_catalog from Corrfunc._countpairs_mocks import\ countpairs_rp_pi_mocks as rp_pi_mocks_extn,\ + countpairs_s_mu_mocks as s_mu_mocks_extn,\ countpairs_theta_mocks as theta_mocks_extn,\ countspheres_vpf_mocks as vpf_mocks_extn @@ -52,7 
+53,7 @@ def main(): output_rpavg=True, verbose=True) print("\n# ****** DD(rp,pi): first {0} bins ******* " .format(numbins_to_print)) - print("# rmin rmax rpavg pi_upper npairs weightavg") + print("# rmin rmax rpavg pi_upper npairs weightavg") print("########################################################################") for ibin in range(numbins_to_print): items = results_DDrppi[ibin] @@ -61,9 +62,29 @@ def main(): print("------------------------------------------------------------------------") + nmu_bins = 10 + mu_max = 1.0 + + print("\nRunning 2-D correlation function xi(s,mu)") + results_DDsmu, _ = s_mu_mocks_extn(autocorr, cosmology, nthreads, + mu_max, nmu_bins, binfile, + ra, dec, cz, weights1=np.ones_like(ra), + output_savg=True, verbose=True, + weight_type='pair_product') + print("\n# ****** DD(s,mu): first {0} bins ******* " + .format(numbins_to_print)) + print("# smin smax savg mu_upper npairs weight_avg") + print("###########################################################################") + for ibin in range(numbins_to_print): + items = results_DDsmu[ibin] + print("{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d} {5:12.4f}" + .format(items[0], items[1], items[2], items[3], items[4], items[5])) + + print("--------------------------------------------------------------------------") + binfile = pjoin(dirname(abspath(__file__)), "../mocks/tests/", "angular_bins") - print("\nRunning angular correlation function w(theta)") + print("\nRunning angular correlation function DD(theta)") results_wtheta, _ = theta_mocks_extn(autocorr, nthreads, binfile, ra, dec, RA2=ra, DEC2=dec, weights1=np.ones_like(ra), @@ -71,10 +92,10 @@ def main(): weight_type='pair_product', output_thetaavg=True, fast_acos=True, verbose=1) - print("\n# ****** wtheta: first {0} bins ******* " + print("\n# ****** DD(theta): first {0} bins ******* " .format(numbins_to_print)) - print("# thetamin thetamax thetaavg npairs weightavg") - 
print("#######################################################################") + print("# thetamin thetamax thetaavg npairs weightavg") + print("############################################################################") for ibin in range(numbins_to_print): items = results_wtheta[ibin] print("{0:14.4f} {1:14.4f} {2:14.4f} {3:14d} {4:14.4f}" diff --git a/Corrfunc/io.py b/Corrfunc/io.py index 7cd4bce8..69eab92a 100644 --- a/Corrfunc/io.py +++ b/Corrfunc/io.py @@ -10,6 +10,10 @@ from os.path import dirname, abspath, splitext, exists as file_exists,\ join as pjoin import numpy as np +try: + import pandas as pd +except ImportError: + pd = None __all__ = ('read_fastfood_catalog', 'read_ascii_catalog', 'read_catalog') @@ -220,11 +224,6 @@ def read_ascii_catalog(filename, return_dtype=None): # check if pandas is available - much faster to read in the data # using pandas - try: - import pandas as pd - except ImportError: - pd = None - if pd is not None: df = pd.read_csv(filename, header=None, engine="c", diff --git a/Corrfunc/mocks/DDrppi_mocks.py b/Corrfunc/mocks/DDrppi_mocks.py index ec548f4c..e98a8be6 100644 --- a/Corrfunc/mocks/DDrppi_mocks.py +++ b/Corrfunc/mocks/DDrppi_mocks.py @@ -50,11 +50,11 @@ def DDrppi_mocks(autocorr, cosmology, nthreads, pimax, binfile, Parameters ----------- - autocorr: boolean, required + autocorr : boolean, required Boolean flag for auto/cross-correlation. If autocorr is set to 1, then the second set of particle positions are not required. - cosmology: integer, required + cosmology : integer, required Integer choice for setting cosmology. Valid values are 1->LasDamas cosmology and 2->Planck cosmology. If you need arbitrary cosmology, easiest way is to convert the ``CZ`` values into co-moving distance, @@ -69,65 +69,62 @@ def DDrppi_mocks(autocorr, cosmology, nthreads, pimax, binfile, ``init_cosmology`` in ``ROOT/utils/cosmology_params.c`` and re-install the entire package. 
- nthreads: integer + nthreads : integer The number of OpenMP threads to use. Has no effect if OpenMP was not enabled during library compilation. - pimax: double + pimax : double A double-precision value for the maximum separation along the Z-dimension. Distances along the :math:`\\pi` direction are binned with unit depth. For instance, if ``pimax=40``, then 40 bins will be created - along the ``pi`` direction. + along the ``pi`` direction. Only pairs with ``0 <= dz < pimax`` + are counted (no equality). + binfile: string or an list/array of floats + For string input: filename specifying the ``rp`` bins for + ``DDrppi_mocks``. The file should contain white-space separated values + of (rpmin, rpmax) for each ``rp`` wanted. The bins need to be + contiguous and sorted in increasing order (smallest bins come first). + + For array-like input: A sequence of ``rp`` values that provides the + bin-edges. For example, + ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid + input specifying **14** (logarithmic) bins between 0.1 and 10.0. This + array does not need to be sorted. + + RA1 : array-like, real (float/double) + The array of Right Ascensions for the first set of points. RA's + are expected to be in [0.0, 360.0], but the code will try to fix cases + where the RA's are in [-180, 180.0]. For peace of mind, always supply + RA's in [0.0, 360.0]. - .. note:: Only pairs with ``0 <= dz < pimax`` are counted (no equality). + Calculations are done in the precision of the supplied arrays. + DEC1 : array-like, real (float/double) + Array of Declinations for the first set of points. DEC's are expected + to be in the [-90.0, 90.0], but the code will try to fix cases where + the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply + DEC's in [-90.0, 90.0]. - binfile: string or an list/array of floats - For string input: filename specifying the ``rp`` bins for - ``DDrppi_mocks``. 
The file should contain white-space separated values - of (rpmin, rpmax) for each ``rp`` wanted. The bins need to be - contiguous and sorted in increasing order (smallest bins come first). - - For array-like input: A sequence of ``rp`` values that provides the - bin-edges. For example, - ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid - input specifying **14** (logarithmic) bins between 0.1 and 10.0. This - array does not need to be sorted. - - RA1: array-like, real (float/double) - The array of Right Ascensions for the first set of points. RA's - are expected to be in [0.0, 360.0], but the code will try to fix cases - where the RA's are in [-180, 180.0]. For peace of mind, always supply - RA's in [0.0, 360.0]. - - Calculations are done in the precision of the supplied arrays. - - DEC1: array-like, real (float/double) - Array of Declinations for the first set of points. DEC's are expected - to be in the [-90.0, 90.0], but the code will try to fix cases where - the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply - DEC's in [-90.0, 90.0]. - - Must be of same precision type as RA1. - - CZ1: array-like, real (float/double) - Array of (Speed Of Light * Redshift) values for the first set of - points. Code will try to detect cases where ``redshifts`` have been - passed and multiply the entire array with the ``speed of light``. - - If is_comoving_dist is set, then ``CZ1`` is interpreted as the - co-moving distance, rather than `cz`. + Must be of same precision type as RA1. + + CZ1 : array-like, real (float/double) + Array of (Speed Of Light * Redshift) values for the first set of + points. Code will try to detect cases where ``redshifts`` have been + passed and multiply the entire array with the ``speed of light``. + + If is_comoving_dist is set, then ``CZ1`` is interpreted as the + co-moving distance, rather than `cz`. 
- weights1: array_like, real (float/double), optional - A scalar, or an array of weights of shape (n_weights, n_positions) or (n_positions,). - `weight_type` specifies how these weights are used; results are returned - in the `weightavg` field. If only one of weights1 and weights2 is - specified, the other will be set to uniform weights. + weights1 : array_like, real (float/double), optional + A scalar, or an array of weights of shape (n_weights, n_positions) or (n_positions,). + `weight_type` specifies how these weights are used; results are returned + in the `weightavg` field. If only one of weights1 and weights2 is + specified, the other will be set to uniform weights. - RA2: array-like, real (float/double) + RA2 : array-like, real (float/double) The array of Right Ascensions for the second set of points. RA's are expected to be in [0.0, 360.0], but the code will try to fix cases where the RA's are in [-180, 180.0]. For peace of mind, always supply @@ -135,7 +132,7 @@ def DDrppi_mocks(autocorr, cosmology, nthreads, pimax, binfile, Must be of same precision type as RA1/DEC1/CZ1. - DEC2: array-like, real (float/double) + DEC2 : array-like, real (float/double) Array of Declinations for the second set of points. DEC's are expected to be in the [-90.0, 90.0], but the code will try to fix cases where the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply @@ -143,7 +140,7 @@ def DDrppi_mocks(autocorr, cosmology, nthreads, pimax, binfile, Must be of same precision type as RA1/DEC1/CZ1. - CZ2: array-like, real (float/double) + CZ2 : array-like, real (float/double) Array of (Speed Of Light * Redshift) values for the second set of points. Code will try to detect cases where ``redshifts`` have been passed and multiply the entire array with the ``speed of light``. @@ -153,47 +150,45 @@ def DDrppi_mocks(autocorr, cosmology, nthreads, pimax, binfile, Must be of same precision type as RA1/DEC1/CZ1. 
- weights2: array-like, real (float/double), optional + weights2 : array-like, real (float/double), optional Same as weights1, but for the second set of positions - is_comoving_dist: boolean (default false) + is_comoving_dist : boolean (default false) Boolean flag to indicate that ``cz`` values have already been converted into co-moving distances. This flag allows arbitrary cosmologies to be used in ``Corrfunc``. - verbose: boolean (default false) + verbose : boolean (default false) Boolean flag to control output of informational messages - output_rpavg: boolean (default false) + output_rpavg : boolean (default false) Boolean flag to output the average ``rp`` for each bin. Code will - run slower if you set this flag. Also, - - - .. note:: If you are calculating in single-precision, ``rpavg`` will + run slower if you set this flag. + + If you are calculating in single-precision, ``rpavg`` will suffer suffer from numerical loss of precision and can not be trusted. If you need accurate ``rpavg`` values, then pass in double precision arrays for the particle positions. - - - fast_divide: boolean (default false) + + fast_divide : boolean (default false) Boolean flag to replace the division in ``AVX`` implementation with an - approximate reciprocal, followed by a Newton-Raphson step. Improves + approximate reciprocal, followed by two Newton-Raphson steps. Improves runtime by ~15-20%. Loss of precision is at the 5-6th decimal place. - (xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3] - Controls the refinement on the cell sizes. Can have up to a 20% impact - on runtime. + (xyz)bin_refine_factor : integer, default is (2,2,1); typically within [1-3] + Controls the refinement on the cell sizes. Can have up to a 20% impact + on runtime. max_cells_per_dim: integer, default is 100, typical values in [50-300] - Controls the maximum number of cells per dimension. Total number of - cells can be up to (max_cells_per_dim)^3. 
Only increase if ``rpmax`` is - too small relative to the boxsize (and increasing helps the runtime). + Controls the maximum number of cells per dimension. Total number of + cells can be up to (max_cells_per_dim)^3. Only increase if ``rpmax`` is + too small relative to the boxsize (and increasing helps the runtime). - c_api_timer: boolean (default false) + c_api_timer : boolean (default false) Boolean flag to measure actual time spent in the C libraries. Here to allow for benchmarking and scaling studies. - isa: string (default ``fastest``) + isa : string (default ``fastest``) Controls the runtime dispatch for the instruction set to use. Possible options are: [``fastest``, ``avx``, ``sse42``, ``fallback``] @@ -208,25 +203,25 @@ def DDrppi_mocks(autocorr, cosmology, nthreads, pimax, binfile, benchmarking, then the string supplied here gets translated into an ``enum`` for the instruction set defined in ``utils/defs.h``. - weight_type: string, optional + weight_type : string, optional The type of weighting to apply. One of ["pair_product", None]. Default: None. Returns -------- - results: Numpy structured array + results : Numpy structured array - A numpy structured array containing [rpmin, rpmax, rpavg, pimax, npairs, weightavg] - for each radial bin specified in the ``binfile``. If ``output_ravg`` is - not set, then ``rpavg`` will be set to 0.0 for all bins; similarly for - ``weightavg``. ``npairs`` - contains the number of pairs in that bin and can be used to compute the - actual :math:`\\xi(r_p, \pi)` or :math:`wp(rp)` by combining with - (DR, RR) counts. + A numpy structured array containing [rpmin, rpmax, rpavg, pimax, npairs, weightavg] + for each radial bin specified in the ``binfile``. If ``output_ravg`` is + not set, then ``rpavg`` will be set to 0.0 for all bins; similarly for + ``weightavg``. 
``npairs`` + contains the number of pairs in that bin and can be used to compute the + actual :math:`\\xi(r_p, \pi)` or :math:`wp(rp)` by combining with + (DR, RR) counts. - api_time: float, optional - Only returned if ``c_api_timer`` is set. ``api_time`` measures only the time - spent within the C library and ignores all python overhead. + api_time : float, optional + Only returned if ``c_api_timer`` is set. ``api_time`` measures only the time + spent within the C library and ignores all python overhead. Example -------- diff --git a/Corrfunc/mocks/DDsmu_mocks.py b/Corrfunc/mocks/DDsmu_mocks.py new file mode 100755 index 00000000..3d4a3ace --- /dev/null +++ b/Corrfunc/mocks/DDsmu_mocks.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Python wrapper around the C extension for the pair counter in +``mocks/DDsmu``. This python wrapper is :py:mod:`Corrfunc.mocks.DDsmu_mocks` +""" + +from __future__ import (division, print_function, absolute_import, + unicode_literals) + +__author__ = ('Manodeep Sinha', 'Nick Hand') +__all__ = ('DDsmu_mocks', ) + + +def DDsmu_mocks(autocorr, cosmology, nthreads, mu_max, nmu_bins, binfile, + RA1, DEC1, CZ1, weights1=None, + RA2=None, DEC2=None, CZ2=None, weights2=None, + is_comoving_dist=False, + verbose=False, output_savg=False, + fast_divide=False, xbin_refine_factor=2, + ybin_refine_factor=2, zbin_refine_factor=1, + max_cells_per_dim=100, + c_api_timer=False, isa='fastest', weight_type=None): + """ + Calculate the 2-D pair-counts corresponding to the projected correlation + function, :math:`\\xi(s, \mu)`. The pairs are counted in bins of + radial separation and cosine of angle to the line-of-sight (LOS). The + input positions are expected to be on-sky co-ordinates. This module is + suitable for calculating correlation functions for mock catalogs. + + If ``weights`` are provided, the resulting pair counts are weighted. The + weighting scheme depends on ``weight_type``. 
+ + Returns a numpy structured array containing the pair counts for the + specified bins. + + + .. note:: This module only returns pair counts and not the actual + correlation function :math:`\\xi(s, \mu)`. See the + utilities :py:mod:`Corrfunc.utils.convert_3d_counts_to_cf` + for computing :math:`\\xi(s, \mu)` from the pair counts. + + .. versionadded:: 2.1.0 + + Parameters + ---------- + + autocorr: boolean, required + Boolean flag for auto/cross-correlation. If autocorr is set to 1, + then the second set of particle positions are not required. + + cosmology: integer, required + Integer choice for setting cosmology. Valid values are 1->LasDamas + cosmology and 2->Planck cosmology. If you need arbitrary cosmology, + easiest way is to convert the ``CZ`` values into co-moving distance, + based on your preferred cosmology. Set ``is_comoving_dist=True``, to + indicate that the co-moving distance conversion has already been done. + + Choices: + 1. LasDamas cosmology. :math:`\\Omega_m=0.25`, :math:`\\Omega_\Lambda=0.75` + 2. Planck cosmology. :math:`\\Omega_m=0.302`, :math:`\\Omega_\Lambda=0.698` + + To setup a new cosmology, add an entry to the function, + ``init_cosmology`` in ``ROOT/utils/cosmology_params.c`` and re-install + the entire package. + + nthreads: integer + The number of OpenMP threads to use. Has no effect if OpenMP was not + enabled during library compilation. + + mu_max: double. Must be in range [0.0, 1.0] + A double-precision value for the maximum cosine of the angular + separation from the line of sight (LOS). Here, ``mu`` is defined as + the angle between ``s`` and ``l``. If :math:`v_1` and :math:`v_2` + represent the vectors to each point constituting the pair, then + :math:`s := v_1 - v_2` and :math:`l := 1/2 (v_1 + v_2)`. + + Note: Only pairs with :math:`0 <= cos(\theta_{LOS}) < \mu_{max}` + are counted (no equality). 
+ + nmu_bins: int + The number of linear ``mu`` bins, with the bins ranging from + from (0,``mu_max``) + + binfile: string or an list/array of floats + For string input: filename specifying the ``s`` bins for + ``DDsmu_mocks``. The file should contain white-space separated values + of (smin, smax) specifying each ``s`` bin wanted. The bins + need to be contiguous and sorted in increasing order (smallest bins + come first). + + For array-like input: A sequence of ``s`` values that provides the + bin-edges. For example, + ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid + input specifying **14** (logarithmic) bins between 0.1 and 10.0. This + array does not need to be sorted. + + RA1: array-like, real (float/double) + The array of Right Ascensions for the first set of points. RA's + are expected to be in [0.0, 360.0], but the code will try to fix cases + where the RA's are in [-180, 180.0]. For peace of mind, always supply + RA's in [0.0, 360.0]. + + Calculations are done in the precision of the supplied arrays. + + DEC1: array-like, real (float/double) + Array of Declinations for the first set of points. DEC's are expected + to be in the [-90.0, 90.0], but the code will try to fix cases where + the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply + DEC's in [-90.0, 90.0]. + + Must be of same precision type as RA1. + + CZ1: array-like, real (float/double) + Array of (Speed Of Light * Redshift) values for the first set of + points. Code will try to detect cases where ``redshifts`` have been + passed and multiply the entire array with the ``speed of light``. + + If is_comoving_dist is set, then ``CZ1`` is interpreted as the + co-moving distance, rather than `cz`. + + weights1: array_like, real (float/double), optional + A scalar, or an array of weights of shape (n_weights, n_positions) or (n_positions,). + `weight_type` specifies how these weights are used; results are returned + in the `weightavg` field. 
If only one of weights1 and weights2 is + specified, the other will be set to uniform weights. + + RA2: array-like, real (float/double) + The array of Right Ascensions for the second set of points. RA's + are expected to be in [0.0, 360.0], but the code will try to fix cases + where the RA's are in [-180, 180.0]. For peace of mind, always supply + RA's in [0.0, 360.0]. + + Must be of same precision type as RA1/DEC1/CZ1. + + DEC2: array-like, real (float/double) + Array of Declinations for the second set of points. DEC's are expected + to be in the [-90.0, 90.0], but the code will try to fix cases where + the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply + DEC's in [-90.0, 90.0]. + + Must be of same precision type as RA1/DEC1/CZ1. + + CZ2: array-like, real (float/double) + Array of (Speed Of Light * Redshift) values for the second set of + points. Code will try to detect cases where ``redshifts`` have been + passed and multiply the entire array with the ``speed of light``. + + If is_comoving_dist is set, then ``CZ2`` is interpreted as the + co-moving distance, rather than `cz`. + + Must be of same precision type as RA1/DEC1/CZ1. + + weights2: array-like, real (float/double), optional + Same as weights1, but for the second set of positions + + is_comoving_dist: boolean (default false) + Boolean flag to indicate that ``cz`` values have already been + converted into co-moving distances. This flag allows arbitrary + cosmologies to be used in ``Corrfunc``. + + verbose: boolean (default false) + Boolean flag to control output of informational messages + + output_savg: boolean (default false) + Boolean flag to output the average ``s`` for each bin. Code will + run slower if you set this flag. Also, note, if you are calculating + in single-precision, ``savg`` will suffer from numerical loss of + precision and can not be trusted. If you need accurate ``savg`` + values, then pass in double precision arrays for the particle + positions. 
+ + fast_divide: boolean (default false) + Boolean flag to replace the division in ``AVX`` implementation with an + approximate reciprocal, followed by a Newton-Raphson step. Improves + runtime by ~15-20%. Loss of precision is at the 5-6th decimal place. + + (xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3] + Controls the refinement on the cell sizes. Can have up to a 20% impact + on runtime. + + max_cells_per_dim: integer, default is 100, typical values in [50-300] + Controls the maximum number of cells per dimension. Total number of + cells can be up to (max_cells_per_dim)^3. Only increase if ``rpmax`` is + too small relative to the boxsize (and increasing helps the runtime). + + c_api_timer: boolean (default false) + Boolean flag to measure actual time spent in the C libraries. Here + to allow for benchmarking and scaling studies. + + isa: string (default ``fastest``) + Controls the runtime dispatch for the instruction set to use. Possible + options are: [``fastest``, ``avx``, ``sse42``, ``fallback``] + + Setting isa to ``fastest`` will pick the fastest available instruction + set on the current computer. However, if you set ``isa`` to, say, + ``avx`` and ``avx`` is not available on the computer, then the code + will revert to using ``fallback`` (even though ``sse42`` might be + available). + + Unless you are benchmarking the different instruction sets, you should + always leave ``isa`` to the default value. And if you *are* + benchmarking, then the string supplied here gets translated into an + ``enum`` for the instruction set defined in ``utils/defs.h``. + + weight_type: string, optional + The type of weighting to apply. One of ["pair_product", None]. Default: None. + + Returns + -------- + + results: Numpy structured array + A numpy structured array containing [smin, smax, savg, mumax, npairs, weightavg] + for each separation bin specified in the ``binfile``. 
If ``output_savg`` is + not set, then ``savg`` will be set to 0.0 for all bins; similarly for + ``weightavg``. ``npairs`` contains the number of pairs in that bin and + can be used to compute the actual :math:`\\xi(s, \mu)` by combining + with (DR, RR) counts. + + api_time: float, optional + Only returned if ``c_api_timer`` is set. ``api_time`` measures only + the time spent within the C library and ignores all python overhead. + """ + try: + from Corrfunc._countpairs_mocks import countpairs_s_mu_mocks as\ + DDsmu_extn + except ImportError: + msg = "Could not import the C extension for the on-sky"\ + "pair counter." + raise ImportError(msg) + + import numpy as np + from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec,\ + return_file_with_rbins + from future.utils import bytes_to_native_str + + # Broadcast scalar weights to arrays + if weights1 is not None: + weights1 = np.atleast_1d(weights1) + if weights2 is not None: + weights2 = np.atleast_1d(weights2) + + # Check if mu_max is scalar + if not np.isscalar(mu_max): + msg = "The parameter `mu_max` = {0}, has size = {1}. "\ + "The code is expecting a scalar quantity (and not "\ + "not a list, array)".format(mu_max, np.size(mu_max)) + raise TypeError(msg) + + # Check that mu_max is within (0.0, 1.0] + if mu_max <= 0.0 or mu_max > 1.0: + msg = "The parameter `mu_max` = {0}, is the max. 
of cosine of an " + "angle and should be within (0.0, 1.0]".format(mu_max) + raise ValueError(msg) + + if not autocorr: + if RA2 is None or DEC2 is None or CZ2 is None: + msg = "Must pass valid arrays for RA2/DEC2/CZ2 for "\ + "computing cross-correlation" + raise ValueError(msg) + + # If only one set of points has weights, set the other to uniform weights + if weights1 is None and weights2 is not None: + weights1 = np.ones_like(weights2) + if weights2 is None and weights1 is not None: + weights2 = np.ones_like(weights1) + + else: + RA2 = np.empty(1) + DEC2 = np.empty(1) + CZ2 = np.empty(1) + + fix_ra_dec(RA1, DEC1) + if autocorr == 0: + fix_ra_dec(RA2, DEC2) + + # Passing None parameters breaks the parsing code, so avoid this + kwargs = {} + for k in ['weights1', 'weights2', 'weight_type', 'RA2', 'DEC2', 'CZ2']: + v = locals()[k] + if v is not None: + kwargs[k] = v + + integer_isa = translate_isa_string_to_enum(isa) + sbinfile, delete_after_use = return_file_with_rbins(binfile) + extn_results, api_time = DDsmu_extn(autocorr, cosmology, nthreads, + mu_max, nmu_bins, sbinfile, + RA1, DEC1, CZ1, + is_comoving_dist=is_comoving_dist, + verbose=verbose, + output_savg=output_savg, + fast_divide=fast_divide, + xbin_refine_factor=xbin_refine_factor, + ybin_refine_factor=ybin_refine_factor, + zbin_refine_factor=zbin_refine_factor, + max_cells_per_dim=max_cells_per_dim, + c_api_timer=c_api_timer, + isa=integer_isa, **kwargs) + if extn_results is None: + msg = "RuntimeError occurred" + raise RuntimeError(msg) + + if delete_after_use: + import os + os.remove(sbinfile) + + results_dtype = np.dtype([(bytes_to_native_str(b'smin'), np.float), + (bytes_to_native_str(b'smax'), np.float), + (bytes_to_native_str(b'savg'), np.float), + (bytes_to_native_str(b'mumax'), np.float), + (bytes_to_native_str(b'npairs'), np.uint64), + (bytes_to_native_str(b'weightavg'), np.float)]) + + nbin = len(extn_results) + results = np.zeros(nbin, dtype=results_dtype) + for ii, r in 
enumerate(extn_results): + results['smin'][ii] = r[0] + results['smax'][ii] = r[1] + results['savg'][ii] = r[2] + results['mumax'][ii] = r[3] + results['npairs'][ii] = r[4] + results['weightavg'][ii] = r[5] + + if not c_api_timer: + return results + else: + return results, api_time + +if __name__ == '__main__': + import doctest + doctest.testmod() diff --git a/Corrfunc/mocks/DDtheta_mocks.py b/Corrfunc/mocks/DDtheta_mocks.py index 084022ee..dcb21279 100644 --- a/Corrfunc/mocks/DDtheta_mocks.py +++ b/Corrfunc/mocks/DDtheta_mocks.py @@ -43,26 +43,26 @@ def DDtheta_mocks(autocorr, nthreads, binfile, Parameters ----------- - autocorr: boolean, required + autocorr : boolean, required Boolean flag for auto/cross-correlation. If autocorr is set to 1, then the second set of particle positions are not required. - nthreads: integer + nthreads : integer Number of threads to use. - binfile: string or an list/array of floats, units: degrees - For string input: filename specifying the ``rp`` bins for - ``DDtheta_mocks``. The file should contain white-space separated values - of (thetapmin, thetamax) for each ``theta`` wanted. The bins need to be - contiguous and sorted in increasing order (smallest bins come first). + binfile: string or an list/array of floats. Units: degrees. + For string input: filename specifying the ``theta`` bins for + ``DDtheta_mocks``. The file should contain white-space separated values + of (thetamin, thetamax) for each ``theta`` wanted. The bins need to be + contiguous and sorted in increasing order (smallest bins come first). - For array-like input: A sequence of ``theta`` values that provides the - bin-edges. For example, - ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid - input specifying **14** (logarithmic) bins between 0.1 and 10.0 degrees. - This array does not need to be sorted. + For array-like input: A sequence of ``theta`` values that provides the + bin-edges. 
For example, + ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid + input specifying **14** (logarithmic) bins between 0.1 and 10.0 + degrees. This array does not need to be sorted. - RA1: array-like, real (float/double) + RA1 : array-like, real (float/double) The array of Right Ascensions for the first set of points. RA's are expected to be in [0.0, 360.0], but the code will try to fix cases where the RA's are in [-180, 180.0]. For peace of mind, always supply @@ -70,75 +70,70 @@ def DDtheta_mocks(autocorr, nthreads, binfile, Calculations are done in the precision of the supplied arrays. - DEC1: array-like, real (float/double) + DEC1 : array-like, real (float/double) Array of Declinations for the first set of points. DEC's are expected to be in the [-90.0, 90.0], but the code will try to fix cases where the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply DEC's in [-90.0, 90.0]. Must be of same precision type as RA1. - weights1: array_like, real (float/double), optional + weights1 : array_like, real (float/double), optional A scalar, or an array of weights of shape (n_weights, n_positions) or (n_positions,). `weight_type` specifies how these weights are used; results are returned in the `weightavg` field. If only one of weights1 and weights2 is specified, the other will be set to uniform weights. - RA2: array-like, real (float/double) + RA2 : array-like, real (float/double) The array of Right Ascensions for the second set of points. RA's are expected to be in [0.0, 360.0], but the code will try to fix cases where the RA's are in [-180, 180.0]. For peace of mind, always supply RA's in [0.0, 360.0]. Must be of same precision type as RA1/DEC1. - DEC2: array-like, real (float/double) + DEC2 : array-like, real (float/double) Array of Declinations for the second set of points. DEC's are expected to be in the [-90.0, 90.0], but the code will try to fix cases where the DEC's are in [0.0, 180.0]. 
Again, for peace of mind, always supply DEC's in [-90.0, 90.0]. Must be of same precision type as RA1/DEC1. - weights2: array-like, real (float/double), optional + weights2 : array-like, real (float/double), optional Same as weights1, but for the second set of positions - link_in_dec: boolean (default True) + link_in_dec : boolean (default True) Boolean flag to create lattice in Declination. Code runs faster with this option. However, if the angular separations are too small, then linking in declination might produce incorrect results. When running for the first time, check your results by comparing with the output of the code for ``link_in_dec=False`` and ``link_in_ra=False``. - link_in_ra: boolean (default True) + link_in_ra : boolean (default True) Boolean flag to create lattice in Right Ascension. Setting this option implies ``link_in_dec=True``. Similar considerations as ``link_in_dec`` described above. - - .. note:: If you disable both ``link_in_dec`` and ``link_in_ra``, then + If you disable both ``link_in_dec`` and ``link_in_ra``, then the code reduces to a brute-force pair counter. No lattices are created at all. For very small angular separations, the brute-force method might be the most numerically stable method. - - verbose: boolean (default false) + verbose : boolean (default false) Boolean flag to control output of informational messages - output_thetaavg: boolean (default false) + output_thetaavg : boolean (default false) Boolean flag to output the average ``\theta`` for each bin. Code will run slower if you set this flag. - - .. note:: If you are calculating in single-precision, ``thetaavg`` will + If you are calculating in single-precision, ``thetaavg`` will suffer from numerical loss of precision and can not be trusted. If you need accurate ``thetaavg`` values, then pass in double precision arrays for ``RA/DEC``. - - .. note:: Code will run significantly slower if you enable this option. 
+ Code will run significantly slower if you enable this option. Use the keyword ``fast_acos`` if you can tolerate some loss of precision. - - fast_acos: boolean (default false) + fast_acos : boolean (default false) Flag to use numerical approximation for the ``arccos`` - gives better performance at the expense of some precision. Relevant only if ``output_thetaavg==True``. @@ -149,30 +144,26 @@ def DDtheta_mocks(autocorr, nthreads, binfile, if you know your ``theta`` range is limited. If you implement a new version, then you will have to reinstall the entire Corrfunc package. + Note: Tests will fail if you run the tests with``fast_acos=True``. - .. note:: Tests will fail if you run the tests with``fast_acos=True``. - - - (radec)_refine_factor: integer, default is (2,2); typically within [1-3] + (radec)_refine_factor : integer, default is (2,2); typically within [1-3] Controls the refinement on the cell sizes. Can have up to a 20% impact on runtime. - - .. note:: Only two refine factors are to be specified and these + Only two refine factors are to be specified and these correspond to ``ra`` and ``dec`` (rather, than the usual three of ``(xyz)bin_refine_factor`` for all other correlation functions). - - max_cells_per_dim: integer, default is 100, typical values in [50-300] + max_cells_per_dim : integer, default is 100, typical values in [50-300] Controls the maximum number of cells per dimension. Total number of cells can be up to (max_cells_per_dim)^3. Only increase if ``thetamax`` is too small relative to the boxsize (and increasing helps the runtime). - c_api_timer: boolean (default false) + c_api_timer : boolean (default false) Boolean flag to measure actual time spent in the C libraries. Here to allow for benchmarking and scaling studies. - isa: string (default ``fastest``) + isa : string (default ``fastest``) Controls the runtime dispatch for the instruction set to use. 
Possible options are: [``fastest``, ``avx``, ``sse42``, ``fallback``] @@ -189,15 +180,14 @@ def DDtheta_mocks(autocorr, nthreads, binfile, Returns -------- - results: Numpy structured array - + results : Numpy structured array A numpy structured array containing [thetamin, thetamax, thetaavg, npairs, weightavg] for each angular bin specified in the ``binfile``. If ``output_thetaavg`` is not set then ``thetavg`` will be set to 0.0 for all bins; similarly for ``weightavg``. ``npairs`` contains the number of pairs in that bin. - api_time: float, optional + api_time : float, optional Only returned if ``c_api_timer`` is set. ``api_time`` measures only the time spent within the C library and ignores all python overhead. diff --git a/Corrfunc/mocks/__init__.py b/Corrfunc/mocks/__init__.py index cf8b4294..d559fbdb 100644 --- a/Corrfunc/mocks/__init__.py +++ b/Corrfunc/mocks/__init__.py @@ -9,12 +9,13 @@ unicode_literals) __author__ = ('Manodeep Sinha') -__all__ = ("DDrppi_mocks", "DDtheta_mocks", "vpf_mocks", ) +__all__ = ("DDrppi_mocks", "DDtheta_mocks", "vpf_mocks", "DDsmu_mocks" ) import sys from .DDrppi_mocks import DDrppi_mocks from .DDtheta_mocks import DDtheta_mocks from .vpf_mocks import vpf_mocks +from .DDsmu_mocks import DDsmu_mocks if sys.version_info[0] < 3: __all__ = [n.encode('ascii') for n in __all__] diff --git a/Corrfunc/mocks/vpf_mocks.py b/Corrfunc/mocks/vpf_mocks.py index 39e51271..5872de4b 100644 --- a/Corrfunc/mocks/vpf_mocks.py +++ b/Corrfunc/mocks/vpf_mocks.py @@ -39,12 +39,12 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, Number of bins in the counts-in-cells. Radius of first shell is rmax/nbins - nspheres: integer (>= 0) + nspheres : integer (>= 0) Number of random spheres to place within the particle distribution. For a small number of spheres, the error is larger in the measured pN's. - numpN: integer (>= 1) + numpN : integer (>= 1) Governs how many unique pN's are to returned. If ``numpN`` is set to 1, then only the vpf (p0) is returned. 
For ``numpN=2``, p0 and p1 are returned. @@ -62,18 +62,16 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, and so on... + Note: ``p0`` is the vpf - .. note:: p0 is the vpf - - - threshold_ngb: integer + threshold_ngb : integer Minimum number of random points needed in a ``rmax`` sphere such that it is considered to be entirely within the mock footprint. The command-line version, ``mocks/vpf/vpf_mocks.c``, assumes that the minimum number of randoms can be at most a 1-sigma deviation from the expected random number density. - centers_file: string, filename + centers_file : string, filename A file containing random sphere centers. If the file does not exist, then a list of random centers will be written out. In that case, the randoms arrays, ``RAND_RA``, ``RAND_DEC`` and ``RAND_CZ`` are used to @@ -81,13 +79,11 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, exist but either ``rmax`` is too small or there are not enough centers then the file will be overwritten. - - .. note:: If the centers file has to be written, the code will take + Note: If the centers file has to be written, the code will take significantly longer to finish. However, subsequent runs can re-use that centers file and will be faster. - - cosmology: integer, required + cosmology : integer, required Integer choice for setting cosmology. Valid values are 1->LasDamas cosmology and 2->Planck cosmology. If you need arbitrary cosmology, easiest way is to convert the ``CZ`` values into co-moving distance, @@ -102,7 +98,7 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, ``init_cosmology`` in ``ROOT/utils/cosmology_params.c`` and re-install the entire package. - RA: array-like, real (float/double) + RA : array-like, real (float/double) The array of Right Ascensions for the first set of points. RA's are expected to be in [0.0, 360.0], but the code will try to fix cases where the RA's are in [-180, 180.0]. 
For peace of mind, always supply @@ -110,7 +106,7 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, Calculations are done in the precision of the supplied arrays. - DEC: array-like, real (float/double) + DEC : array-like, real (float/double) Array of Declinations for the first set of points. DEC's are expected to be in the [-90.0, 90.0], but the code will try to fix cases where the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply @@ -118,7 +114,7 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, Must be of same precision type as RA. - CZ: array-like, real (float/double) + CZ : array-like, real (float/double) Array of (Speed Of Light * Redshift) values for the first set of points. Code will try to detect cases where ``redshifts`` have been passed and multiply the entire array with the ``speed of light``. @@ -126,7 +122,7 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, If ``is_comoving_dist`` is set, then ``CZ`` is interpreted as the co-moving distance, rather than (Speed Of Light * Redshift). - RAND_RA: array-like, real (float/double) + RAND_RA : array-like, real (float/double) The array of Right Ascensions for the randoms. RA's are expected to be in [0.0, 360.0], but the code will try to fix cases where the RA's are in [-180, 180.0]. For peace of mind, always supply RA's in @@ -134,7 +130,7 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, Must be of same precision type as RA/DEC/CZ. - RAND_DEC: array-like, real (float/double) + RAND_DEC : array-like, real (float/double) Array of Declinations for the randoms. DEC's are expected to be in the [-90.0, 90.0], but the code will try to fix cases where the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply DEC's in @@ -142,7 +138,7 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, Must be of same precision type as RA/DEC/CZ. - RAND_CZ: array-like, real (float/double) + RAND_CZ : array-like, real (float/double) Array of (Speed Of Light * Redshift) values for the randoms. 
Code will try to detect cases where ``redshifts`` have been passed and multiply the entire array with the ``speed of light``. @@ -150,40 +146,36 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, If ``is_comoving_dist`` is set, then ``CZ2`` is interpreted as the co-moving distance, rather than ``(Speed Of Light * Redshift)``. - - .. note:: RAND_RA, RAND_DEC and RAND_CZ are only used when the + Note: RAND_RA, RAND_DEC and RAND_CZ are only used when the ``centers_file`` needs to be written out. In that case, the RAND_RA, RAND_DEC, and RAND_CZ are used as random centers. - - verbose: boolean (default false) + verbose : boolean (default false) Boolean flag to control output of informational messages - is_comoving_dist: boolean (default false) + is_comoving_dist : boolean (default false) Boolean flag to indicate that ``cz`` values have already been converted into co-moving distances. This flag allows arbitrary cosmologies to be used in ``Corrfunc``. - (xyz)bin_refine_factor: integer, default is (1,1,1); typically within [1-3] + (xyz)bin_refine_factor : integer, default is (1,1,1); typically within [1-3] Controls the refinement on the cell sizes. Can have up to a 20% impact on runtime. - - .. note:: Since the counts in spheres calculation is symmetric + Note: Since the counts in spheres calculation is symmetric in all 3 dimensions, the defaults are different from the clustering routines. - - max_cells_per_dim: integer, default is 100, typical values in [50-300] + max_cells_per_dim : integer, default is 100, typical values in [50-300] Controls the maximum number of cells per dimension. Total number of cells can be up to (max_cells_per_dim)^3. Only increase if ``rmax`` is too small relative to the boxsize (and increasing helps the runtime). - c_api_timer: boolean (default false) + c_api_timer : boolean (default false) Boolean flag to measure actual time spent in the C libraries. Here to allow for benchmarking and scaling studies. 
- isa: string (default ``fastest``) + isa : string (default ``fastest``) Controls the runtime dispatch for the instruction set to use. Possible options are: [``fastest``, ``avx``, ``sse42``, ``fallback``] @@ -201,8 +193,7 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, Returns -------- - results: Numpy structured array - + results : Numpy structured array A numpy structured array containing [rmax, pN[numpN]] with ``nbins`` elements. Each row contains the maximum radius of the sphere and the ``numpN`` elements in the ``pN`` array. Each element of this array @@ -211,9 +202,9 @@ def vpf_mocks(rmax, nbins, nspheres, numpN, function) is the probability that a sphere of radius ``rmax`` contains 0 galaxies. - if ``c_api_timer`` is set, then the return value is a tuple containing - (results, api_time). ``api_time`` measures only the time spent within - the C library and ignores all python overhead. + api_time : float, optional + Only returned if ``c_api_timer`` is set. ``api_time`` measures only the time + spent within the C library and ignores all python overhead. Example diff --git a/Corrfunc/theory/DD.py b/Corrfunc/theory/DD.py index 02a52267..29c2c044 100644 --- a/Corrfunc/theory/DD.py +++ b/Corrfunc/theory/DD.py @@ -53,8 +53,8 @@ def DD(autocorr, nthreads, binfile, X1, Y1, Z1, weights1=None, periodic=True, bin-edges. For example, ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid input specifying **14** (logarithmic) bins between 0.1 and 10.0. This - array does not need to be sorted. - + array does not need to be sorted. + X1/Y1/Z1: array_like, real (float/double) The array of X/Y/Z positions for the first set of points. Calculations are done in the precision of the supplied arrays. @@ -88,8 +88,7 @@ def DD(autocorr, nthreads, binfile, X1, Y1, Z1, weights1=None, periodic=True, Boolean flag to output the average ``r`` for each bin. Code will run slower if you set this flag. - - .. 
note:: If you are calculating in single-precision, ``ravg`` will + Note: If you are calculating in single-precision, ``ravg`` will suffer from numerical loss of precision and can not be trusted. If you need accurate ``ravg`` values, then pass in double precision arrays for the particle positions. diff --git a/Corrfunc/theory/DDrppi.py b/Corrfunc/theory/DDrppi.py index 8b2f4695..482e366a 100644 --- a/Corrfunc/theory/DDrppi.py +++ b/Corrfunc/theory/DDrppi.py @@ -57,21 +57,20 @@ def DDrppi(autocorr, nthreads, pimax, binfile, X1, Y1, Z1, weights1=None, depth. For instance, if ``pimax=40``, then 40 bins will be created along the ``pi`` direction. - - .. note:: Only pairs with ``0 <= dz < pimax`` are counted (no equality). + Note: Only pairs with ``0 <= dz < pimax`` are counted (no equality). binfile: string or an list/array of floats - For string input: filename specifying the ``rp`` bins for - ``DDrppi``. The file should contain white-space separated values - of (rpmin, rpmax) for each ``rp`` wanted. The bins need to be - contiguous and sorted in increasing order (smallest bins come first). + For string input: filename specifying the ``rp`` bins for + ``DDrppi``. The file should contain white-space separated values + of (rpmin, rpmax) for each ``rp`` wanted. The bins need to be + contiguous and sorted in increasing order (smallest bins come first). - For array-like input: A sequence of ``rp`` values that provides the - bin-edges. For example, - ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid - input, specifying **14** (logarithmic) bins between 0.1 and 10.0. This - array does not need to be sorted. + For array-like input: A sequence of ``rp`` values that provides the + bin-edges. For example, + ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid + input specifying **14** (logarithmic) bins between 0.1 and 10.0. This + array does not need to be sorted. 
X1/Y1/Z1: array-like, real (float/double) The array of X/Y/Z positions for the first set of points. @@ -106,11 +105,10 @@ def DDrppi(autocorr, nthreads, pimax, binfile, X1, Y1, Z1, weights1=None, Boolean flag to output the average ``rp`` for each bin. Code will run slower if you set this flag. - - .. note:: If you are calculating in single-precision, ``rpavg`` will - suffer from numerical loss of precision and can not be trusted. If - you need accurate ``rpavg`` values, then pass in double precision - arrays for the particle positions. + Note: If you are calculating in single-precision, ``rpavg`` will + suffer from numerical loss of precision and can not be trusted. If + you need accurate ``rpavg`` values, then pass in double precision + arrays for the particle positions. (xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3] diff --git a/Corrfunc/theory/DDsmu.py b/Corrfunc/theory/DDsmu.py new file mode 100644 index 00000000..335ea677 --- /dev/null +++ b/Corrfunc/theory/DDsmu.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Python wrapper around the C extension for the pair counter in +``theory/DDsmu/``. 
This wrapper is in :py:mod:`Corrfunc.theory.DDsmu` +""" + +from __future__ import (division, print_function, absolute_import, + unicode_literals) + +__author__ = ('Manodeep Sinha', 'Nick Hand') +__all__ = ('DDsmu', ) + + +def DDsmu(autocorr, nthreads, binfile, mu_max, nmu_bins, X1, Y1, Z1, weights1=None, + periodic=True, X2=None, Y2=None, Z2=None, weights2=None, + verbose=False, boxsize=0.0, output_savg=False, + xbin_refine_factor=2, ybin_refine_factor=2, + zbin_refine_factor=1, max_cells_per_dim=100, + c_api_timer=False, isa=r'fastest', weight_type=None): + """ + Calculate the 2-D pair-counts corresponding to the redshift-space + correlation function, :math:`\\xi(s, \mu)` Pairs which are separated + by less than the ``s`` bins (specified in ``binfile``) in 3-D, and + less than ``s*mu_max`` in the Z-dimension are counted. + + If ``weights`` are provided, the resulting pair counts are weighted. The + weighting scheme depends on ``weight_type``. + + + .. note:: This module only returns pair counts and not the actual + correlation function :math:`\\xi(s, \mu)`. See the + utilities :py:mod:`Corrfunc.utils.convert_3d_counts_to_cf` + for computing :math:`\\xi(s, \mu)` from the pair counts. + + .. versionadded:: 2.1.0 + + Parameters + ---------- + + autocorr: boolean, required + Boolean flag for auto/cross-correlation. If autocorr is set to 1, + then the second set of particle positions are not required. + + nthreads: integer + The number of OpenMP threads to use. Has no effect if OpenMP was not + enabled during library compilation. + + binfile: string or an list/array of floats + For string input: filename specifying the ``s`` bins for + ``DDsmu_mocks``. The file should contain white-space separated values + of (smin, smax) specifying each ``s`` bin wanted. The bins + need to be contiguous and sorted in increasing order (smallest bins + come first). + + For array-like input: A sequence of ``s`` values that provides the + bin-edges. 
For example, + ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid + input specifying **14** (logarithmic) bins between 0.1 and 10.0. This + array does not need to be sorted. + + mu_max: double. Must be in range (0.0, 1.0] + A double-precision value for the maximum cosine of the angular + separation from the line of sight (LOS). Here, LOS is taken to be + along the Z direction. + + Note: Only pairs with :math:`0 <= cos(\\theta_{LOS}) < \\mu_{max}` + are counted (no equality). + + nmu_bins: int + The number of linear ``mu`` bins, with the bins spanning + the range [0, ``mu_max``) + + X1/Y1/Z1 : array-like, real (float/double) + The array of X/Y/Z positions for the first set of points. + Calculations are done in the precision of the supplied arrays. + + weights1 : array-like, real (float/double), shape (n_particles,) or \ + (n_weights_per_particle,n_particles), optional + Weights for computing a weighted pair count. + + weight_type : str, optional + The type of pair weighting to apply. + Options: "pair_product", None; Default: None. + + periodic : boolean + Boolean flag to indicate periodic boundary conditions. + + X2/Y2/Z2 : array-like, real (float/double) + Array of XYZ positions for the second set of points. *Must* be the same + precision as the X1/Y1/Z1 arrays. Only required when ``autocorr==0``. + + weights2 : array-like, real (float/double), shape (n_particles,) or \ + (n_weights_per_particle,n_particles), optional + Weights for computing a weighted pair count. + + verbose : boolean (default false) + Boolean flag to control output of informational messages + + boxsize : double + The side-length of the cube in the cosmological simulation. + Present to facilitate exact calculations for periodic wrapping. + If boxsize is not supplied, then the wrapping is done based on + the maximum difference within each dimension of the X/Y/Z arrays. + + output_savg : boolean (default false) + Boolean flag to output the average ``s`` for each bin. 
Code will + run slower if you set this flag. Also, note, if you are calculating + in single-precision, ``savg`` will suffer from numerical loss of + precision and can not be trusted. If you need accurate ``savg`` + values, then pass in double precision arrays for the particle positions. + + (xyz)bin_refine_factor: integer (default (2,2,1); typical values in [1-3]) + Controls the refinement on the cell sizes. Can have up to a 20% impact + on runtime. + + max_cells_per_dim: integer (default 100, typical values in [50-300]) + Controls the maximum number of cells per dimension. Total number of + cells can be up to (max_cells_per_dim)^3. Only increase if ``rmax`` is + too small relative to the boxsize (and increasing helps the runtime). + + c_api_timer : boolean (default false) + Boolean flag to measure actual time spent in the C libraries. Here + to allow for benchmarking and scaling studies. + + isa : string (default ``fastest``) + Controls the runtime dispatch for the instruction set to use. Possible + options are: [``fastest``, ``avx``, ``sse42``, ``fallback``] + + Setting isa to ``fastest`` will pick the fastest available instruction + set on the current computer. However, if you set ``isa`` to, say, + ``avx`` and ``avx`` is not available on the computer, then the code will + revert to using ``fallback`` (even though ``sse42`` might be available). + + Unless you are benchmarking the different instruction sets, you should + always leave ``isa`` to the default value. And if you *are* benchmarking, + then the string supplied here gets translated into an ``enum`` for the + instruction set defined in ``utils/defs.h``. + + Returns + -------- + results : Numpy structured array + A numpy structured array containing [smin, smax, savg, mu_max, npairs, weightavg] + for each spatial bin specified in the ``binfile``. There will be a total of ``nmu_bins`` + entries ranging from [0, ``mu_max``) *per* spatial bin. If ``output_savg`` is not set, then ``savg`` + will be set to 0.0 for all bins; similarly for ``weightavg``. 
``npairs`` + contains the number of pairs in that bin. + + api_time : float, optional + Only returned if ``c_api_timer`` is set; the time spent within the C library, ignoring python overhead. + + Example + ------- + >>> from __future__ import print_function + >>> import numpy as np + >>> from os.path import dirname, abspath, join as pjoin + >>> import Corrfunc + >>> from Corrfunc.theory.DDsmu import DDsmu + >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)), + ... "../theory/tests/", "bins") + >>> N = 10000 + >>> boxsize = 420.0 + >>> nthreads = 4 + >>> autocorr = 1 + >>> mu_max = 1.0 + >>> seed = 42 + >>> nmu_bins = 10 + >>> np.random.seed(seed) + >>> X = np.random.uniform(0, boxsize, N) + >>> Y = np.random.uniform(0, boxsize, N) + >>> Z = np.random.uniform(0, boxsize, N) + >>> weights = np.ones_like(X) + >>> results = DDsmu(autocorr, nthreads, binfile, mu_max, nmu_bins, + ... X, Y, Z, weights1=weights, weight_type='pair_product', output_savg=True) + >>> for r in results[100:]: print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10.1f}" + ... " {4:10d} {5:10.6f}".format(r['smin'], r['smax'], + ... r['savg'], r['mu_max'], r['npairs'], r['weightavg'])) + ... 
# doctest: +NORMALIZE_WHITESPACE + 5.788530 8.249250 7.148213 0.1 230 1.000000 + 5.788530 8.249250 7.157218 0.2 236 1.000000 + 5.788530 8.249250 7.165338 0.3 208 1.000000 + 5.788530 8.249250 7.079905 0.4 252 1.000000 + 5.788530 8.249250 7.251661 0.5 184 1.000000 + 5.788530 8.249250 7.118536 0.6 222 1.000000 + 5.788530 8.249250 7.083466 0.7 238 1.000000 + 5.788530 8.249250 7.198184 0.8 170 1.000000 + 5.788530 8.249250 7.127409 0.9 208 1.000000 + 5.788530 8.249250 6.973090 1.0 206 1.000000 + 8.249250 11.756000 10.149183 0.1 592 1.000000 + 8.249250 11.756000 10.213009 0.2 634 1.000000 + 8.249250 11.756000 10.192220 0.3 532 1.000000 + 8.249250 11.756000 10.246931 0.4 544 1.000000 + 8.249250 11.756000 10.102675 0.5 530 1.000000 + 8.249250 11.756000 10.276180 0.6 644 1.000000 + 8.249250 11.756000 10.251264 0.7 666 1.000000 + 8.249250 11.756000 10.138399 0.8 680 1.000000 + 8.249250 11.756000 10.191916 0.9 566 1.000000 + 8.249250 11.756000 10.243229 1.0 608 1.000000 + 11.756000 16.753600 14.552776 0.1 1734 1.000000 + 11.756000 16.753600 14.579991 0.2 1806 1.000000 + 11.756000 16.753600 14.599611 0.3 1802 1.000000 + 11.756000 16.753600 14.471100 0.4 1820 1.000000 + 11.756000 16.753600 14.480192 0.5 1740 1.000000 + 11.756000 16.753600 14.493679 0.6 1746 1.000000 + 11.756000 16.753600 14.547713 0.7 1722 1.000000 + 11.756000 16.753600 14.465390 0.8 1750 1.000000 + 11.756000 16.753600 14.547465 0.9 1798 1.000000 + 11.756000 16.753600 14.440975 1.0 1828 1.000000 + 16.753600 23.875500 20.720406 0.1 5094 1.000000 + 16.753600 23.875500 20.735403 0.2 5004 1.000000 + 16.753600 23.875500 20.721069 0.3 5172 1.000000 + 16.753600 23.875500 20.723648 0.4 5014 1.000000 + 16.753600 23.875500 20.650621 0.5 5094 1.000000 + 16.753600 23.875500 20.688135 0.6 5076 1.000000 + 16.753600 23.875500 20.735691 0.7 4910 1.000000 + 16.753600 23.875500 20.714097 0.8 4864 1.000000 + 16.753600 23.875500 20.751836 0.9 4954 1.000000 + 16.753600 23.875500 20.721183 1.0 5070 1.000000 + """ + try: + from 
Corrfunc._countpairs import countpairs_s_mu as DDsmu_extn + except ImportError: + msg = "Could not import the C extension for the 3-D "\ + "redshift-space pair counter." + raise ImportError(msg) + + import numpy as np + from Corrfunc.utils import translate_isa_string_to_enum,\ + return_file_with_rbins + from future.utils import bytes_to_native_str + + # Broadcast scalar weights to arrays + if weights1 is not None: + weights1 = np.atleast_1d(weights1) + if weights2 is not None: + weights2 = np.atleast_1d(weights2) + + # Check if mu_max is scalar + if not np.isscalar(mu_max): + msg = "The parameter `mu_max` = {0}, has size = {1}. "\ + "The code is expecting a scalar quantity (and not "\ + "not a list, array)".format(mu_max, np.size(mu_max)) + raise TypeError(msg) + + # Check that mu_max is within (0.0, 1.0] + if mu_max <= 0.0 or mu_max > 1.0: + msg = "The parameter `mu_max` = {0}, is the max. of cosine of an " + "angle and should be within (0.0, 1.0]".format(mu_max) + raise ValueError(msg) + + if not autocorr: + if X2 is None or Y2 is None or Z2 is None: + msg = "Must pass valid arrays for X2/Y2/Z2 for "\ + "computing cross-correlation" + raise ValueError(msg) + + # If only one set of points has weights, set the other to uniform weights + if weights1 is None and weights2 is not None: + weights1 = np.ones_like(weights2) + if weights2 is None and weights1 is not None: + weights2 = np.ones_like(weights1) + + else: + X2 = np.empty(1) + Y2 = np.empty(1) + Z2 = np.empty(1) + + # Passing None parameters breaks the parsing code, so avoid this + kwargs = {} + for k in ['weights1', 'weights2', 'weight_type', 'X2', 'Y2', 'Z2']: + v = locals()[k] + if v is not None: + kwargs[k] = v + + integer_isa = translate_isa_string_to_enum(isa) + sbinfile, delete_after_use = return_file_with_rbins(binfile) + extn_results, api_time = DDsmu_extn(autocorr, nthreads, + sbinfile, + mu_max, nmu_bins, + X1, Y1, Z1, + periodic=periodic, + verbose=verbose, + boxsize=boxsize, + 
output_savg=output_savg, + xbin_refine_factor=xbin_refine_factor, + ybin_refine_factor=ybin_refine_factor, + zbin_refine_factor=zbin_refine_factor, + max_cells_per_dim=max_cells_per_dim, + c_api_timer=c_api_timer, + isa=integer_isa, **kwargs) + + if extn_results is None: + msg = "RuntimeError occurred" + raise RuntimeError(msg) + + if delete_after_use: + import os + os.remove(sbinfile) + + results_dtype = np.dtype([(bytes_to_native_str(b'smin'), np.float), + (bytes_to_native_str(b'smax'), np.float), + (bytes_to_native_str(b'savg'), np.float), + (bytes_to_native_str(b'mu_max'), np.float), + (bytes_to_native_str(b'npairs'), np.uint64), + (bytes_to_native_str(b'weightavg'), np.float),]) + results = np.array(extn_results, dtype=results_dtype) + + if not c_api_timer: + return results + else: + return results, api_time + + +if __name__ == '__main__': + import doctest + doctest.testmod() diff --git a/Corrfunc/theory/__init__.py b/Corrfunc/theory/__init__.py index 7d19322d..cf1afcfa 100644 --- a/Corrfunc/theory/__init__.py +++ b/Corrfunc/theory/__init__.py @@ -9,7 +9,7 @@ unicode_literals) __author__ = ('Manodeep Sinha') -__all__ = ('DD', 'DDrppi', 'wp', 'xi', 'vpf', ) +__all__ = ('DD', 'DDrppi', 'wp', 'xi', 'vpf', 'DDsmu',) import sys @@ -18,7 +18,7 @@ from .wp import wp from .xi import xi from .vpf import vpf - +from .DDsmu import DDsmu if sys.version_info[0] < 3: __all__ = [n.encode('ascii') for n in __all__] diff --git a/Corrfunc/theory/vpf.py b/Corrfunc/theory/vpf.py index 8b76c8c1..2696f31d 100644 --- a/Corrfunc/theory/vpf.py +++ b/Corrfunc/theory/vpf.py @@ -59,8 +59,7 @@ def vpf(rmax, nbins, nspheres, numpN, seed, and so on... - - .. note:: p0 is the vpf + Note: ``p0`` is the vpf seed: unsigned integer @@ -93,8 +92,7 @@ def vpf(rmax, nbins, nspheres, numpN, seed, Controls the refinement on the cell sizes. Can have up to a 20% impact on runtime. - - .. 
note:: Since the counts in spheres calculation is symmetric + Note: Since the counts in spheres calculation is symmetric in all 3 dimensions, the defaults are different from the clustering routines. diff --git a/Corrfunc/theory/wp.py b/Corrfunc/theory/wp.py index cd72fc55..b78b5d93 100644 --- a/Corrfunc/theory/wp.py +++ b/Corrfunc/theory/wp.py @@ -36,8 +36,7 @@ def find_fastest_wp_bin_refs(boxsize, pimax, nthreads, binfile, X, Y, Z, A double-precision value for the maximum separation along the Z-dimension. - - .. note:: Only pairs with ``0 <= dz < pimax`` are counted (no equality). + Note: Only pairs with ``0 <= dz < pimax`` are counted (no equality). nthreads: integer @@ -52,8 +51,8 @@ def find_fastest_wp_bin_refs(boxsize, pimax, nthreads, binfile, X, Y, Z, For array-like input: A sequence of ``rp`` values that provides the bin-edges. For example, ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid - input, specifying **14** (logarithmic) bins between 0.1 and 10.0. This - array does not need to be sorted. + input specifying **14** (logarithmic) bins between 0.1 and 10.0. This + array does not need to be sorted. X/Y/Z: arraytype, real (float/double) Particle positions in the 3 axes. Must be within [0, boxsize] @@ -72,11 +71,10 @@ def find_fastest_wp_bin_refs(boxsize, pimax, nthreads, binfile, X, Y, Z, Boolean flag to output the average ``rp`` for each bin. Code will run slower if you set this flag. - - .. note:: If you are calculating in single-precision, ``rpavg`` will - suffer from numerical loss of precision and can not be trusted. If - you need accurate ``rpavg`` values, then pass in double precision - arrays for the particle positions. + Note: If you are calculating in single-precision, ``rpavg`` will + suffer from numerical loss of precision and can not be trusted. If + you need accurate ``rpavg`` values, then pass in double precision + arrays for the particle positions. 
max_cells_per_dim: integer, default is 100, typical values in [50-300] @@ -119,7 +117,7 @@ def find_fastest_wp_bin_refs(boxsize, pimax, nthreads, binfile, X, Y, Z, The combination of ``bin refine factors`` along each dimension that produces the fastest code. - runtimes: numpy structured array + runtimes : numpy structured array if ``return_runtimes`` is set, then the return value is a tuple containing ((nx, ny, nz), runtimes). ``runtimes`` is a ``numpy`` @@ -318,8 +316,7 @@ def wp(boxsize, pimax, nthreads, binfile, X, Y, Z, A double-precision value for the maximum separation along the Z-dimension. - - .. note:: Only pairs with ``0 <= dz < pimax`` are counted (no equality). + Note: Only pairs with ``0 <= dz < pimax`` are counted (no equality). nthreads: integer @@ -334,8 +331,8 @@ def wp(boxsize, pimax, nthreads, binfile, X, Y, Z, For array-like input: A sequence of ``rp`` values that provides the bin-edges. For example, ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid - input, specifying **14** (logarithmic) bins between 0.1 and 10.0. This - array does not need to be sorted. + input specifying **14** (logarithmic) bins between 0.1 and 10.0. This + array does not need to be sorted. X/Y/Z: arraytype, real (float/double) Particle positions in the 3 axes. Must be within [0, boxsize] @@ -348,9 +345,9 @@ def wp(boxsize, pimax, nthreads, binfile, X, Y, Z, are double precision arrays (C double type). weights: array_like, real (float/double), optional - A scalar, or an array of weights of shape (n_weights, n_positions) or (n_positions,). - `weight_type` specifies how these weights are used; results are returned - in the `weightavg` field. + A scalar, or an array of weights of shape (n_weights, n_positions) or (n_positions,). + `weight_type` specifies how these weights are used; results are returned + in the `weightavg` field. 
verbose: boolean (default false) Boolean flag to control output of informational messages @@ -359,11 +356,10 @@ def wp(boxsize, pimax, nthreads, binfile, X, Y, Z, Boolean flag to output the average ``rp`` for each bin. Code will run slower if you set this flag. - - .. note:: If you are calculating in single-precision, ``rpavg`` will - suffer from numerical loss of precision and can not be trusted. If - you need accurate ``rpavg`` values, then pass in double precision - arrays for the particle positions. + Note: If you are calculating in single-precision, ``rpavg`` will + suffer from numerical loss of precision and can not be trusted. If + you need accurate ``rpavg`` values, then pass in double precision + arrays for the particle positions. (xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3] diff --git a/Corrfunc/theory/xi.py b/Corrfunc/theory/xi.py index 5fe30077..557f96d5 100644 --- a/Corrfunc/theory/xi.py +++ b/Corrfunc/theory/xi.py @@ -46,16 +46,16 @@ def xi(boxsize, nthreads, binfile, X, Y, Z, Number of threads to use. binfile: string or an list/array of floats - For string input: filename specifying the ``r`` bins for - ``xi``. The file should contain white-space separated values - of (rmin, rmax) for each ``r`` wanted. The bins need to be - contiguous and sorted in increasing order (smallest bins come first). + For string input: filename specifying the ``r`` bins for + ``xi``. The file should contain white-space separated values + of (rmin, rmax) for each ``r`` wanted. The bins need to be + contiguous and sorted in increasing order (smallest bins come first). - For array-like input: A sequence of ``r`` values that provides the - bin-edges. For example, - ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid - input specifying **14** (logarithmic) bins between 0.1 and 10.0. This - array does not need to be sorted. + For array-like input: A sequence of ``r`` values that provides the + bin-edges. 
For example, + ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid + input specifying **14** (logarithmic) bins between 0.1 and 10.0. This + array does not need to be sorted. X/Y/Z: arraytype, real (float/double) Particle positions in the 3 axes. Must be within [0, boxsize] @@ -68,9 +68,9 @@ def xi(boxsize, nthreads, binfile, X, Y, Z, are double precision arrays (C double type). weights: array_like, real (float/double), optional - A scalar, or an array of weights of shape (n_weights, n_positions) or - (n_positions,). `weight_type` specifies how these weights are used; - results are returned in the `weightavg` field. + A scalar, or an array of weights of shape (n_weights, n_positions) or + (n_positions,). `weight_type` specifies how these weights are used; + results are returned in the `weightavg` field. verbose: boolean (default false) Boolean flag to control output of informational messages @@ -79,11 +79,10 @@ def xi(boxsize, nthreads, binfile, X, Y, Z, Boolean flag to output the average ``r`` for each bin. Code will run slower if you set this flag. - - .. note:: If you are calculating in single-precision, ``rpavg`` will - suffer from numerical loss of precision and can not be trusted. If - you need accurate ``rpavg`` values, then pass in double precision - arrays for the particle positions. + Note: If you are calculating in single-precision, ``rpavg`` will + suffer from numerical loss of precision and can not be trusted. If + you need accurate ``rpavg`` values, then pass in double precision + arrays for the particle positions. 
(xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3] diff --git a/Corrfunc/utils.py b/Corrfunc/utils.py index 1877515e..a183f3ac 100644 --- a/Corrfunc/utils.py +++ b/Corrfunc/utils.py @@ -19,11 +19,6 @@ except NameError: xrange = range -try: - long -except NameError: - long = int - def convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, D1D2, D1R2, D2R1, R1R2, estimator='LS'): @@ -571,7 +566,7 @@ def compute_nbins(max_diff, binsize, msg = 'Error: Invalid value for max_diff = {0} or binsize = {1}. '\ 'Both must be positive'.format(max_diff, binsize) raise ValueError(msg) - if max_nbins < 1: + if max_nbins is not None and max_nbins < 1: msg = 'Error: Invalid for the max. number of bins allowed = {0}.'\ 'Max. nbins must be >= 1'.format(max_nbins) raise ValueError(msg) @@ -582,7 +577,7 @@ def compute_nbins(max_diff, binsize, raise ValueError(msg) # At least 1 bin - ngrid = max(1, long(max_diff/binsize)) + ngrid = max(int(1), int(max_diff/binsize)) # Then refine ngrid *= refine_factor @@ -590,7 +585,7 @@ def compute_nbins(max_diff, binsize, # But don't exceed max number of bins # (if passed as a parameter) if max_nbins: - ngrid = min(max_nbins, ngrid) + ngrid = min(int(max_nbins), ngrid) return ngrid @@ -679,65 +674,67 @@ def gridlink_sphere(thetamax, -------- >>> from Corrfunc.utils import gridlink_sphere + >>> import numpy as np + >>> np.set_printoptions(precision=8) >>> thetamax=30 - >>> gridlink_sphere(thetamax) - array([([-1.57079633, -1.04719755], [ 0. , 3.14159265]), - ([-1.57079633, -1.04719755], [ 3.14159265, 6.28318531]), - ([-1.04719755, -0.52359878], [ 0. , 3.14159265]), - ([-1.04719755, -0.52359878], [ 3.14159265, 6.28318531]), - ([-0.52359878, 0. ], [ 0. , 1.25663706]), - ([-0.52359878, 0. ], [ 1.25663706, 2.51327412]), - ([-0.52359878, 0. ], [ 2.51327412, 3.76991118]), - ([-0.52359878, 0. ], [ 3.76991118, 5.02654825]), - ([-0.52359878, 0. ], [ 5.02654825, 6.28318531]), - ([ 0. , 0.52359878], [ 0. , 1.25663706]), - ([ 0. 
, 0.52359878], [ 1.25663706, 2.51327412]), - ([ 0. , 0.52359878], [ 2.51327412, 3.76991118]), - ([ 0. , 0.52359878], [ 3.76991118, 5.02654825]), - ([ 0. , 0.52359878], [ 5.02654825, 6.28318531]), - ([ 0.52359878, 1.04719755], [ 0. , 3.14159265]), - ([ 0.52359878, 1.04719755], [ 3.14159265, 6.28318531]), - ([ 1.04719755, 1.57079633], [ 0. , 3.14159265]), - ([ 1.04719755, 1.57079633], [ 3.14159265, 6.28318531])], - dtype=[(u'dec_limit', '>> gridlink_sphere(60, dec_refine_factor=3, ra_refine_factor=2) - array([([-1.57079633, -1.22173048], [ 0. , 1.57079633]), - ([-1.57079633, -1.22173048], [ 1.57079633, 3.14159265]), - ([-1.57079633, -1.22173048], [ 3.14159265, 4.71238898]), - ([-1.57079633, -1.22173048], [ 4.71238898, 6.28318531]), - ([-1.22173048, -0.87266463], [ 0. , 1.57079633]), - ([-1.22173048, -0.87266463], [ 1.57079633, 3.14159265]), - ([-1.22173048, -0.87266463], [ 3.14159265, 4.71238898]), - ([-1.22173048, -0.87266463], [ 4.71238898, 6.28318531]), - ([-0.87266463, -0.52359878], [ 0. , 1.57079633]), - ([-0.87266463, -0.52359878], [ 1.57079633, 3.14159265]), - ([-0.87266463, -0.52359878], [ 3.14159265, 4.71238898]), - ([-0.87266463, -0.52359878], [ 4.71238898, 6.28318531]), - ([-0.52359878, -0.17453293], [ 0. , 1.57079633]), - ([-0.52359878, -0.17453293], [ 1.57079633, 3.14159265]), - ([-0.52359878, -0.17453293], [ 3.14159265, 4.71238898]), - ([-0.52359878, -0.17453293], [ 4.71238898, 6.28318531]), - ([-0.17453293, 0.17453293], [ 0. , 1.57079633]), - ([-0.17453293, 0.17453293], [ 1.57079633, 3.14159265]), - ([-0.17453293, 0.17453293], [ 3.14159265, 4.71238898]), - ([-0.17453293, 0.17453293], [ 4.71238898, 6.28318531]), - ([ 0.17453293, 0.52359878], [ 0. , 1.57079633]), - ([ 0.17453293, 0.52359878], [ 1.57079633, 3.14159265]), - ([ 0.17453293, 0.52359878], [ 3.14159265, 4.71238898]), - ([ 0.17453293, 0.52359878], [ 4.71238898, 6.28318531]), - ([ 0.52359878, 0.87266463], [ 0. 
, 1.57079633]), - ([ 0.52359878, 0.87266463], [ 1.57079633, 3.14159265]), - ([ 0.52359878, 0.87266463], [ 3.14159265, 4.71238898]), - ([ 0.52359878, 0.87266463], [ 4.71238898, 6.28318531]), - ([ 0.87266463, 1.22173048], [ 0. , 1.57079633]), - ([ 0.87266463, 1.22173048], [ 1.57079633, 3.14159265]), - ([ 0.87266463, 1.22173048], [ 3.14159265, 4.71238898]), - ([ 0.87266463, 1.22173048], [ 4.71238898, 6.28318531]), - ([ 1.22173048, 1.57079633], [ 0. , 1.57079633]), - ([ 1.22173048, 1.57079633], [ 1.57079633, 3.14159265]), - ([ 1.22173048, 1.57079633], [ 3.14159265, 4.71238898]), - ([ 1.22173048, 1.57079633], [ 4.71238898, 6.28318531])], - dtype=[(u'dec_limit', '>> grid = gridlink_sphere(thetamax) # doctest: +NORMALIZE_WHITESPACE + >>> print(grid) + [([-1.57079633, -1.04719755], [ 0. , 3.14159265]) + ([-1.57079633, -1.04719755], [ 3.14159265, 6.28318531]) + ([-1.04719755, -0.52359878], [ 0. , 3.14159265]) + ([-1.04719755, -0.52359878], [ 3.14159265, 6.28318531]) + ([-0.52359878, 0. ], [ 0. , 1.25663706]) + ([-0.52359878, 0. ], [ 1.25663706, 2.51327412]) + ([-0.52359878, 0. ], [ 2.51327412, 3.76991118]) + ([-0.52359878, 0. ], [ 3.76991118, 5.02654825]) + ([-0.52359878, 0. ], [ 5.02654825, 6.28318531]) + ([ 0. , 0.52359878], [ 0. , 1.25663706]) + ([ 0. , 0.52359878], [ 1.25663706, 2.51327412]) + ([ 0. , 0.52359878], [ 2.51327412, 3.76991118]) + ([ 0. , 0.52359878], [ 3.76991118, 5.02654825]) + ([ 0. , 0.52359878], [ 5.02654825, 6.28318531]) + ([ 0.52359878, 1.04719755], [ 0. , 3.14159265]) + ([ 0.52359878, 1.04719755], [ 3.14159265, 6.28318531]) + ([ 1.04719755, 1.57079633], [ 0. , 3.14159265]) + ([ 1.04719755, 1.57079633], [ 3.14159265, 6.28318531])] + >>> grid = gridlink_sphere(60, dec_refine_factor=3, ra_refine_factor=2) # doctest: +NORMALIZE_WHITESPACE + >>> print(grid) + [([-1.57079633, -1.22173048], [ 0. 
, 1.57079633]) + ([-1.57079633, -1.22173048], [ 1.57079633, 3.14159265]) + ([-1.57079633, -1.22173048], [ 3.14159265, 4.71238898]) + ([-1.57079633, -1.22173048], [ 4.71238898, 6.28318531]) + ([-1.22173048, -0.87266463], [ 0. , 1.57079633]) + ([-1.22173048, -0.87266463], [ 1.57079633, 3.14159265]) + ([-1.22173048, -0.87266463], [ 3.14159265, 4.71238898]) + ([-1.22173048, -0.87266463], [ 4.71238898, 6.28318531]) + ([-0.87266463, -0.52359878], [ 0. , 1.57079633]) + ([-0.87266463, -0.52359878], [ 1.57079633, 3.14159265]) + ([-0.87266463, -0.52359878], [ 3.14159265, 4.71238898]) + ([-0.87266463, -0.52359878], [ 4.71238898, 6.28318531]) + ([-0.52359878, -0.17453293], [ 0. , 1.57079633]) + ([-0.52359878, -0.17453293], [ 1.57079633, 3.14159265]) + ([-0.52359878, -0.17453293], [ 3.14159265, 4.71238898]) + ([-0.52359878, -0.17453293], [ 4.71238898, 6.28318531]) + ([-0.17453293, 0.17453293], [ 0. , 1.57079633]) + ([-0.17453293, 0.17453293], [ 1.57079633, 3.14159265]) + ([-0.17453293, 0.17453293], [ 3.14159265, 4.71238898]) + ([-0.17453293, 0.17453293], [ 4.71238898, 6.28318531]) + ([ 0.17453293, 0.52359878], [ 0. , 1.57079633]) + ([ 0.17453293, 0.52359878], [ 1.57079633, 3.14159265]) + ([ 0.17453293, 0.52359878], [ 3.14159265, 4.71238898]) + ([ 0.17453293, 0.52359878], [ 4.71238898, 6.28318531]) + ([ 0.52359878, 0.87266463], [ 0. , 1.57079633]) + ([ 0.52359878, 0.87266463], [ 1.57079633, 3.14159265]) + ([ 0.52359878, 0.87266463], [ 3.14159265, 4.71238898]) + ([ 0.52359878, 0.87266463], [ 4.71238898, 6.28318531]) + ([ 0.87266463, 1.22173048], [ 0. , 1.57079633]) + ([ 0.87266463, 1.22173048], [ 1.57079633, 3.14159265]) + ([ 0.87266463, 1.22173048], [ 3.14159265, 4.71238898]) + ([ 0.87266463, 1.22173048], [ 4.71238898, 6.28318531]) + ([ 1.22173048, 1.57079633], [ 0. 
, 1.57079633]) + ([ 1.22173048, 1.57079633], [ 1.57079633, 3.14159265]) + ([ 1.22173048, 1.57079633], [ 3.14159265, 4.71238898]) + ([ 1.22173048, 1.57079633], [ 4.71238898, 6.28318531])] """ diff --git a/README.rst b/README.rst index 9f712f64..f7287df6 100644 --- a/README.rst +++ b/README.rst @@ -350,7 +350,6 @@ with the code, including using it in commercial application. Project URL =========== -- website (https://manodeep.github.io/Corrfunc/) - documentation (http://corrfunc.rtfd.io/) - version control (https://github.com/manodeep/Corrfunc) diff --git a/common.mk b/common.mk index e1f93d8c..455e2187 100644 --- a/common.mk +++ b/common.mk @@ -40,8 +40,8 @@ OPT += -DUSE_OMP ### You should NOT edit below this line DISTNAME:=Corrfunc MAJOR:=2 -MINOR:=0 -PATCHLEVEL:=1 +MINOR:=1 +PATCHLEVEL:=0 VERSION:=$(MAJOR).$(MINOR).$(PATCHLEVEL) ABI_COMPAT_VERSION:=$(MAJOR).0 # Whenever conda needs to be checked again @@ -212,7 +212,7 @@ ifeq ($(DO_CHECKS), 1) CFLAGS += -Werror -Wno-unknown-warning-option endif - GSL_FOUND := $(shell gsl-config --version) + GSL_FOUND := $(shell gsl-config --version 2>/dev/null) ifndef GSL_FOUND $(error $(ccred)Error:$(ccreset) GSL not found in path - please install GSL before installing $(DISTNAME).$(VERSION) $(ccreset)) endif diff --git a/docs/source/all-interfaces.rst b/docs/source/all-interfaces.rst index 418abb29..dd81782a 100644 --- a/docs/source/all-interfaces.rst +++ b/docs/source/all-interfaces.rst @@ -28,7 +28,8 @@ associated with each type of clustering statistic: Clustering Statistic Python Interface Static library Command-line (executable name) ====================== ================================ ======================================== ==================================== :math:`\xi(r)` :py:mod:`Corrfunc.theory.DD` ``theory/DD/libcountpairs.a`` ``theory/DD/DD`` -:math:`\xi(r_p,\pi)` :py:mod:`Corrfunc.theory.DDrppi` ``theory/DDrppi/libcountpairs_rp_pi.a`` ``theory/DDrppi/DDrppi`` +:math:`\xi(r_p,\pi)` 
:py:mod:`Corrfunc.theory.DDrppi` ``theory/DDrppi/libcountpairs_rp_pi.a`` ``theory/DDrppi/DDrppi`` +:math:`\xi(s,\mu)` :py:mod:`Corrfunc.theory.DDsmu` ``theory/DDsmu/libcountpairs_s_mu.a`` ``theory/DDsmu/DDsmu`` :math:`w_p(r_p)` :py:mod:`Corrfunc.theory.wp` ``theory/wp/libcountpairs_wp.a`` ``theory/wp/wp`` :math:`\xi(r)` :py:mod:`Corrfunc.theory.xi` ``theory/xi/libcountpairs_xi.a`` ``theory/xi/xi`` :math:`pN(n)` :py:mod:`Corrfunc.theory.vpf` ``theory/vpf/libcountspheres.a`` ``theory/vpf/vpf`` @@ -44,6 +45,7 @@ command-line executables are: Clustering Statistic Python Interface Static library Command-line (executable name) ====================== ====================================== ===================================================== ===================================== :math:`\xi(r_p,\pi)` :py:mod:`Corrfunc.mocks.DDrppi_mocks` ``mocks/DDrppi_mocks/libcountpairs_rp_pi_mocks.a`` ``mocks/DDrppi_mocks/DDrppi_mocks`` +:math:`\xi(s,\mu)` :py:mod:`Corrfunc.mocks.DDsmu_mocks` ``mocks/DDsmu_mocks/libcountpairs_s_mu_mocks.a`` ``mocks/DDsmu_mocks/DDsmu_mocks`` :math:`\omega(\theta)` :py:mod:`Corrfunc.mocks.DDtheta_mocks` ``mocks/DDtheta_mocks/libcountpairs_theta_mocks.a`` ``mocks/DDtheta_mocks/DDtheta_mocks`` :math:`pN(n)` :py:mod:`Corrfunc.mocks.vpf_mocks` ``mocks/vpf_mocks/libcountspheres_mocks`` ``mocks/vpf_mocks/vpf_mocks`` ====================== ====================================== ===================================================== ===================================== diff --git a/docs/source/api/Corrfunc.mocks.rst b/docs/source/api/Corrfunc.mocks.rst index e8a40caa..633c5cf0 100644 --- a/docs/source/api/Corrfunc.mocks.rst +++ b/docs/source/api/Corrfunc.mocks.rst @@ -1,5 +1,5 @@ -Corrfunc.mocks package -====================== +Corrfunc\.mocks package +======================= .. 
automodule:: Corrfunc.mocks :members: @@ -9,24 +9,32 @@ Corrfunc.mocks package Submodules ---------- -Corrfunc.mocks.DDrppi_mocks module ----------------------------------- +Corrfunc\.mocks\.DDrppi\_mocks module +------------------------------------- .. automodule:: Corrfunc.mocks.DDrppi_mocks :members: :undoc-members: :show-inheritance: -Corrfunc.mocks.DDtheta_mocks module ------------------------------------ +Corrfunc\.mocks\.DDsmu\_mocks module +------------------------------------ + +.. automodule:: Corrfunc.mocks.DDsmu_mocks + :members: + :undoc-members: + :show-inheritance: + +Corrfunc\.mocks\.DDtheta\_mocks module +-------------------------------------- .. automodule:: Corrfunc.mocks.DDtheta_mocks :members: :undoc-members: :show-inheritance: -Corrfunc.mocks.vpf_mocks module -------------------------------- +Corrfunc\.mocks\.vpf\_mocks module +---------------------------------- .. automodule:: Corrfunc.mocks.vpf_mocks :members: diff --git a/docs/source/api/Corrfunc.rst b/docs/source/api/Corrfunc.rst index c75f401d..818832da 100644 --- a/docs/source/api/Corrfunc.rst +++ b/docs/source/api/Corrfunc.rst @@ -17,40 +17,40 @@ Subpackages Submodules ---------- -Corrfunc.call_correlation_functions module ------------------------------------------- +Corrfunc\.call\_correlation\_functions module +--------------------------------------------- .. automodule:: Corrfunc.call_correlation_functions :members: :undoc-members: :show-inheritance: -Corrfunc.call_correlation_functions_mocks module ------------------------------------------------- +Corrfunc\.call\_correlation\_functions\_mocks module +---------------------------------------------------- .. automodule:: Corrfunc.call_correlation_functions_mocks :members: :undoc-members: :show-inheritance: -Corrfunc.io module ------------------- +Corrfunc\.io module +------------------- .. 
automodule:: Corrfunc.io :members: :undoc-members: :show-inheritance: -Corrfunc.tests module ---------------------- +Corrfunc\.tests module +---------------------- .. automodule:: Corrfunc.tests :members: :undoc-members: :show-inheritance: -Corrfunc.utils module ---------------------- +Corrfunc\.utils module +---------------------- .. automodule:: Corrfunc.utils :members: diff --git a/docs/source/api/Corrfunc.theory.rst b/docs/source/api/Corrfunc.theory.rst index 445767b7..cef35db5 100644 --- a/docs/source/api/Corrfunc.theory.rst +++ b/docs/source/api/Corrfunc.theory.rst @@ -1,5 +1,5 @@ -Corrfunc.theory package -======================= +Corrfunc\.theory package +======================== .. automodule:: Corrfunc.theory :members: @@ -9,40 +9,48 @@ Corrfunc.theory package Submodules ---------- -Corrfunc.theory.DD module -------------------------- +Corrfunc\.theory\.DD module +--------------------------- .. automodule:: Corrfunc.theory.DD :members: :undoc-members: :show-inheritance: -Corrfunc.theory.DDrppi module ------------------------------ +Corrfunc\.theory\.DDrppi module +------------------------------- .. automodule:: Corrfunc.theory.DDrppi :members: :undoc-members: :show-inheritance: -Corrfunc.theory.vpf module --------------------------- +Corrfunc\.theory\.DDsmu module +------------------------------ + +.. automodule:: Corrfunc.theory.DDsmu + :members: + :undoc-members: + :show-inheritance: + +Corrfunc\.theory\.vpf module +---------------------------- .. automodule:: Corrfunc.theory.vpf :members: :undoc-members: :show-inheritance: -Corrfunc.theory.wp module -------------------------- +Corrfunc\.theory\.wp module +--------------------------- .. automodule:: Corrfunc.theory.wp :members: :undoc-members: :show-inheritance: -Corrfunc.theory.xi module -------------------------- +Corrfunc\.theory\.xi module +--------------------------- .. 
automodule:: Corrfunc.theory.xi :members: diff --git a/docs/source/modules/converting_3d_counts.rst b/docs/source/modules/converting_3d_counts.rst index b9a20420..23695d49 100644 --- a/docs/source/modules/converting_3d_counts.rst +++ b/docs/source/modules/converting_3d_counts.rst @@ -18,6 +18,7 @@ wrapper :py:mod:`Corrfunc.theory.DD` >>> # Read the supplied galaxies on a periodic box >>> X, Y, Z = read_catalog() + >>> N = len(X) >>> boxsize = 420.0 >>> nthreads = 2 diff --git a/docs/source/modules/converting_ddtheta_mocks.rst b/docs/source/modules/converting_ddtheta_mocks.rst index 9e60abad..4dd4f559 100644 --- a/docs/source/modules/converting_ddtheta_mocks.rst +++ b/docs/source/modules/converting_ddtheta_mocks.rst @@ -12,6 +12,7 @@ wrapper :py:mod:`Corrfunc.mocks.DDtheta_mocks` .. code-block:: python >>> from os.path import dirname, abspath, join as pjoin + >>> import numpy as np >>> import Corrfunc >>> from Corrfunc.mocks.DDtheta_mocks import DDtheta_mocks >>> from Corrfunc.io import read_catalog @@ -28,11 +29,15 @@ wrapper :py:mod:`Corrfunc.mocks.DDtheta_mocks` >>> random_catalog=pjoin(dirname(abspath(Corrfunc.__file__)), ... "../mocks/tests/data", "Mr19_randoms_northonly.rdcz.ff") >>> rand_RA, rand_DEC, _ = read_catalog(random_catalog) - + >>> rand_N = len(rand_RA) + # Setup the bins >>> nbins = 10 >>> bins = np.linspace(0.1, 10.0, nbins + 1) # note the +1 to nbins + # Number of threads to use + >>> nthreads = 2 + # Auto pair counts in DD >>> autocorr=1 >>> DD_counts = DDtheta_mocks(autocorr, nthreads, bins, diff --git a/docs/source/modules/converting_rp_pi_counts.rst b/docs/source/modules/converting_rp_pi_counts.rst index 77cc492d..0450e597 100644 --- a/docs/source/modules/converting_rp_pi_counts.rst +++ b/docs/source/modules/converting_rp_pi_counts.rst @@ -8,12 +8,14 @@ by using the helper function :py:mod:`Corrfunc.utils.convert_rp_pi_counts_to_wp` .. 
code-block:: python + >>> import numpy as np >>> from Corrfunc.theory import DDrppi >>> from Corrfunc.io import read_catalog >>> from Corrfunc.utils import convert_rp_pi_counts_to_wp # Read the supplied galaxies on a periodic box >>> X, Y, Z = read_catalog() + >>> N = len(X) >>> boxsize = 420.0 # Generate randoms on the box diff --git a/docs/source/modules/mock_functions.rst b/docs/source/modules/mock_functions.rst index 8307f9ba..443fce49 100644 --- a/docs/source/modules/mock_functions.rst +++ b/docs/source/modules/mock_functions.rst @@ -11,6 +11,7 @@ Clustering in 2-D ------------------- * Pair counts (auto or cross) correlations for :math:`\xi(rp, \pi)` -- :py:mod:`Corrfunc.mocks.DDrppi_mocks` +* Pair counts (auto or cross) correlations for :math:`\xi(s, \mu)` -- :py:mod:`Corrfunc.mocks.DDsmu_mocks` Angular clustering -------------------- diff --git a/docs/source/modules/theory_functions.rst b/docs/source/modules/theory_functions.rst index 05ca039a..b08efc94 100644 --- a/docs/source/modules/theory_functions.rst +++ b/docs/source/modules/theory_functions.rst @@ -16,7 +16,8 @@ Clustering in 3-D Clustering in 2-D ------------------ -* Pair counts (auto or cross) correlations for :math:`\xi(rp, \pi)` -- :py:mod:`Corrfunc.theory.DDrppi` +* Pair counts (auto or cross) correlations for :math:`\xi(rp, \pi)` -- :py:mod:`Corrfunc.theory.DDrppi` +* Pair counts (auto or cross) correlations for :math:`\xi(s, \mu)` -- :py:mod:`Corrfunc.theory.DDsmu` * Projected auto-correlation function, :math:`wp(rp)` -- :py:mod:`Corrfunc.theory.wp` Counts-in-cells diff --git a/docs/source/modules/which_corrfunc.rst b/docs/source/modules/which_corrfunc.rst index 8a710e51..3ce2a6dd 100644 --- a/docs/source/modules/which_corrfunc.rst +++ b/docs/source/modules/which_corrfunc.rst @@ -20,8 +20,12 @@ type of data, **and** the desired correlation function you want, the following t | X, Y, Z | True or False | Arbitrary | :math:`\xi(r)` | Pair-counts in 3-D real-space |:py:mod:`Corrfunc.theory.DD` | 
| | | +-----------------------------------------+-------------------------------+---------------------------------------+ | | | | :math:`\xi(r_p, \pi)` | Pair-counts in 2-D |:py:mod:`Corrfunc.theory.DDrppi` | +| | | +-----------------------------------------+-------------------------------+---------------------------------------+ +| | | | :math:`\xi(s, \mu)` | Pair-counts in 2-D |:py:mod:`Corrfunc.theory.DDsmu` | +-------------------+---------------+-----------------+-----------------------------------------+-------------------------------+---------------------------------------+ | ra, dec, cz | False | Arbitrary | :math:`\xi(r_p, \pi)` | Pair-counts in 2-D |:py:mod:`Corrfunc.mocks.DDrppi_mocks` | +| | | +-----------------------------------------+-------------------------------+---------------------------------------+ +| | | | :math:`\xi(s, \mu)` | Pair-counts in 2-D |:py:mod:`Corrfunc.mocks.DDsmu_mocks` | +-------------------+---------------+-----------------+-----------------------------------------+-------------------------------+---------------------------------------+ | ra, dec | False | Arbitrary | :math:`\omega(\theta)` | Pair-counts in angular space |:py:mod:`Corrfunc.mocks.DDtheta_mocks` | +-------------------+---------------+-----------------+-----------------------------------------+-------------------------------+---------------------------------------+ diff --git a/docs/source/python-interface.rst b/docs/source/python-interface.rst index 1ef04079..8e0a28b2 100644 --- a/docs/source/python-interface.rst +++ b/docs/source/python-interface.rst @@ -104,6 +104,13 @@ clustering functions: # Specify the distance to integrate along line of sight >>> pimax = 40.0 + + # Specify the max. 
of the cosine of the angle to the LOS for + # DD(s, mu) + >>> mu_max = 1.0 + + # Specify the number of linear bins in `mu` + >>> nmu_bins = 20 # Specify that an autocorrelation is wanted >>> autocorr = 1 @@ -120,6 +127,8 @@ clustering functions: nbins = 20 rbins = np.logspace(np.log10(rmin), np.log10(rmax), nbins + 1) pimax = 40.0 + mu_max = 1.0 + nmu_bins = 20 autocorr = 1 @@ -169,6 +178,7 @@ bin is ``0.0`` for an autocorrelation, then the self-pairs *will* be counted. Calculating 2-D pair-counts (``Corrfunc.theory.DDrppi``) -------------------------------------------------------- + Corrfunc can return the pair counts in 2-D real-space for a set of arrays. The calculation can be either auto or cross-correlation, *and* with or without periodic boundaries. The projected separation, :math:`r_p` is calculated in the X-Y plane while the @@ -182,6 +192,31 @@ bin is ``0.0`` for an autocorrelation, then the self-pairs *will* be counted. from Corrfunc.theory.DDrppi import DDrppi results_DDrppi = DDrppi(autocorr, nthreads, pimax, rbins, X, Y, Z, boxsize=boxsize) +Calculating 2-D pair-counts (``Corrfunc.theory.DDsmu``) +-------------------------------------------------------- + +Corrfunc can return the pair counts in 2-D real-space for a set of arrays. The +calculation can be either auto or cross-correlation, *and* with or without periodic +boundaries. The spatial separation, :math:`s` is calculated in 3-D while +:math:`\mu` is the cosine of the angle to the line-of-sight and is calculated +assuming that the Z-axis is the line-of-sight. + +.. math:: + + \mathbf{s} &= \mathbf{v_1} - \mathbf{v_2}, \\ + {\mu} &= \frac{\left(z_1 - z_2 \right)}{\Vert\mathbf{s}\Vert} + +where, :math:`\mathbf{v_1}:=(x_1, y_1, z_1)` and :math:`\mathbf{v_2}:=(x_2, y_2, z_2)` are the vectors for the +two points under consideration, and, :math:`\Vert\mathbf{s}\Vert=\sqrt{(x_1 - x_2)^2 + (y_1 - y_2)^2 + (z_1 - z_2)^2}` + +The pairs are always double-counted. 
Additionally, if the smallest +bin is ``0.0`` for an autocorrelation, then the self-pairs *will* be counted. + +.. testcode:: theory + + from Corrfunc.theory.DDsmu import DDsmu + results_DDsmu = DDsmu(autocorr, nthreads, rbins, mu_max, nmu_bins, X, Y, Z, boxsize=boxsize) + Calculating the Counts-in-Cells (``Corrfunc.theory.vpf``) --------------------------------------------------------- @@ -249,6 +284,13 @@ sets up the default arrays and parameters for the actual clustering calculations # Specify the distance to integrate along line of sight pimax = 40.0 + # Specify the max. of the cosine of the angle to the LOS + # for DD(s, mu) + mu_max = 1.0 + + # Specify the number of linear bins in `mu` + nmu_bins = 20 + # Specify that an autocorrelation is wanted autocorr = 1 @@ -270,10 +312,11 @@ sets up the default arrays and parameters for the actual clustering calculations nbins = 20 rbins = np.logspace(np.log10(rmin), np.log10(rmax), nbins + 1) pimax = 40.0 + mu_max = 1.0 + nmu_bins = 20 autocorr = 1 - Calculating 2-D pair counts (``Corrfunc.mocks.DDrppi_mocks``) ------------------------------------------------------------- Corrfunc can calculate pair counts for mock catalogs. The input positions are @@ -291,19 +334,46 @@ equations from `Zehavi et al. 2002 `_ + +.. math:: + + \mathbf{s} &= \mathbf{v_1} - \mathbf{v_2}, \\ + \mathbf{l} &= \frac{1}{2}\left(\mathbf{v_1} + \mathbf{v_2}\right), \\ + \mu &= \left(\mathbf{s} \cdot \mathbf{l}\right)/\left(\Vert\mathbf{l}\Vert \Vert\mathbf{s}\Vert \right) + +where, :math:`\mathbf{v_1}:=(x_1, y_1, z_1)` and :math:`\mathbf{v_2}:=(x_2, y_2, z_2)` are the vectors for the +two points under consideration, and, :math:`\Vert\mathbf{s}\Vert=\sqrt{(x_1 - x_2)^2 + (y_1 - y_2)^2 + (z_1 - z_2)^2}` + +Here is the python code to call ``Corrfunc.mocks.DDsmu_mocks``: + +.. 
testcode:: mocks + + from Corrfunc.mocks.DDsmu_mocks import DDsmu_mocks + results_DDsmu_mocks = DDsmu_mocks(autocorr, cosmology, nthreads, mu_max, nmu_bins, rbins, RA, DEC, CZ) + + Calculating angular pair-counts (``Corrfunc.mocks.DDtheta_mocks``) ------------------------------------------------------------------- Corrfunc can compute angular pair counts for mock catalogs. The input positions diff --git a/index.md b/index.md index 6d5fe276..c568c70a 100644 --- a/index.md +++ b/index.md @@ -337,10 +337,8 @@ with the code including using it in commercial application. Project URL =========== - -- website (https://manodeep.github.io/Corrfunc/) +- website & version control (https://github.com/manodeep/Corrfunc) - documentation (http://corrfunc.rtfd.io/) -- version control (https://github.com/manodeep/Corrfunc) .. |Release| image:: https://img.shields.io/github/release/manodeep/Corrfunc.svg :target: https://github.com/manodeep/Corrfunc/releases/latest diff --git a/meta.yaml b/meta.yaml index 22131649..d66332a5 100644 --- a/meta.yaml +++ b/meta.yaml @@ -48,7 +48,7 @@ test: - python -c "import Corrfunc.call_correlation_functions_mocks as m; m.main()" about: - home: http://manodeep.github.io/Corrfunc/ + home: https://github.com/manodeep/Corrfunc license: MIT license_file: LICENSE summary: Blazing fast correlation functions on the CPU diff --git a/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src b/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src index 99bddc12..dde05f57 100644 --- a/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src +++ b/mocks/DDrppi_mocks/countpairs_rp_pi_mocks_kernels.c.src @@ -372,8 +372,8 @@ static inline int countpairs_rp_pi_mocks_avx_intrinsics_DOUBLE(const int64_t N0, pair.pary.d = pary; pair.parz.d = parz; - pairweight = fallback_weight_func(&pair); - } + pairweight = fallback_weight_func(&pair); + } for(int kbin=nbin-1;kbin>=1;kbin--) { if(sqr_Dperp >= rupp_sqr[kbin-1]) { @@ -705,8 +705,7 @@ static inline int 
countpairs_rp_pi_mocks_sse_intrinsics_DOUBLE(const int64_t N0, const DOUBLE sqr_s = perpx*perpx + perpy*perpy + perpz*perpz; if(sqr_s >= sqr_max_sep) continue; - /* const DOUBLE dot_product = (parx*perpx+pary*perpy+parz*perpz); */ - const DOUBLE norm = (parx*parx+pary*pary+parz*parz); + const DOUBLE norm = (parx*parx + pary*pary + parz*parz); const DOUBLE tmp = dot_product * dot_product; if(tmp >= sqr_pimax * norm) continue; const DOUBLE sqr_Dpar = (dot_product * dot_product)/norm; @@ -724,6 +723,10 @@ static inline int countpairs_rp_pi_mocks_sse_intrinsics_DOUBLE(const int64_t N0, pair.dy.d = perpy; pair.dz.d = perpz; + pair.parx.d = parx; + pair.pary.d = pary; + pair.parz.d = parz; + pairweight = fallback_weight_func(&pair); } @@ -895,9 +898,9 @@ static inline int countpairs_rp_pi_mocks_fallback_DOUBLE(const int64_t N0, DOUBL pair.dy.d = perpy; pair.dz.d = perpz; - pair.dx.d = parx; - pair.dy.d = pary; - pair.dz.d = parz; + pair.parx.d = parx; + pair.pary.d = pary; + pair.parz.d = parz; pairweight = weight_func(&pair); } diff --git a/mocks/DDsmu_mocks/DDsmu_mocks.c b/mocks/DDsmu_mocks/DDsmu_mocks.c new file mode 100644 index 00000000..d24cb00b --- /dev/null +++ b/mocks/DDsmu_mocks/DDsmu_mocks.c @@ -0,0 +1,335 @@ +/* File: DDsmu_mocks.c */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + +/* PROGRAM DDsmu + +--- DDsmu file1 format1 file2 format2 sbinfile mu_max nmu_bins cosmology numthreads [weight_method weights_file1 weights_format1 [weights_file2 weights_format2]] > DDfile +--- Measure the cross-correlation function xi(s,mu) for two different + data files (or autocorrelation if data1=data2). 
+ * file1 = name of first data file + * format1 = format of first data file (a=ascii, c=csv, f=fast-food) + * file2 = name of second data file + * format2 = format of second data file (a=ascii, c=csv, f=fast-food) + * sbinfile = name of ascii file containing the r-bins (rmin rmax for each bin) + * mu_max = maximum mu value (>0 and <= 1.0) + * nmu_bins = number of mu bins + * cosmology = flag to pick-up the cosmology combination to use (set as an array of combinations in ../utils/cosmology_params.c) + * numthreads = number of threads to use +--- OPTIONAL ARGS: + * weight_method = the type of pair weighting to apply. Options are: 'pair_product', 'none'. Default: 'none'. + * weights_file1 = name of file containing the weights corresponding to the first data file + * weights_format1 = format of file containing the weights corresponding to the first data file + * weights_file2 = name of file containing the weights corresponding to the second data file + * weights_format2 = format of file containing the weights corresponding to the second data file +---OUTPUT: + > DDfile = name of output file. 
Contains + +*/ + +#include +#include +#include +#include +#include + +#include "defs.h" //for basic API + all macros +#include "function_precision.h" //definition of DOUBLE +#include "countpairs_s_mu_mocks.h" //function proto-type for countpairs +#include "io.h" //function proto-type for file input +#include "utils.h" //general utilities + + +void Printhelp(void); + +int main(int argc, char *argv[]) +{ + /*---Arguments-------------------------*/ + char *file1=NULL,*file2=NULL, *weights_file1=NULL,*weights_file2=NULL; + char *fileformat1=NULL,*fileformat2=NULL, *weights_fileformat1=NULL,*weights_fileformat2=NULL; + char *sbinfile=NULL; + char *weight_method_str=NULL; + int nmu_bins; + DOUBLE mu_max; + + weight_method_t weight_method = NONE; + int num_weights = 0; + + /*---Data-variables--------------------*/ + int64_t ND1,ND2 ; + + DOUBLE *thetaD1,*phiD1,*czD1, *weights1[MAX_NUM_WEIGHTS]={NULL}; + DOUBLE *thetaD2,*phiD2,*czD2, *weights2[MAX_NUM_WEIGHTS]={NULL}; + + struct timeval t_end,t_start,t0,t1; + double read_time=0.0; + gettimeofday(&t_start,NULL); + int nthreads=1; + + /*---Corrfunc-variables----------------*/ +#if defined(_OPENMP) + const char argnames[][30]={"file1","format1","file2","format2","sbinfile","mu_max","nmu_bins","cosmology flag","numthreads"}; +#else + const char argnames[][30]={"file1","format1","file2","format2","sbinfile","mu_max","nmu_bins","cosmology flag"}; +#endif + const char optargnames[][30]={"weight_method", "weights_file1","weights_format1","weights_file2","weights_format2"}; + + int nargs=sizeof(argnames)/(sizeof(char)*30); + int noptargs=sizeof(optargnames)/(sizeof(char)*30); + + int cosmology=1; + + /*---Read-arguments-----------------------------------*/ + if(argc< (nargs+1)) { + Printhelp() ; + fprintf(stderr,"\nFound: %d parameters\n ",argc-1); + int i; + for(i=1;i = `%s' \n",argv[i]); + } + fprintf(stderr,"\nMissing required parameters \n"); + for(i=argc;i<=nargs;i++) + fprintf(stderr,"\t\t %s = `?'\n",argnames[i-1]); + return 
EXIT_FAILURE; + } + + /* Validate optional arguments */ + int noptargs_given = argc - (nargs + 1); + if(noptargs_given != 0 && noptargs_given != 3 && noptargs_given != 5){ + Printhelp(); + fprintf(stderr,"\nFound: %d optional arguments; must be 0 (no weights), 3 (for one set of weights) or 5 (for two sets)\n ", noptargs_given); + int i; + for(i=nargs+1;i = `%s' \n",argv[i]); + } + return EXIT_FAILURE; + } + + file1=argv[1]; + fileformat1=argv[2]; + file2=argv[3]; + fileformat2=argv[4]; + sbinfile=argv[5]; + + mu_max=1.0; + sscanf(argv[6],"%"REAL_FORMAT,&mu_max) ; + nmu_bins=-10; + sscanf(argv[7],"%d",&nmu_bins) ; + cosmology = atoi(argv[8]); + +#if defined(USE_OMP) && defined(_OPENMP) + nthreads=atoi(argv[9]); + assert(nthreads >= 1 && "Number of threads must be at least 1"); +#endif + + if(noptargs_given >= 3){ + weight_method_str = argv[nargs + 1]; + int wstatus = get_weight_method_by_name(weight_method_str, &weight_method); + if(wstatus != EXIT_SUCCESS){ + fprintf(stderr, "Error: Unknown weight method \"%s\"\n", weight_method_str); + return EXIT_FAILURE; + } + num_weights = get_num_weights_by_method(weight_method); + + weights_file1 = argv[nargs + 2]; + weights_fileformat1 = argv[nargs + 3]; + } + if(noptargs_given >= 5){ + weights_file2 = argv[nargs + 4]; + weights_fileformat2 = argv[nargs + 5]; + } + + int autocorr=0; + if(strcmp(file1,file2)==0) { + autocorr=1; + } + + fprintf(stderr,"Running `%s' with the parameters \n",argv[0]); + fprintf(stderr,"\n\t\t -------------------------------------\n"); + for(int i=1;i = `%s' \n",argv[i]); + } + } + fprintf(stderr,"\t\t -------------------------------------\n"); + + + /*---Read-data1-file----------------------------------*/ + gettimeofday(&t0,NULL); + ND1=read_positions(file1,fileformat1,sizeof(DOUBLE), 3, &phiD1, &thetaD1, &czD1); + gettimeofday(&t1,NULL); + read_time += ADD_DIFF_TIME(t0,t1); + gettimeofday(&t0,NULL); + + /* Read weights file 1 */ + if(weights_file1 != NULL){ + gettimeofday(&t0,NULL); + int64_t 
wND1 = read_columns_into_array(weights_file1,weights_fileformat1, sizeof(DOUBLE), num_weights, (void **) weights1); + gettimeofday(&t1,NULL); + read_time += ADD_DIFF_TIME(t0,t1); + + if(wND1 != ND1){ + fprintf(stderr, "Error: read %"PRId64" lines from %s, but read %"PRId64" from %s\n", wND1, weights_file1, ND1, file1); + return EXIT_FAILURE; + } + } + + if (autocorr==0) { + /*---Read-data2-file----------------------------------*/ + ND2=read_positions(file2,fileformat2,sizeof(DOUBLE), 3, &phiD2, &thetaD2, &czD2); + gettimeofday(&t1,NULL); + read_time += ADD_DIFF_TIME(t0,t1); + + if(weights_file2 != NULL){ + gettimeofday(&t0,NULL); + int64_t wND2 = read_columns_into_array(weights_file2,weights_fileformat2, sizeof(DOUBLE), num_weights, (void **) weights2); + gettimeofday(&t1,NULL); + read_time += ADD_DIFF_TIME(t0,t1); + + if(wND2 != ND2){ + fprintf(stderr, "Error: read %"PRId64" lines from %s, but read %"PRId64" from %s\n", wND2, weights_file2, ND2, file2); + return EXIT_FAILURE; + } + } + } else { + //None of these are required. 
But I prefer to preserve the possibility + ND2 = ND1; + thetaD2 = thetaD1; + phiD2 = phiD1; + czD2 = czD1; + for(int w = 0; w < MAX_NUM_WEIGHTS; w++){ + weights2[w] = weights1[w]; + } + } + + + + /*---Count-pairs--------------------------------------*/ + results_countpairs_mocks_s_mu results; + struct config_options options = get_config_options(); + + /* Pack weights into extra options */ + struct extra_options extra = get_extra_options(weight_method); + for(int w = 0; w < num_weights; w++){ + extra.weights0.weights[w] = (void *) weights1[w]; + extra.weights1.weights[w] = (void *) weights2[w]; + } + + int status = countpairs_mocks_s_mu(ND1,phiD1,thetaD1,czD1, + ND2,phiD2,thetaD2,czD2, + nthreads, + autocorr, + sbinfile, + mu_max, + nmu_bins, + cosmology, + &results, + &options, + &extra); + + free(phiD1);free(thetaD1);free(czD1); + for(int w = 0; w < num_weights; w++){ + free(weights1[w]); + } + if(autocorr == 0) { + free(phiD2);free(thetaD2);free(czD2); + for(int w = 0; w < num_weights; w++){ + free(weights2[w]); + } + } + + if(status != EXIT_SUCCESS) { + return status; + } + + const DOUBLE dmu = mu_max/(DOUBLE)results.nmu_bins ; + const int nmubin = results.nmu_bins; + for(int i=1;i Done - ND1=%"PRId64" ND2=%"PRId64". 
Time taken = %6.2lf seconds, read-in time = %6.2lf seconds \n",ND1,ND2,ADD_DIFF_TIME(t_start,t_end),read_time); + return EXIT_SUCCESS; +} + +/*---Print-help-information---------------------------*/ +void Printhelp(void) +{ + fprintf(stderr,"=========================================================================\n") ; +#if defined(USE_OMP) && defined(_OPENMP) + fprintf(stderr," --- DDsmu file1 format1 file2 format2 sbinfile nmu_bins mu_max cosmology numthreads [weight_method weights_file1 weights_format1 [weights_file2 weights_format2]] > DDfile\n") ; +#else + fprintf(stderr," --- DDsmu file1 format1 file2 format2 sbinfile nmu_bins mu_max cosmology [weight_method weights_file1 weights_format1 [weights_file2 weights_format2]] > DDfile\n") ; +#endif + fprintf(stderr," --- Measure the cross-correlation function xi(rp,pi) for two different\n") ; + fprintf(stderr," data files (or autocorrelation if data1=data2).\n") ; + fprintf(stderr," * data1 = name of first data file\n") ; + fprintf(stderr," * format1 = format of first data file (a=ascii, c=csv, f=fast-food)\n") ; + fprintf(stderr," * data2 = name of second data file\n") ; + fprintf(stderr," * format2 = format of second data file (a=ascii, c=csv, f=fast-food)\n") ; + fprintf(stderr," * sbinfile = name of ascii file containing the r-bins (rmin rmax for each bin)\n") ; + fprintf(stderr," * nmu_bins = number of mu bins\n") ; + fprintf(stderr," * mu_max = maximum mu value (>0 and <= 1.0)\n") ; + fprintf(stderr," * cosmology = flag to pick-up the cosmology combination to use (set as an array of combinations in ../utils/cosmology_params.c)\n") ; +#if defined(USE_OMP) && defined(_OPENMP) + fprintf(stderr," * numthreads = number of threads to use\n"); +#endif + fprintf(stderr," --- OPTIONAL ARGS:\n"); + fprintf(stderr," * weight_method = the type of pair weighting to apply. Options are: 'pair_product', 'none'. 
Default: 'none'.\n"); + fprintf(stderr," * weights_file1 = name of file containing the weights corresponding to the first data file\n"); + fprintf(stderr," * weights_format1 = format of file containing the weights corresponding to the first data file\n"); + fprintf(stderr," * weights_file2 = name of file containing the weights corresponding to the second data file\n"); + fprintf(stderr," * weights_format2 = format of file containing the weights corresponding to the second data file\n"); + fprintf(stderr," ---OUTPUT:\n") ; + fprintf(stderr," > DDfile = name of output file. Contains \n") ; + + fprintf(stderr,"\n\tCompile options: \n"); + +#ifdef OUTPUT_SAVG + fprintf(stderr,"Output SAVG = True\n"); +#else + fprintf(stderr,"Output SAVG = False\n"); +#endif + +#ifdef DOUBLE_PREC + fprintf(stderr,"Precision = double\n"); +#else + fprintf(stderr,"Precision = float\n"); +#endif + +#if defined(USE_AVX) && defined(__AVX__) + fprintf(stderr,"Use AVX = True\n"); +#else + fprintf(stderr,"Use AVX = False\n"); +#endif + +#if defined(USE_OMP) && defined(_OPENMP) + fprintf(stderr,"Use OMP = True\n"); +#else + fprintf(stderr,"Use OMP = False\n"); +#endif + + fprintf(stderr,"=========================================================================\n") ; +} diff --git a/mocks/DDsmu_mocks/Makefile b/mocks/DDsmu_mocks/Makefile new file mode 100644 index 00000000..22571ea3 --- /dev/null +++ b/mocks/DDsmu_mocks/Makefile @@ -0,0 +1,62 @@ +ROOT_DIR := ../.. 
+INSTALL_HEADERS_DIR := $(ROOT_DIR)/include +INSTALL_LIB_DIR := $(ROOT_DIR)/lib +INSTALL_BIN_DIR := $(ROOT_DIR)/bin +UTILS_DIR := $(ROOT_DIR)/utils +IO_DIR := $(ROOT_DIR)/io + +include $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk + +LIBNAME := countpairs_s_mu_mocks +LIBRARY := lib$(LIBNAME).a +LIBSRC := countpairs_s_mu_mocks.c countpairs_s_mu_mocks_impl_double.c countpairs_s_mu_mocks_impl_float.c \ + $(UTILS_DIR)/gridlink_mocks_impl_float.c $(UTILS_DIR)/gridlink_mocks_impl_double.c \ + $(UTILS_DIR)/utils.c $(UTILS_DIR)/progressbar.c $(UTILS_DIR)/cpu_features.c \ + $(UTILS_DIR)/set_cosmo_dist.c $(UTILS_DIR)/cosmology_params.c +LIBRARY_HEADERS := $(LIBNAME).h + +TARGET := DDsmu_mocks +TARGETS := $(TARGET) +TARGETSRC:= $(TARGET).c $(IO_DIR)/io.c $(IO_DIR)/ftread.c $(LIBSRC) +INCL := countpairs_s_mu_mocks_kernels_float.c countpairs_s_mu_mocks_kernels_double.c countpairs_s_mu_mocks_kernels.c.src \ + countpairs_s_mu_mocks_impl.c.src countpairs_s_mu_mocks_impl.h.src countpairs_s_mu_mocks_impl_double.h countpairs_s_mu_mocks_impl_float.h \ + countpairs_s_mu_mocks.h \ + $(IO_DIR)/io.h $(IO_DIR)/ftread.h $(IO_DIR)/io.h \ + $(UTILS_DIR)/gridlink_mocks_impl_double.h $(UTILS_DIR)/gridlink_mocks_impl_float.h $(UTILS_DIR)/gridlink_mocks_impl.h.src \ + $(UTILS_DIR)/cellarray_mocks_float.h $(UTILS_DIR)/cellarray_mocks_double.h $(UTILS_DIR)/cellarray_mocks.h.src \ + $(UTILS_DIR)/set_cosmo_dist.h $(UTILS_DIR)/cosmology_params.h $(UTILS_DIR)/progressbar.h $(UTILS_DIR)/cpu_features.h \ + $(UTILS_DIR)/utils.h $(UTILS_DIR)/function_precision.h $(UTILS_DIR)/avx_calls.h $(UTILS_DIR)/defs.h \ + $(UTILS_DIR)/weight_functions_double.h $(UTILS_DIR)/weight_functions_float.h $(UTILS_DIR)/weight_functions.h.src \ + $(UTILS_DIR)/weight_defs_double.h $(UTILS_DIR)/weight_defs_float.h $(UTILS_DIR)/weight_defs.h.src + +TARGETOBJS:=$(TARGETSRC:.c=.o) +LIBOBJS:=$(LIBSRC:.c=.o) + +all: $(TARGETS) $(TARGETSRC) $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk Makefile + +EXTRA_INCL:=$(GSL_CFLAGS) 
+EXTRA_LINK:=$(GSL_LINK) + +countpairs_s_mu_mocks_impl_double.o:countpairs_s_mu_mocks_impl_double.c countpairs_s_mu_mocks_impl_double.h countpairs_s_mu_mocks_kernels_double.c $(UTILS_DIR)/gridlink_mocks_impl_double.h $(UTILS_DIR)/cellarray_mocks_double.h +countpairs_s_mu_mocks_impl_float.o:countpairs_s_mu_mocks_impl_float.c countpairs_s_mu_mocks_impl_float.h countpairs_s_mu_mocks_kernels_float.c $(UTILS_DIR)/gridlink_mocks_impl_float.h $(UTILS_DIR)/cellarray_mocks_float.h +countpairs_s_mu_mocks.o:countpairs_s_mu_mocks.c countpairs_s_mu_mocks_impl_double.h countpairs_s_mu_mocks_impl_float.h $(INCL) + + +libs: lib +lib: $(LIBRARY) + +install: $(INSTALL_BIN_DIR)/$(TARGET) $(INSTALL_LIB_DIR)/$(LIBRARY) $(INSTALL_HEADERS_DIR)/$(LIBRARY_HEADERS) + +tests: + $(MAKE) -C ../tests DDsmu_mocks + +clean: + $(RM) $(TARGETS) $(TARGETOBJS) $(LIBRARY) countpairs_s_mu_mocks_impl_float.[ch] countpairs_s_mu_mocks_impl_double.[ch] countpairs_s_mu_mocks_kernels_double.c countpairs_s_mu_mocks_kernels_float.c + $(RM) -R *.dSYM + +distclean:clean | $(INSTALL_LIB_DIR) $(INSTALL_HEADERS_DIR) $(INSTALL_BIN_DIR) + cd $(INSTALL_LIB_DIR) && $(RM) $(LIBRARY) + cd $(INSTALL_HEADERS_DIR) && $(RM) $(LIBRARY_HEADERS) + cd $(INSTALL_BIN_DIR) && $(RM) $(TARGETS) + +include $(ROOT_DIR)/rules.mk diff --git a/mocks/DDsmu_mocks/countpairs_s_mu_mocks.c b/mocks/DDsmu_mocks/countpairs_s_mu_mocks.c new file mode 100644 index 00000000..3197cf9f --- /dev/null +++ b/mocks/DDsmu_mocks/countpairs_s_mu_mocks.c @@ -0,0 +1,77 @@ +/* File: countpairs_s_mu_mocks.c */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. 
See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + +#include +#include +#include + +#include "countpairs_s_mu_mocks.h" //function proto-type for API +#include "countpairs_s_mu_mocks_impl_double.h"//actual implementations for double +#include "countpairs_s_mu_mocks_impl_float.h"//actual implementations for float + +void free_results_mocks_s_mu(results_countpairs_mocks_s_mu *results) +{ + if(results==NULL) + return; + + free(results->npairs); + free(results->supp); + free(results->savg); + free(results->weightavg); +} + + +int countpairs_mocks_s_mu(const int64_t ND1, void *phi1, void *theta1, void *czD1, + const int64_t ND2, void *phi2, void *theta2, void *czD2, + const int numthreads, + const int autocorr, + const char *sbinfile, + const double mu_max, + const int nmu_bins, + const int cosmology, + results_countpairs_mocks_s_mu *results, + struct config_options *options, + struct extra_options *extra) +{ + if( ! (options->float_type == sizeof(float) || options->float_type == sizeof(double))){ + fprintf(stderr,"ERROR: In %s> Can only handle doubles or floats. Got an array of size = %zu\n", + __FUNCTION__, options->float_type); + return EXIT_FAILURE; + } + + if( strncmp(options->version, STR(VERSION), sizeof(options->version)/sizeof(char)-1) != 0) { + fprintf(stderr,"Error: Do not know this API version = `%s'. 
Expected version = `%s'\n", options->version, STR(VERSION)); + return EXIT_FAILURE; + } + + if(options->float_type == sizeof(float)) { + return countpairs_mocks_s_mu_float(ND1, (float *) phi1, (float *) theta1, (float *) czD1, + ND2, (float *) phi2, (float *) theta2, (float *) czD2, + numthreads, + autocorr, + sbinfile, + mu_max, + nmu_bins, + cosmology, + results, + options, + extra); + } else { + return countpairs_mocks_s_mu_double(ND1, (double *) phi1, (double *) theta1, (double *) czD1, + ND2, (double *) phi2, (double *) theta2, (double *) czD2, + numthreads, + autocorr, + sbinfile, + mu_max, + nmu_bins, + cosmology, + results, + options, + extra); + } +} diff --git a/mocks/DDsmu_mocks/countpairs_s_mu_mocks.h b/mocks/DDsmu_mocks/countpairs_s_mu_mocks.h new file mode 100644 index 00000000..2dc7d11d --- /dev/null +++ b/mocks/DDsmu_mocks/countpairs_s_mu_mocks.h @@ -0,0 +1,46 @@ +/* File: countpairs_s_mu_mocks.h */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. 
See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "defs.h" +#include //for uint64_t + + //define the results structure + typedef struct{ + uint64_t *npairs; + double *supp; + double *savg; + double mu_max; + double mu_min;//not used -> assumed to be 0.0 + double *weightavg; + int nsbin; + int nmu_bins; + } results_countpairs_mocks_s_mu; + + int countpairs_mocks_s_mu(const int64_t ND1, void *theta1, void *phi1, void *czD1, + const int64_t ND2, void *theta2, void *phi2, void *czD2, + const int numthreads, + const int autocorr, + const char *sbinfile, + const double mu_max, + const int nmu_bins, + const int cosmology, + results_countpairs_mocks_s_mu *results, + struct config_options *options, + struct extra_options *extra); + + void free_results_mocks_s_mu(results_countpairs_mocks_s_mu *results); + +#ifdef __cplusplus +} +#endif diff --git a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src new file mode 100644 index 00000000..b4fbf26a --- /dev/null +++ b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.c.src @@ -0,0 +1,796 @@ +// # -*- mode: c -*- +/* File: countpairs_s_mu_mocks_impl.c.src */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. 
See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + +#include +#include +#include +#include +#include +#include + + +#include "countpairs_s_mu_mocks_impl_DOUBLE.h" +#include "countpairs_s_mu_mocks_kernels_DOUBLE.c" +#include "cellarray_mocks_DOUBLE.h" +#include "gridlink_mocks_impl_DOUBLE.h" + +#include "defs.h" +#include "utils.h" +#include "cosmology_params.h" +#include "set_cosmo_dist.h" +#include "cpu_features.h" +#include "progressbar.h" + +#if defined(_OPENMP) +#include +#endif + +int interrupt_status_DDsmu_mocks_DOUBLE=EXIT_SUCCESS; + +void interrupt_handler_countpairs_s_mu_mocks_DOUBLE(int signo) +{ + fprintf(stderr,"Received signal = `%s' (signo = %d). Aborting \n",strsignal(signo), signo); + interrupt_status_DDsmu_mocks_DOUBLE = EXIT_FAILURE; +} + + +int check_ra_dec_cz_s_mu_DOUBLE(const int64_t N, DOUBLE *phi, DOUBLE *theta, DOUBLE *cz) +{ + + if(N==0) { + return EXIT_SUCCESS; + } + if(phi == NULL || theta == NULL || cz == NULL) { + fprintf(stderr,"Input arrays can not be NULL. Have RA = %p DEC = %p cz = %p\n", phi, theta, cz); + return EXIT_FAILURE; + } + + int fix_cz = 0; + int fix_ra = 0; + int fix_dec = 0; + + const DOUBLE max_cz_threshold = 10.0;//if I find that max cz is smaller than this threshold, then I will assume z has been supplied rather than cz + DOUBLE max_cz = 0.0; + //Check input cz -> ensure that cz contains cz and not z + for(int64_t i=0;i max_cz) max_cz = cz[i]; + if(phi[i] < 0.0) { + fix_ra = 1; + } + if(theta[i] > 90.0) { + fix_dec = 1; + } + if(theta[i] > 180) { + fprintf(stderr,"theta[%"PRId64"] = %"REAL_FORMAT"should be less than 180 deg\n", i, theta[i]); + return EXIT_FAILURE; + } + } + if(max_cz < max_cz_threshold) fix_cz = 1; + + //Only run the loop if something needs to be fixed + if(fix_cz==1 || fix_ra == 1 || fix_dec == 1) { + if(fix_ra == 1) { + fprintf(stderr,"%s> Out of range values found for ra. Expected ra to be in the range [0.0,360.0]. 
Found ra values in [-180,180] -- fixing that\n", __FUNCTION__); + } + if(fix_dec == 1) { + fprintf(stderr,"%s> Out of range values found for dec. Expected dec to be in the range [-90.0,90.0]. Found dec values in [0,180] -- fixing that\n", __FUNCTION__); + } + if(fix_cz == 1) { + fprintf(stderr,"%s> Out of range values found for cz. Expected input to be `cz' but found `z' instead. max_cz (found in input) = %"REAL_FORMAT" threshold " + "= %"REAL_FORMAT"\n",__FUNCTION__,max_cz,max_cz_threshold); + } + + for(int64_t i=0;i convert to cz + } + } + } + + return EXIT_SUCCESS; +} + + +countpairs_mocks_func_ptr_DOUBLE countpairs_s_mu_mocks_driver_DOUBLE(const struct config_options *options) +{ + + static countpairs_mocks_func_ptr_DOUBLE function = NULL; + static isa old_isa=-1; + if(old_isa == options->instruction_set) { + return function; + } + + /* Array of function pointers */ + countpairs_mocks_func_ptr_DOUBLE allfunctions[] = { +#ifdef __AVX__ + countpairs_s_mu_mocks_avx_intrinsics_DOUBLE, +#endif +#ifdef __SSE4_2__ + countpairs_s_mu_mocks_sse_intrinsics_DOUBLE, +#endif + countpairs_s_mu_mocks_fallback_DOUBLE + }; + + const int num_functions = sizeof(allfunctions)/sizeof(void *); + const int fallback_offset = num_functions - 1; +#if defined(__AVX__) || defined __SSE4_2__ + const int highest_isa = instrset_detect(); +#endif + int curr_offset = 0; + + /* Now check if AVX is supported by the CPU */ + int avx_offset = fallback_offset; +#ifdef __AVX__ + avx_offset = highest_isa >= 7 ? curr_offset:fallback_offset; + curr_offset++; +#endif + + /* Is the SSE function supported at runtime and enabled at compile-time?*/ + int sse_offset = fallback_offset; +#ifdef __SSE4_2__ + sse_offset = highest_isa >= 6 ? 
curr_offset:fallback_offset; + curr_offset++; +#endif + if( curr_offset != fallback_offset) { + fprintf(stderr,"ERROR: Bug in code (current offset = %d *should equal* fallback function offset = %d)\n", + curr_offset, fallback_offset); + return NULL; + } + + int function_dispatch=0; + /* Check that cpu supports feature */ + if(options->instruction_set >= 0) { + switch(options->instruction_set) { + case(AVX512F): + case(AVX2): + case(AVX):function_dispatch=avx_offset;break; + case(SSE42): function_dispatch=sse_offset;break; + default:function_dispatch=fallback_offset;break; + } + } + + if(function_dispatch >= num_functions) { + fprintf(stderr,"In %s> ERROR: Could not resolve the correct function.\n Function index = %d must lie between [0, %d)\n", + __FUNCTION__, function_dispatch, num_functions); + return NULL; + } + function = allfunctions[function_dispatch]; + old_isa = options->instruction_set; + + if(options->verbose){ + // This must be first (AVX/SSE may be aliased to fallback) + if(function_dispatch == fallback_offset){ + fprintf(stderr,"Using fallback kernel\n"); + } else if(function_dispatch == avx_offset){ + fprintf(stderr,"Using AVX kernel\n"); + } else if(function_dispatch == sse_offset){ + fprintf(stderr,"Using SSE kernel\n"); + } else { + printf("Unknown kernel!\n"); + } + } + + return function; +} + + +int countpairs_mocks_s_mu_DOUBLE(const int64_t ND1, DOUBLE *ra1, DOUBLE *dec1, DOUBLE *czD1, + const int64_t ND2, DOUBLE *ra2, DOUBLE *dec2, DOUBLE *czD2, + const int numthreads, + const int autocorr, + const char *sbinfile, + const double max_mu, + const int nmu_bins, + const int cosmology, + results_countpairs_mocks_s_mu *results, + struct config_options *options, struct extra_options *extra) +{ + + if(options->float_type != sizeof(DOUBLE)) { + fprintf(stderr,"ERROR: In %s> Can only handle arrays of size=%zu. 
Got an array of size = %zu\n", + __FUNCTION__, sizeof(DOUBLE), options->float_type); + return EXIT_FAILURE; + } + + // If no extra options were passed, create dummy options + // This allows us to pass arguments like "extra->weights0" below; + // they'll just be NULLs, which is the correct behavior + struct extra_options dummy_extra; + if(extra == NULL){ + weight_method_t dummy_method = NONE; + dummy_extra = get_extra_options(dummy_method); + extra = &dummy_extra; + } + + int need_weightavg = extra->weight_method != NONE; + + options->sort_on_z = 1; + struct timeval t0; + if(options->c_api_timer) { + gettimeofday(&t0, NULL); + } + + //Check inputs + if(ND1 == 0 || (autocorr == 0 && ND2 == 0)) { + return EXIT_SUCCESS; + } + + //Check inputs + int status1 = check_ra_dec_cz_s_mu_DOUBLE(ND1, ra1, dec1, czD1); + if(status1 != EXIT_SUCCESS) { + return status1; + } + if(autocorr==0) { + int status2 = check_ra_dec_cz_s_mu_DOUBLE(ND2, ra2, dec2, czD2); + if(status2 != EXIT_SUCCESS) { + return status2; + } + } + +#if defined(_OPENMP) + omp_set_num_threads(numthreads); +#else + (void) numthreads; +#endif + + if(options->max_cells_per_dim == 0) { + fprintf(stderr,"Warning: Max. cells per dimension is set to 0 - resetting to `NLATMAX' = %d\n", NLATMAX); + options->max_cells_per_dim = NLATMAX; + } + for(int i=0;i<3;i++) { + if(options->bin_refine_factors[i] < 1) { + fprintf(stderr,"Warning: bin refine factor along axis = %d *must* be >=1. Instead found bin refine factor =%d\n", + i, options->bin_refine_factors[i]); + reset_bin_refine_factors(options); + break;/* all factors have been reset -> no point continuing with the loop */ + } + } + + /* setup interrupt handler -> mostly useful during the python execution. + Let's Ctrl-C abort the extension */ + SETUP_INTERRUPT_HANDLERS(interrupt_handler_countpairs_s_mu_mocks_DOUBLE); + + //Try to initialize cosmology - code will exit if comoslogy is not implemented. 
+ //Putting in a different scope so I can call the variable status + { + int status = init_cosmology(cosmology); + if(status != EXIT_SUCCESS) { + return status; + } + } + + /*********************** + *initializing the bins + ************************/ + double *supp; + int nsbin; + double smin,smax; + setup_bins(sbinfile,&smin,&smax,&nsbin,&supp); + if( ! (smin > 0.0 && smax > 0.0 && smin < smax && nsbin > 0)) { + fprintf(stderr,"Error: Could not setup with S bins correctly. (smin = %lf, smax = %lf, with nbins = %d). Expected non-zero smin/smax with smax > smin and nbins >=1 \n", + smin, smax, nsbin); + return EXIT_FAILURE; + } + + + if(max_mu <= 0.0 || max_mu > 1.0) { + fprintf(stderr,"Error: max_mu (max. value for the cosine of the angle with line of sight) must be greater than 0 and at most 1).\n" + "The passed value is max_mu = %lf. Please change it to be > 0 and <= 1.0\n", max_mu); + return EXIT_FAILURE; + } + + if(nmu_bins < 1 ) { + fprintf(stderr,"Error: Number of mu bins = %d must be at least 1\n", nmu_bins); + return EXIT_FAILURE; + } + + //Change cz into co-moving distance + DOUBLE *D1 = NULL, *D2 = NULL; + if(options->is_comoving_dist == 0) { + D1 = my_malloc(sizeof(*D1),ND1); + D2 = autocorr == 0 ? my_malloc(sizeof(*D2),ND2):D1; + } else { + D1 = czD1; + D2 = autocorr == 0 ? 
czD2:czD1; + } + + if(D1 == NULL || D2 == NULL) { + free(D1);free(D2); + return EXIT_FAILURE; + } + + + if(options->is_comoving_dist == 0) { + //Setup variables to do the cz->comoving distance + DOUBLE czmax = 0.0; + const DOUBLE inv_speed_of_light = 1.0/SPEED_OF_LIGHT; + get_max_DOUBLE(ND1, czD1, &czmax); + if(autocorr == 0) { + get_max_DOUBLE(ND2, czD2, &czmax); + } + const double zmax = czmax * inv_speed_of_light + 0.01; + + const int workspace_size = 10000; + double *interp_redshift = my_calloc(sizeof(*interp_redshift), workspace_size);//the interpolation is done in 'z' and not in 'cz' + double *interp_comoving_dist = my_calloc(sizeof(*interp_comoving_dist),workspace_size); + int Nzdc = set_cosmo_dist(zmax, workspace_size, interp_redshift, interp_comoving_dist, cosmology); + if(Nzdc < 0) { + free(interp_redshift);free(interp_comoving_dist); + return EXIT_FAILURE; + } + + gsl_interp *interpolation; + gsl_interp_accel *accelerator; + accelerator = gsl_interp_accel_alloc(); + interpolation = gsl_interp_alloc (gsl_interp_linear,Nzdc); + gsl_interp_init(interpolation, interp_redshift, interp_comoving_dist, Nzdc); + for(int64_t i=0;ibin_refine_factors[0] = 1; + } + if(smax < 0.05*ydiff) { + options->bin_refine_factors[1] = 1; + } + if(smax < 0.05*zdiff) { + options->bin_refine_factors[2] = 1; + } + } + + /*---Create 3-D lattice--------------------------------------*/ + int nmesh_x=0,nmesh_y=0,nmesh_z=0; + cellarray_mocks_index_particles_DOUBLE *lattice1 = gridlink_mocks_index_particles_DOUBLE(ND1, X1, Y1, Z1, D1, &(extra->weights0), + xmin, xmax, ymin, ymax, zmin, zmax, + smax, smax, smax, + options->bin_refine_factors[0], + options->bin_refine_factors[1], + options->bin_refine_factors[2], + &nmesh_x, &nmesh_y, &nmesh_z, + options); + if(lattice1 == NULL) { + return EXIT_FAILURE; + } + + /* If there too few cells (BOOST_CELL_THRESH is ~10), and the number of cells can be increased, then boost bin refine factor by ~1*/ + const double avg_np = 
((double)ND1)/(nmesh_x*nmesh_y*nmesh_z); + const int8_t max_nmesh = fmax(nmesh_x, fmax(nmesh_y, nmesh_z)); + if((max_nmesh <= BOOST_CELL_THRESH || avg_np >= BOOST_NUMPART_THRESH) + && max_nmesh < options->max_cells_per_dim) { + fprintf(stderr,"%s> gridlink seems inefficient. nmesh = (%d, %d, %d); avg_np = %.3g. ", __FUNCTION__, nmesh_x, nmesh_y, nmesh_z, avg_np); + if(get_bin_refine_scheme(options) == BINNING_DFL) { + fprintf(stderr,"Boosting bin refine factor - should lead to better performance\n"); + // Only boost the first two dimensions. Prevents excessive refinement. + for(int i=0;i<2;i++) { + options->bin_refine_factors[i] += BOOST_BIN_REF; + } + + free_cellarray_mocks_index_particles_DOUBLE(lattice1, nmesh_x * (int64_t) nmesh_y * nmesh_z); + lattice1 = gridlink_mocks_index_particles_DOUBLE(ND1, X1, Y1, Z1, D1, &(extra->weights0), + xmin, xmax, ymin, ymax, zmin, zmax, + smax, smax, smax, + options->bin_refine_factors[0], + options->bin_refine_factors[1], + options->bin_refine_factors[2], + &nmesh_x, &nmesh_y, &nmesh_z, + options); + if(lattice1 == NULL) { + return EXIT_FAILURE; + } + } else { + fprintf(stderr,"Boosting bin refine factor could have helped. However, since custom bin refine factors " + "= (%d, %d, %d) are being used - continuing with inefficient mesh\n", options->bin_refine_factors[0], + options->bin_refine_factors[1], options->bin_refine_factors[2]); + + } + } + + cellarray_mocks_index_particles_DOUBLE *lattice2 = NULL; + if(autocorr==0) { + int ngrid2_x=0,ngrid2_y=0,ngrid2_z=0; + lattice2 = gridlink_mocks_index_particles_DOUBLE(ND2, X2, Y2, Z2, D2, &(extra->weights1), + xmin, xmax, + ymin, ymax, + zmin, zmax, + smax, smax, smax, + options->bin_refine_factors[0], + options->bin_refine_factors[1], + options->bin_refine_factors[2], + &ngrid2_x, &ngrid2_y, &ngrid2_z, options); + if(lattice2 == NULL) { + return EXIT_FAILURE; + } + if( ! 
(nmesh_x == ngrid2_x && nmesh_y == ngrid2_y && nmesh_z == ngrid2_z) ) { + fprintf(stderr,"Error: The two sets of 3-D lattices do not have identical bins. First has dims (%d, %d, %d) while second has (%d, %d, %d)\n", + nmesh_x, nmesh_y, nmesh_z, ngrid2_x, ngrid2_y, ngrid2_z); + return EXIT_FAILURE; + } + } else { + lattice2 = lattice1; + } + free(X1);free(Y1);free(Z1); + if(autocorr == 0) { + free(X2);free(Y2);free(Z2); + } + + if(options->is_comoving_dist == 0) { + free(D1); + if(autocorr == 0) { + free(D2); + } + } + + + + const int64_t totncells = (int64_t) nmesh_x * (int64_t) nmesh_y * (int64_t) nmesh_z; + { + int status = assign_ngb_cells_mocks_index_particles_DOUBLE(lattice1, lattice2, totncells, + options->bin_refine_factors[0], options->bin_refine_factors[1], options->bin_refine_factors[2], + nmesh_x, nmesh_y, nmesh_z, + autocorr); + if(status != EXIT_SUCCESS) { + free_cellarray_mocks_index_particles_DOUBLE(lattice1, totncells); + if(autocorr == 0) { + free_cellarray_mocks_index_particles_DOUBLE(lattice2, totncells); + } + free(supp); + return EXIT_FAILURE; + } + } + /*---Gridlink-variables----------------*/ + const int totnbins = (nmu_bins+1)*(nsbin+1); +#if defined(_OPENMP) + uint64_t **all_npairs = (uint64_t **) matrix_calloc(sizeof(uint64_t), numthreads, totnbins); + DOUBLE **all_savg = NULL; + if(options->need_avg_sep){ + all_savg = (DOUBLE **) matrix_calloc(sizeof(DOUBLE),numthreads,totnbins); + } + DOUBLE **all_weightavg = NULL; + if(need_weightavg) { + all_weightavg = (DOUBLE **) matrix_calloc(sizeof(DOUBLE),numthreads,totnbins); + } + +#else //USE_OMP + uint64_t npairs[totnbins]; + DOUBLE savg[totnbins], weightavg[totnbins]; + + for(int i=0; i need_avg_sep) { + savg[i] = ZERO; + } + if(need_weightavg) { + weightavg[i] = ZERO; + } + } +#endif //USE_OMP + + /* runtime dispatch - get the function pointer */ + countpairs_mocks_func_ptr_DOUBLE countpairs_s_mu_mocks_function_DOUBLE = countpairs_s_mu_mocks_driver_DOUBLE(options); + 
if(countpairs_s_mu_mocks_function_DOUBLE == NULL) { + return EXIT_FAILURE; + } + + int interrupted=0,numdone=0, abort_status=EXIT_SUCCESS; + if(options->verbose) { + init_my_progressbar(totncells,&interrupted); + } + + +#if defined(_OPENMP) +#pragma omp parallel shared(numdone, abort_status, interrupt_status_DDsmu_mocks_DOUBLE) + { + const int tid = omp_get_thread_num(); + uint64_t npairs[totnbins]; + DOUBLE savg[totnbins], weightavg[totnbins]; + for(int i=0;ineed_avg_sep) { + savg[i] = ZERO; + } + if(need_weightavg) { + weightavg[i] = ZERO; + } + } + +#pragma omp for schedule(dynamic) +#endif//USE_OMP + + /*---Loop-over-Data1-particles--------------------*/ + for(int64_t index1=0;index1verbose) { +#if defined(_OPENMP) + if (omp_get_thread_num() == 0) +#endif + my_progressbar(numdone,&interrupted); + + +#if defined(_OPENMP) +#pragma omp atomic +#endif + numdone++; + } + + const cellarray_mocks_index_particles_DOUBLE *first = &(lattice1[index1]); + if(first->nelements == 0) { + continue; + } + DOUBLE *x1 = first->x; + DOUBLE *y1 = first->y; + DOUBLE *z1 = first->z; + DOUBLE *d1 = first->cz; + const weight_struct_DOUBLE *weights1 = &(first->weights); + const int64_t N1 = first->nelements; + + if(autocorr == 1) { + int same_cell = 1; + DOUBLE *this_savg = options->need_avg_sep ? &(savg[0]):NULL; + DOUBLE *this_weightavg = need_weightavg ? 
weightavg:NULL; + const int status = countpairs_s_mu_mocks_function_DOUBLE(N1, x1, y1, z1, d1, weights1, + N1, x1, y1, z1, d1, weights1, + same_cell, + options->fast_divide, + smax, smin, nsbin, + nmu_bins, supp_sqr, mu_max, + this_savg, npairs, + this_weightavg, extra->weight_method); + /* This actually causes a race condition under OpenMP - but mostly + I care that an error occurred - rather than the exact value of + the error status */ + abort_status |= status; + } + + for(int64_t ngb=0;ngbnum_ngb;ngb++){ + const cellarray_mocks_index_particles_DOUBLE *second = first->ngb_cells[ngb]; + if(second->nelements == 0) { + continue; + } + const int same_cell = 0; + DOUBLE *x2 = second->x; + DOUBLE *y2 = second->y; + DOUBLE *z2 = second->z; + DOUBLE *d2 = second->cz; + const weight_struct_DOUBLE *weights2 = &(second->weights); + const int64_t N2 = second->nelements; + DOUBLE *this_savg = options->need_avg_sep ? &(savg[0]):NULL; + DOUBLE *this_weightavg = need_weightavg ? weightavg:NULL; + const int status = countpairs_s_mu_mocks_function_DOUBLE(N1, x1, y1, z1, d1, weights1, + N2, x2, y2, z2, d2, weights2, + same_cell, + options->fast_divide, + smax, smin, nsbin, + nmu_bins, supp_sqr, mu_max, + this_savg, npairs, + this_weightavg, extra->weight_method); + /* This actually causes a race condition under OpenMP - but mostly + I care that an error occurred - rather than the exact value of + the error status */ + abort_status |= status; + }//loop over ngb cells + }//abort_status check + }//i loop over ND1 particles +#if defined(_OPENMP) + for(int i=0;ineed_avg_sep) { + all_savg[tid][i] = savg[i]; + } + if(need_weightavg) { + all_weightavg[tid][i] = weightavg[i]; + } + } + }//close the omp parallel region +#endif//USE_OMP + + free_cellarray_mocks_index_particles_DOUBLE(lattice1,totncells); + if(autocorr == 0) { + free_cellarray_mocks_index_particles_DOUBLE(lattice2,totncells); + } + + if(abort_status != EXIT_SUCCESS || interrupt_status_DDsmu_mocks_DOUBLE != EXIT_SUCCESS) { + 
/* Cleanup memory here if aborting */ + free(supp); +#if defined(_OPENMP) + matrix_free((void **) all_npairs, numthreads); + if(options->need_avg_sep) { + matrix_free((void **) all_savg, numthreads); + } + if(need_weightavg) { + matrix_free((void **) all_weightavg, numthreads); + } +#endif + return EXIT_FAILURE; + } + + if(options->verbose) { + finish_myprogressbar(&interrupted); + } + + + +#if defined(_OPENMP) + uint64_t npairs[totnbins]; + DOUBLE savg[totnbins], weightavg[totnbins]; + for(int i=0;ineed_avg_sep) { + savg[i] = ZERO; + } + if(need_weightavg) { + weightavg[i] = ZERO; + } + } + + for(int i=0;ineed_avg_sep) { + savg[j] += all_savg[i][j]; + } + if(need_weightavg) { + weightavg[j] += all_weightavg[i][j]; + } + } + } + matrix_free((void **) all_npairs, numthreads); + if(options->need_avg_sep) { + matrix_free((void **) all_savg, numthreads); + } + if(need_weightavg) { + matrix_free((void **) all_weightavg, numthreads); + } +#endif //USE_OMP + + //The code does not double count for autocorrelations + //which means the npairs and savg values need to be doubled; + if(autocorr == 1) { + const uint64_t int_fac = 2; + const DOUBLE dbl_fac = (DOUBLE) 2.0; + for(int i=0;ineed_avg_sep) { + savg[i] *= dbl_fac; + } + if(need_weightavg) { + weightavg[i] *= dbl_fac; + } + } + } + + for(int i=0;i 0) { + if(options->need_avg_sep) { + savg[i] /= (DOUBLE) npairs[i] ; + } + if(need_weightavg) { + weightavg[i] /= (DOUBLE) npairs[i]; + } + } + } + + results->nsbin = nsbin; + results->nmu_bins = nmu_bins; + results->mu_max = max_mu;//NOTE max_mu which is double and not mu_max (which might be float) + results->mu_min = ZERO; + results->npairs = my_malloc(sizeof(*(results->npairs)), totnbins); + results->supp = my_malloc(sizeof(*(results->supp)) , nsbin); + results->savg = my_malloc(sizeof(*(results->savg)) , totnbins); + results->weightavg = my_calloc(sizeof(double) , totnbins); + if(results->npairs == NULL || results->supp == NULL || results->savg == NULL || results->weightavg 
== NULL) { + free_results_mocks_s_mu(results); + free(supp); + return EXIT_FAILURE; + } + + for(int i=0;isupp[i] = supp[i]; + for(int j=0;j= totnbins ) { + fprintf(stderr, "ERROR: In %s> index = %d must be in range [0, %d)\n", __FUNCTION__, index, totnbins); + free_results_mocks_s_mu(results); + free(supp); + return EXIT_FAILURE; + } + results->npairs[index] = npairs[index]; + results->savg[index] = ZERO; + results->weightavg[index] = ZERO; + if(options->need_avg_sep) { + results->savg[index] = savg[index]; + } + if(need_weightavg) { + results->weightavg[index] = weightavg[index]; + } + } + } + free(supp); + + /* reset interrupt handlers to default */ + RESET_INTERRUPT_HANDLERS(); + reset_bin_refine_factors(options); + + if(options->c_api_timer) { + struct timeval t1; + gettimeofday(&t1, NULL); + options->c_api_time = ADD_DIFF_TIME(t0, t1); + } + + return EXIT_SUCCESS; +} diff --git a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.h.src b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.h.src new file mode 100644 index 00000000..acf0b6bb --- /dev/null +++ b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_impl.h.src @@ -0,0 +1,51 @@ +// # -*- mode: c -*- +/* File: countpairs_s_mu_mocks_impl.h.src */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. 
See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "defs.h" //for struct config_options +#include "weight_defs_DOUBLE.h" +#include //for uint64_t + +#include "countpairs_s_mu_mocks.h" //for definition of results_countpairs_mocks + + extern void interrupt_handler_countpairs_s_mu_mocks_DOUBLE(int signo); + + typedef int (*countpairs_mocks_func_ptr_DOUBLE)(const int64_t N0, DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, DOUBLE *d0, const weight_struct_DOUBLE *weights0, + const int64_t N1, DOUBLE *x1, DOUBLE *y1, DOUBLE *z1, DOUBLE *d1, const weight_struct_DOUBLE *weights1, + const int same_cell, + const int fast_divide, + const DOUBLE smax, const DOUBLE smin, const int nsbin, + const int nmu_bins, const DOUBLE *supp_sqr, + const DOUBLE mu_max, + DOUBLE *src_savg, uint64_t *src_npairs, + DOUBLE *src_weightavg, const weight_method_t weight_method); + + extern countpairs_mocks_func_ptr_DOUBLE countpairs_s_mu_mocks_driver_DOUBLE(const struct config_options *options) __attribute__((warn_unused_result)); + + extern int countpairs_mocks_s_mu_DOUBLE(const int64_t ND1, DOUBLE *theta1, DOUBLE *phi1, DOUBLE *czD1, + const int64_t ND2, DOUBLE *theta2, DOUBLE *phi2, DOUBLE *czD2, + const int numthreads, + const int autocorr, + const char *sbinfile, + const double mu_max, + const int nmu_bins, + const int cosmology, + results_countpairs_mocks_s_mu *results, + struct config_options *options, struct extra_options *extra); + + extern int check_ra_dec_cz_s_mu_DOUBLE(const int64_t N, DOUBLE *phi, DOUBLE *theta, DOUBLE *cz); + +#ifdef __cplusplus +} +#endif diff --git a/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src new file mode 100644 index 00000000..077a76e9 --- /dev/null +++ b/mocks/DDsmu_mocks/countpairs_s_mu_mocks_kernels.c.src @@ -0,0 +1,884 @@ +// # -*- mode: c -*- +/* File: countpairs_s_mu_mocks_kernels.c */ +/* + 
This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + + +#include +#include +#include +#include +#include + +#include "defs.h" +#include "function_precision.h" +#include "utils.h" + +#include "weight_functions_DOUBLE.h" + +#if defined(__AVX__) +#include "avx_calls.h" + +static inline int countpairs_s_mu_mocks_avx_intrinsics_DOUBLE(const int64_t N0, DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, DOUBLE *d0, const weight_struct_DOUBLE *weights0, + const int64_t N1, DOUBLE *x1, DOUBLE *y1, DOUBLE *z1, DOUBLE *d1, const weight_struct_DOUBLE *weights1, + const int same_cell, + const int fast_divide, + const DOUBLE smax, const DOUBLE smin, const int nsbin,const int nmu_bins, + const DOUBLE *supp_sqr, const DOUBLE mu_max, + DOUBLE *src_savg, + uint64_t *src_npairs, DOUBLE *src_weightavg, const weight_method_t weight_method) +{ + if(N0 == 0 || N1 == 0) { + return EXIT_SUCCESS; + } + + if(src_npairs == NULL) { + return EXIT_FAILURE; + } + + const int32_t need_savg = src_savg != NULL; + const int32_t need_weightavg = src_weightavg != NULL; + + const int64_t totnbins = (nmu_bins+1)*(nsbin+1); + const DOUBLE sqr_mumax = mu_max*mu_max; + const DOUBLE sqr_smax = smax*smax; + const DOUBLE sqr_smin = smin*smin; + + AVX_FLOATS m_supp_sqr[nsbin]; + AVX_FLOATS m_kbin[nsbin]; + for(int i=0;i -smax) break; + d1++; n_off++; + } + if(prev_j == N1) { + break; + } + j = prev_j; + } + DOUBLE *locald1 = d1; + DOUBLE *localx1 = x1 + n_off; + DOUBLE *localy1 = y1 + n_off; + DOUBLE *localz1 = z1 + n_off; + for(int w = 0; w < local_w1.num_weights; w++){ + local_w1.weights[w] = weights1->weights[w] + n_off; + } + + AVX_FLOATS m_xpos = AVX_SET_FLOAT(xpos); + AVX_FLOATS m_ypos = AVX_SET_FLOAT(ypos); + AVX_FLOATS m_zpos = AVX_SET_FLOAT(zpos); + AVX_FLOATS m_dpos = AVX_SET_FLOAT(dpos); + union int8 { + AVX_INTS m_ibin; + int ibin[AVX_NVEC]; + }; 
+ + union float8{ + AVX_FLOATS m_sep; + DOUBLE sep[AVX_NVEC]; + }; + + const AVX_FLOATS m_sqr_smax = AVX_SET_FLOAT(sqr_smax); + const AVX_FLOATS m_sqr_smin = AVX_SET_FLOAT(sqr_smin); + const AVX_FLOATS m_sqr_mumax = AVX_SET_FLOAT(sqr_mumax); + const AVX_FLOATS m_inv_dmu = AVX_SET_FLOAT(inv_dmu); + const AVX_FLOATS m_nmu_bins = AVX_SET_FLOAT((DOUBLE) nmu_bins); + const AVX_FLOATS m_zero = AVX_SET_FLOAT(ZERO); + const AVX_FLOATS m_one = AVX_SET_FLOAT((DOUBLE) 1); + + for(;j<=(N1-AVX_NVEC);j+=AVX_NVEC){ + const AVX_FLOATS m_x2 = AVX_LOAD_FLOATS_UNALIGNED(localx1); + const AVX_FLOATS m_y2 = AVX_LOAD_FLOATS_UNALIGNED(localy1); + const AVX_FLOATS m_z2 = AVX_LOAD_FLOATS_UNALIGNED(localz1); + const AVX_FLOATS m_d2 = AVX_LOAD_FLOATS_UNALIGNED(locald1); + + localx1 += AVX_NVEC; + localy1 += AVX_NVEC; + localz1 += AVX_NVEC; + locald1 += AVX_NVEC; + + for(int w = 0; w < pair.num_weights; w++){ + pair.weights1[w].a = AVX_LOAD_FLOATS_UNALIGNED(local_w1.weights[w]); + local_w1.weights[w] += AVX_NVEC; + } + + union float8_weights{ + AVX_FLOATS m_weights; + DOUBLE weights[NVEC]; + }; + union float8_weights union_mweight; + + const AVX_FLOATS m_perpx = AVX_SUBTRACT_FLOATS(m_xpos, m_x2); + const AVX_FLOATS m_perpy = AVX_SUBTRACT_FLOATS(m_ypos, m_y2); + const AVX_FLOATS m_perpz = AVX_SUBTRACT_FLOATS(m_zpos, m_z2); + + const AVX_FLOATS m_parx = AVX_ADD_FLOATS(m_x2, m_xpos); + const AVX_FLOATS m_pary = AVX_ADD_FLOATS(m_y2, m_ypos); + const AVX_FLOATS m_parz = AVX_ADD_FLOATS(m_z2, m_zpos); + + AVX_FLOATS m_sqr_mu, m_sqr_s; + { + /* + //Technically l := 1/2 (v1 + v2) but the factor of 1/2 occurs both in numerator and denominator + and cancels out. 
+ + s \dot l := (parx*perpx + pary*perpy + parz*perp) + := (x1 + x2)*(x1 - x2) + (y1 + y2)*(y1 - y2) + (z1 + z2)*(z1 - z2) + := (x1^2 + y1^2 + z1^2) - (x2^2 + y2^2 + z2^2) + := d1^2 - d2^2 + */ + const AVX_FLOATS m_s_dot_l = AVX_SUBTRACT_FLOATS(AVX_SQUARE_FLOAT(m_d2), AVX_SQUARE_FLOAT(m_dpos)); + const AVX_FLOATS m_sqr_s_dot_l = AVX_SQUARE_FLOAT(m_s_dot_l);// numerator := |s.l|^2 + const AVX_FLOATS m_sqr_perpx = AVX_SQUARE_FLOAT(m_perpx); + const AVX_FLOATS m_sqr_perpy = AVX_SQUARE_FLOAT(m_perpy); + const AVX_FLOATS m_sqr_perpz = AVX_SQUARE_FLOAT(m_perpz); + m_sqr_s = AVX_ADD_FLOATS(m_sqr_perpx, AVX_ADD_FLOATS(m_sqr_perpy, m_sqr_perpz));//3-d separation + + //Create a mask where s^2 < smax^2 + const AVX_FLOATS m_mask_3d_sep = AVX_COMPARE_FLOATS(m_sqr_s, m_sqr_smax, _CMP_LT_OQ); + if(AVX_TEST_COMPARISON(m_mask_3d_sep) == 0) { + continue; + } + const AVX_FLOATS m_sqr_norm_l = AVX_ADD_FLOATS(AVX_SQUARE_FLOAT(m_parx), + AVX_ADD_FLOATS(AVX_SQUARE_FLOAT(m_pary), + AVX_SQUARE_FLOAT(m_parz))); + + // \mu^2 := cos^2(\theta_between_s_and_l) = |s.l|^2 / (|s|^2 * |l|^2) + const AVX_FLOATS m_sqr_norm_l_norm_s = AVX_MULTIPLY_FLOATS(m_sqr_norm_l, m_sqr_s); + if (fast_divide == 0) { + m_sqr_mu = AVX_DIVIDE_FLOATS(m_sqr_s_dot_l, m_sqr_norm_l_norm_s); + //The divide is the actual operation we need + // but divides are about 10x slower than multiplies. So, I am replacing it + //with a approximate reciprocal in floating point + // + 2 iterations of newton-raphson in case of DOUBLE + } else { + //following blocks do an approximate reciprocal followed by two iterations of Newton-Raphson + +#ifndef DOUBLE_PREC + //Taken from Intel's site: https://software.intel.com/en-us/articles/wiener-filtering-using-intel-advanced-vector-extensions + // (which has bugs in it, just FYI). Plus, https://techblog.lankes.org/2014/06/16/avx-isnt-always-faster-then-see/ + __m256 rc = _mm256_rcp_ps(m_sqr_norm_l_norm_s); +#else + //we have to do this for doubles now. 
+ //if the vrcpps instruction is not generated, there will + //be a ~70 cycle performance hit from switching between + //AVX and SSE modes. + __m128 float_tmp1 = _mm256_cvtpd_ps(m_sqr_norm_l_norm_s); + __m128 float_inv_tmp1 = _mm_rcp_ps(float_tmp1); + AVX_FLOATS rc = _mm256_cvtps_pd(float_inv_tmp1); +#endif//DOUBLE_PREC + + //We have the double->float->approx. reciprocal->double process done. + //Now improve the accuracy of the divide with newton-raphson. + + //Ist iteration of NewtonRaphson + AVX_FLOATS two = AVX_SET_FLOAT((DOUBLE) 2.0); + AVX_FLOATS rc1 = AVX_MULTIPLY_FLOATS(rc, + AVX_SUBTRACT_FLOATS(two, + AVX_MULTIPLY_FLOATS(m_sqr_norm_l_norm_s,rc))); + //2nd iteration of NewtonRaphson + AVX_FLOATS rc2 = AVX_MULTIPLY_FLOATS(rc1, + AVX_SUBTRACT_FLOATS(two, + AVX_MULTIPLY_FLOATS(m_sqr_norm_l_norm_s,rc1))); + m_sqr_mu = AVX_MULTIPLY_FLOATS(m_sqr_s_dot_l,rc2); + } //end of FAST_DIVIDE + } + + const AVX_FLOATS m_mu = AVX_SQRT_FLOAT(m_sqr_mu); + + AVX_FLOATS m_mask_left; + //Do the mask filters in a separate scope + { + const AVX_FLOATS m_mask_mumax = AVX_COMPARE_FLOATS(m_sqr_mu,m_sqr_mumax,_CMP_LT_OQ); + const AVX_FLOATS m_smax_mask = AVX_COMPARE_FLOATS(m_sqr_s, m_sqr_smax, _CMP_LT_OQ); + const AVX_FLOATS m_smin_mask = AVX_COMPARE_FLOATS(m_sqr_s, m_sqr_smin, _CMP_GE_OQ); + const AVX_FLOATS m_s_mask = AVX_BITWISE_AND(m_smax_mask, m_smin_mask); + + m_mask_left = AVX_BITWISE_AND(m_mask_mumax, m_s_mask); + if(AVX_TEST_COMPARISON(m_mask_left)==0) { + continue; + } + m_sqr_s = AVX_BLEND_FLOATS_WITH_MASK(m_zero,m_sqr_s,m_mask_left); + m_sqr_mu = AVX_BLEND_FLOATS_WITH_MASK(m_sqr_mumax,m_sqr_mu,m_mask_left); + } + + union float8 union_msep; + if(need_savg) { + union_msep.m_sep = AVX_SQRT_FLOAT(m_sqr_s); + } + if(need_weightavg){ + pair.dx.a = m_perpx; + pair.dy.a = m_perpy; + pair.dz.a = m_perpz; + + pair.parx.a = m_parx; + pair.pary.a = m_pary; + pair.parz.a = m_parz; + + union_mweight.m_weights = avx_weight_func(&pair); + } + + const AVX_FLOATS m_mask = m_mask_left; + 
AVX_FLOATS m_sbin = AVX_SET_FLOAT((DOUBLE) 0); + for(int kbin=nsbin-1;kbin>=1;kbin--) { + const AVX_FLOATS m_mask_low = AVX_COMPARE_FLOATS(m_sqr_s,m_supp_sqr[kbin-1],_CMP_GE_OQ); + const AVX_FLOATS m_bin_mask = AVX_BITWISE_AND(m_mask_low,m_mask_left); + m_sbin = AVX_BLEND_FLOATS_WITH_MASK(m_sbin,m_kbin[kbin], m_bin_mask); + m_mask_left = AVX_COMPARE_FLOATS(m_sqr_s, m_supp_sqr[kbin-1],_CMP_LT_OQ); + if(AVX_TEST_COMPARISON(m_mask_left) == 0) { + break; + } + } + + /* Compute the 1-D index to the [sbin, mubin] := sbin*(nmu_bins+1) + mubin */ + const AVX_FLOATS m_tmp2 = AVX_MULTIPLY_FLOATS(m_mu,m_inv_dmu); + const AVX_FLOATS m_mubin = AVX_BLEND_FLOATS_WITH_MASK(m_nmu_bins, m_tmp2, m_mask); + const AVX_FLOATS m_nmu_bins_p1 = AVX_ADD_FLOATS(m_nmu_bins,m_one); + const AVX_FLOATS m_binproduct = AVX_ADD_FLOATS(AVX_MULTIPLY_FLOATS(m_sbin,m_nmu_bins_p1),m_mubin); + union int8 union_finalbin; + union_finalbin.m_ibin = AVX_TRUNCATE_FLOAT_TO_INT(m_binproduct); + +#if __INTEL_COMPILER +#pragma unroll(AVX_NVEC) +#endif + for(int jj=0;jj= sqr_smax || sqr_s < sqr_smin) continue; + + const DOUBLE norm_l = (parx*parx + pary*pary + parz*parz);// := |l|^2 + const DOUBLE sqr_s_dot_l = s_dot_l * s_dot_l; + const DOUBLE sqr_mu = sqr_s_dot_l/(norm_l * sqr_s); + const int mubin = (sqr_mu >= sqr_mumax) ? 
nmu_bins:(int) (SQRT(sqr_mu)*inv_dmu); + DOUBLE s, pairweight; + if(need_savg) { + s = SQRT(sqr_s); + } + if(need_weightavg){ + pair.dx.d = perpx; + pair.dy.d = perpy; + pair.dz.d = perpz; + + pair.parx.d = parx; + pair.pary.d = pary; + pair.parz.d = parz; + + pairweight = fallback_weight_func(&pair); + } + + for(int kbin=nsbin-1;kbin>=1;kbin--) { + if(sqr_s >= supp_sqr[kbin-1]) { + const int ibin = kbin*(nmu_bins+1) + mubin; + npairs[ibin]++; + if(need_savg) { + savg[ibin] += s; + } + if(need_weightavg){ + weightavg[ibin] += pairweight; + } + break; + } + } + }//remainder jloop + }//i-loop + + for(int i=0;i -smax) break; + d1++; n_off++; + } + if(prev_j == N1) { + break; + } + j = prev_j; + } + DOUBLE *locald1 = d1; + DOUBLE *localx1 = x1 + n_off; + DOUBLE *localy1 = y1 + n_off; + DOUBLE *localz1 = z1 + n_off; + for(int w = 0; w < local_w1.num_weights; w++){ + local_w1.weights[w] = weights1->weights[w] + n_off; + } + + const SSE_FLOATS m_xpos = SSE_SET_FLOAT(xpos); + const SSE_FLOATS m_ypos = SSE_SET_FLOAT(ypos); + const SSE_FLOATS m_zpos = SSE_SET_FLOAT(zpos); + const SSE_FLOATS m_dpos = SSE_SET_FLOAT(dpos); + + union int8 { + SSE_INTS m_ibin; + int ibin[SSE_NVEC]; + }; + + + union float8{ + SSE_FLOATS m_sep; + DOUBLE sep[SSE_NVEC]; + }; + + const SSE_FLOATS m_sqr_smax = SSE_SET_FLOAT(sqr_smax); + const SSE_FLOATS m_sqr_smin = SSE_SET_FLOAT(sqr_smin); + const SSE_FLOATS m_sqr_mumax = SSE_SET_FLOAT(sqr_mumax); + const SSE_FLOATS m_inv_dmu = SSE_SET_FLOAT(inv_dmu); + const SSE_FLOATS m_nmu_bins = SSE_SET_FLOAT((DOUBLE) nmu_bins); + const SSE_FLOATS m_zero = SSE_SET_FLOAT(ZERO); + const SSE_FLOATS m_one = SSE_SET_FLOAT((DOUBLE) 1); + + for(;j<=(N1-SSE_NVEC);j+=SSE_NVEC){ + const SSE_FLOATS m_x2 = SSE_LOAD_FLOATS_UNALIGNED(localx1); + const SSE_FLOATS m_y2 = SSE_LOAD_FLOATS_UNALIGNED(localy1); + const SSE_FLOATS m_z2 = SSE_LOAD_FLOATS_UNALIGNED(localz1); + const SSE_FLOATS m_d2 = SSE_LOAD_FLOATS_UNALIGNED(locald1); + + localx1 += SSE_NVEC; + localy1 += SSE_NVEC; + 
localz1 += SSE_NVEC; + locald1 += SSE_NVEC; + + for(int w = 0; w < pair.num_weights; w++){ + pair.weights1[w].s = SSE_LOAD_FLOATS_UNALIGNED(local_w1.weights[w]); + local_w1.weights[w] += SSE_NVEC; + } + + union float4_weights{ + SSE_FLOATS m_weights; + DOUBLE weights[SSE_NVEC]; + }; + union float4_weights union_mweight; + + const SSE_FLOATS m_perpx = SSE_SUBTRACT_FLOATS(m_xpos, m_x2); + const SSE_FLOATS m_perpy = SSE_SUBTRACT_FLOATS(m_ypos, m_y2); + const SSE_FLOATS m_perpz = SSE_SUBTRACT_FLOATS(m_zpos, m_z2); + + const SSE_FLOATS m_parx = SSE_ADD_FLOATS(m_x2, m_xpos); + const SSE_FLOATS m_pary = SSE_ADD_FLOATS(m_y2, m_ypos); + const SSE_FLOATS m_parz = SSE_ADD_FLOATS(m_z2, m_zpos); + + SSE_FLOATS m_sqr_s, m_sqr_mu; + { + const SSE_FLOATS m_s_dot_l = SSE_SUBTRACT_FLOATS(SSE_SQUARE_FLOAT(m_d2), SSE_SQUARE_FLOAT(m_dpos)); + + const SSE_FLOATS m_sqr_s_dot_l = SSE_SQUARE_FLOAT(m_s_dot_l); + const SSE_FLOATS m_sqr_perpx = SSE_SQUARE_FLOAT(m_perpx); + const SSE_FLOATS m_sqr_perpy = SSE_SQUARE_FLOAT(m_perpy); + const SSE_FLOATS m_sqr_perpz = SSE_SQUARE_FLOAT(m_perpz); + m_sqr_s = SSE_ADD_FLOATS(m_sqr_perpx, SSE_ADD_FLOATS(m_sqr_perpy, m_sqr_perpz));//3-d separation + + const SSE_FLOATS m_mask_3d_sep = SSE_COMPARE_FLOATS_LT(m_sqr_s, m_sqr_smax); + const SSE_FLOATS m_sqr_norm_l = SSE_ADD_FLOATS(SSE_SQUARE_FLOAT(m_parx), SSE_ADD_FLOATS(SSE_SQUARE_FLOAT(m_pary), SSE_SQUARE_FLOAT(m_parz))); + + if(SSE_TEST_COMPARISON(m_mask_3d_sep)==0) { + continue; + } + + // \mu^2 = \pi^2 / s^2 + const SSE_FLOATS m_sqr_norm_l_norm_s = SSE_MULTIPLY_FLOATS(m_sqr_norm_l, m_sqr_s); + m_sqr_mu = SSE_DIVIDE_FLOATS(m_sqr_s_dot_l,m_sqr_norm_l_norm_s); + } + + + const SSE_FLOATS m_mu = SSE_SQRT_FLOAT(m_sqr_mu); + + SSE_FLOATS m_mask_left; + //Do the mask filters in a separate scope + { + const SSE_FLOATS m_mask_mumax = SSE_COMPARE_FLOATS_LT(m_sqr_mu,m_sqr_mumax); + const SSE_FLOATS m_smax_mask = SSE_COMPARE_FLOATS_LT(m_sqr_s, m_sqr_smax); + const SSE_FLOATS m_smin_mask = 
SSE_COMPARE_FLOATS_GE(m_sqr_s, m_sqr_smin); + const SSE_FLOATS m_s_mask = SSE_BITWISE_AND(m_smax_mask,m_smin_mask); + + m_mask_left = SSE_BITWISE_AND(m_mask_mumax, m_s_mask); + if(SSE_TEST_COMPARISON(m_mask_left)==0) { + continue; + } + + m_sqr_s = SSE_BLEND_FLOATS_WITH_MASK(m_zero,m_sqr_s,m_mask_left); + m_sqr_mu = SSE_BLEND_FLOATS_WITH_MASK(m_sqr_mumax,m_sqr_mu,m_mask_left); + } + union float8 union_msep; + if(need_savg) { + union_msep.m_sep = SSE_SQRT_FLOAT(m_sqr_s); + } + if(need_weightavg){ + pair.dx.s = m_perpx; + pair.dy.s = m_perpy; + pair.dz.s = m_perpz; + + pair.parx.s = m_parx; + pair.pary.s = m_pary; + pair.parz.s = m_parz; + + union_mweight.m_weights = sse_weight_func(&pair); + } + + const SSE_FLOATS m_mask = m_mask_left; + SSE_FLOATS m_sbin = SSE_SET_FLOAT((DOUBLE) 0); + for(int kbin=nsbin-1;kbin>=1;kbin--) { + const SSE_FLOATS m_mask_low = SSE_COMPARE_FLOATS_GE(m_sqr_s,m_supp_sqr[kbin-1]); + const SSE_FLOATS m_bin_mask = SSE_BITWISE_AND(m_mask_low,m_mask_left); + m_sbin = SSE_BLEND_FLOATS_WITH_MASK(m_sbin,m_kbin[kbin], m_bin_mask); + m_mask_left = SSE_COMPARE_FLOATS_LT(m_sqr_s, m_supp_sqr[kbin-1]); + if(SSE_TEST_COMPARISON(m_mask_left) == 0) { + break; + } + } + + /* Compute the 1-D index to the [sbin, mubin] := sbin*(nmu_bins+1) + mubin */ + const SSE_FLOATS m_tmp2 = SSE_MULTIPLY_FLOATS(m_mu,m_inv_dmu); + const SSE_FLOATS m_mubin = SSE_BLEND_FLOATS_WITH_MASK(m_nmu_bins, m_tmp2, m_mask); + const SSE_FLOATS m_nmu_bins_p1 = SSE_ADD_FLOATS(m_nmu_bins,m_one); + const SSE_FLOATS m_binproduct = SSE_ADD_FLOATS(SSE_MULTIPLY_FLOATS(m_sbin,m_nmu_bins_p1),m_mubin); + union int8 union_finalbin; + union_finalbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_binproduct); + +#if __INTEL_COMPILER +#pragma unroll(SSE_NVEC) +#endif + for(int jj=0;jj= sqr_smax || sqr_s < sqr_smin) continue; + + const DOUBLE norm_l = (parx*parx + pary*pary + parz*parz); + const DOUBLE sqr_s_dot_l = s_dot_l * s_dot_l; + const DOUBLE sqr_mu = sqr_s_dot_l/(norm_l * sqr_s); + const int mubin = 
(sqr_mu >= sqr_mumax) ? nmu_bins:(int) (SQRT(sqr_mu)*inv_dmu); + DOUBLE s, pairweight; + if(need_savg) { + s = SQRT(sqr_s); + } + if(need_weightavg){ + pair.dx.d = perpx; + pair.dy.d = perpy; + pair.dz.d = perpz; + + pair.parx.d = parx; + pair.pary.d = pary; + pair.parz.d = parz; + + pairweight = fallback_weight_func(&pair); + } + + + for(int kbin=nsbin-1;kbin>=1;kbin--) { + if(sqr_s >= supp_sqr[kbin-1]) { + const int ibin = kbin*(nmu_bins+1) + mubin; + npairs[ibin]++; + if(need_savg){ + savg[ibin] += s; + } + if(need_weightavg){ + weightavg[ibin] += pairweight; + } + break; + } + } + }//remainder jloop + }//i-loop + + for(int i=0;i 0) { + /*Particles are sorted on 'd', in increasing order */ + const DOUBLE dz = *d1 - dpos; + if(dz > -smax) break; + d1++; n_off++; + nleft--; + } + /*If no particle in the second cell satisfies distance constraints on 'dz' for the current 'i'th particle in first cell, + then there can be no more pairs from any particles in the first cell (since the first cell is also sorted in increasing order in 'd') + */ + if(nleft == 0) { + i=N0; + break; + } + } + + DOUBLE *localx1 = x1 + n_off; + DOUBLE *localy1 = y1 + n_off; + DOUBLE *localz1 = z1 + n_off; + DOUBLE *locald1 = d1; + for(int w = 0; w < pair.num_weights; w++){ + local_w1.weights[w] = weights1->weights[w] + n_off; + } + + for(int64_t j=0;j= sqr_smax || sqr_s < sqr_smin) continue; + + const DOUBLE sqr_l = (parx*parx + pary*pary + parz*parz); + const DOUBLE sqr_s_dot_l = s_dot_l * s_dot_l; + const DOUBLE sqr_mu = sqr_s_dot_l/(sqr_l * sqr_s); + const int mubin = (sqr_mu >= sqr_mumax) ? 
nmu_bins:(int) (SQRT(sqr_mu)*inv_dmu); + DOUBLE s, pairweight; + if(need_savg) { + s = SQRT(sqr_s); + } + if(need_weightavg){ + pair.dx.d = perpx; + pair.dy.d = perpy; + pair.dz.d = perpz; + + pair.parx.d = parx; + pair.pary.d = pary; + pair.parz.d = parz; + + pairweight = weight_func(&pair); + } + + for(int kbin=nsbin-1;kbin>=1;kbin--) { + if(sqr_s >= supp_sqr[kbin-1]) { + const int ibin = kbin*(nmu_bins+1) + mubin; + npairs[ibin]++; + if(need_savg) { + savg[ibin]+=s; + } + if(need_weightavg){ + weightavg[ibin] += pairweight; + } + break; + } + }//finding kbin + }//j loop over second set of particles + }//i loop over first set of particles + + for(int i=0;ibin_refine_factors[1]=numthreads; } #endif - for(int i=0;i<3;i++) { + /* Only check the ra and dec bin refine factors (not all 3 bin refs)*/ + for(int i=0;i<2;i++) { if(options->bin_refine_factors[i] < 1) { fprintf(stderr,"Warning: bin refine factor along axis = %d *must* be >=1. Instead found bin refine factor =%d\n", i, options->bin_refine_factors[i]); diff --git a/mocks/Makefile b/mocks/Makefile index 177b3c6d..44853a5a 100644 --- a/mocks/Makefile +++ b/mocks/Makefile @@ -1,28 +1,28 @@ include ../mocks.options ../common.mk -TARGETS:= dirs DDrppi_mocks DDtheta_mocks vpf_mocks examples +TARGETS:= dirs DDrppi_mocks DDtheta_mocks DDsmu_mocks vpf_mocks examples ifneq ($(COMPILE_PYTHON_EXT), 0) TARGETS += python_bindings else $(warning $(ccmagenta) Not compiling C extensions for mocks. 
Either python or numpy not available $(ccreset)) endif -all: $(TARGETS) +all: $(TARGETS) dirs: | ../lib ../bin ../include ../lib ../bin ../include: mkdir -p $@ -.PHONY: clean celna clena celan $(TARGETS) tests distclean realclean distclena realclena dirs test python_bindings libs all +.PHONY: clean celna clena celan $(TARGETS) tests distclean realclean distclena realclena dirs test python_bindings libs all -DDrppi_mocks DDtheta_mocks vpf_mocks: +DDrppi_mocks DDtheta_mocks vpf_mocks DDsmu_mocks: $(MAKE) -C $@ examples: libs $(MAKE) -C examples -python_bindings: libs +python_bindings: libs $(MAKE) -C $@ distclean:realclean @@ -32,6 +32,7 @@ realclena:realclean realclean: $(MAKE) clean $(MAKE) -C DDrppi_mocks distclean + $(MAKE) -C DDsmu_mocks distclean $(MAKE) -C DDtheta_mocks distclean $(MAKE) -C vpf_mocks distclean $(MAKE) -C python_bindings distclean @@ -40,6 +41,7 @@ realclean: clean: $(MAKE) -C DDrppi_mocks clean + $(MAKE) -C DDsmu_mocks clean $(MAKE) -C DDtheta_mocks clean $(MAKE) -C vpf_mocks clean $(MAKE) -C examples clean @@ -50,18 +52,19 @@ clena: clean celan: clean celna: clean -install: examples | dirs +install: examples | dirs $(MAKE) -C DDrppi_mocks install + $(MAKE) -C DDsmu_mocks install $(MAKE) -C DDtheta_mocks install $(MAKE) -C vpf_mocks install $(MAKE) -C python_bindings install libs: | dirs $(MAKE) -C DDrppi_mocks lib + $(MAKE) -C DDsmu_mocks lib $(MAKE) -C DDtheta_mocks lib $(MAKE) -C vpf_mocks lib test: tests tests: $(MAKE) -C tests - diff --git a/mocks/examples/Makefile b/mocks/examples/Makefile index 3047cfbf..230a7d16 100644 --- a/mocks/examples/Makefile +++ b/mocks/examples/Makefile @@ -7,10 +7,12 @@ DATA_DIR := ../tests/data MOCKS_DIR := $(ROOT_DIR)/mocks DDrppi_mocks_DIR := $(MOCKS_DIR)/DDrppi_mocks +DDsmu_mocks_DIR := $(MOCKS_DIR)/DDsmu_mocks DDTHETA_mocks_DIR := $(MOCKS_DIR)/DDtheta_mocks VPF_mocks_DIR := $(MOCKS_DIR)/vpf_mocks DDrppi_mocks_LIB := countpairs_rp_pi_mocks +DDsmu_mocks_LIB := countpairs_s_mu_mocks DDTHETA_mocks_LIB := 
countpairs_theta_mocks VPF_mocks_LIB := countspheres_mocks @@ -20,14 +22,16 @@ TARGET := run_correlations_mocks TARGETSRC := $(TARGET).c $(IO_DIR)/ftread.c $(IO_DIR)/io.c $(UTILS_DIR)/utils.c $(UTILS_DIR)/progressbar.c \ $(UTILS_DIR)/cosmology_params.c TARGETOBJS := $(TARGETSRC:.c=.o) -C_LIBRARIES := $(DDrppi_mocks_DIR)/lib$(DDrppi_mocks_LIB).a $(DDTHETA_mocks_DIR)/lib$(DDTHETA_mocks_LIB).a $(VPF_mocks_DIR)/lib$(VPF_mocks_LIB).a -INCL := $(DDrppi_mocks_DIR)/$(DDrppi_mocks_LIB).h $(DDTHETA_mocks_DIR)/$(DDTHETA_mocks_LIB).h $(VPF_mocks_DIR)/$(VPF_mocks_LIB).h \ +C_LIBRARIES := $(DDrppi_mocks_DIR)/lib$(DDrppi_mocks_LIB).a $(DDsmu_mocks_DIR)/lib$(DDsmu_mocks_LIB).a \ + $(DDTHETA_mocks_DIR)/lib$(DDTHETA_mocks_LIB).a $(VPF_mocks_DIR)/lib$(VPF_mocks_LIB).a +INCL := $(DDrppi_mocks_DIR)/$(DDrppi_mocks_LIB).h $(DDsmu_mocks_DIR)/$(DDsmu_mocks_LIB).h \ + $(DDTHETA_mocks_DIR)/$(DDTHETA_mocks_LIB).h $(VPF_mocks_DIR)/$(VPF_mocks_LIB).h \ $(UTILS_DIR)/defs.h $(IO_DIR)/io.h $(IO_DIR)/ftread.h \ $(UTILS_DIR)/utils.h $(UTILS_DIR)/gridlink_mocks_impl_double.h $(UTILS_DIR)/gridlink_mocks_impl_float.h \ $(UTILS_DIR)/function_precision.h $(UTILS_DIR)/cellarray_mocks_double.h $(UTILS_DIR)/cellarray_mocks_float.h \ $(UTILS_DIR)/progressbar.h $(UTILS_DIR)/cosmology_params.h -LIBRARY_INCL := -I$(DDrppi_mocks_DIR) -I$(DDTHETA_mocks_DIR) -I$(VPF_mocks_DIR) +LIBRARY_INCL := -I$(DDrppi_mocks_DIR) -I$(DDsmu_mocks_DIR) -I$(DDTHETA_mocks_DIR) -I$(VPF_mocks_DIR) all: $(TARGET) $(TARGETSRC) $(C_LIBRARIES) $(INCL) $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk Makefile @@ -39,6 +43,9 @@ $(TARGET):$(C_LIBRARIES) $(DDrppi_mocks_DIR)/lib$(DDrppi_mocks_LIB).a: $(DDrppi_mocks_DIR)/*.c $(DDrppi_mocks_DIR)/*.c.src $(DDrppi_mocks_DIR)/*.h.src $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk $(MAKE) -C $(DDrppi_mocks_DIR) libs +$(DDsmu_mocks_DIR)/lib$(DDsmu_mocks_LIB).a: $(DDsmu_mocks_DIR)/*.c $(DDsmu_mocks_DIR)/*.c.src $(DDsmu_mocks_DIR)/*.h.src $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk + $(MAKE) -C 
$(DDsmu_mocks_DIR) libs + $(DDTHETA_mocks_DIR)/lib$(DDTHETA_mocks_LIB).a: $(DDTHETA_mocks_DIR)/*.c $(DDTHETA_mocks_DIR)/*.c.src $(DDTHETA_mocks_DIR)/*.h.src $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk $(MAKE) -C $(DDTHETA_mocks_DIR) libs diff --git a/mocks/examples/run_correlations_mocks.c b/mocks/examples/run_correlations_mocks.c index a0930b6b..0c9248cf 100644 --- a/mocks/examples/run_correlations_mocks.c +++ b/mocks/examples/run_correlations_mocks.c @@ -26,6 +26,7 @@ /* Library proto-types + struct definitions in the ../..//include directory */ #include "countpairs_rp_pi_mocks.h" +#include "countpairs_s_mu_mocks.h" #include "countpairs_theta_mocks.h" #include "countspheres_mocks.h" @@ -45,6 +46,8 @@ void Printhelp(void) fprintf(stderr," * binfile = name of ascii file containing the r-bins (rmin rmax for each bin)\n") ; fprintf(stderr," * pimax = pimax (in same units as X/Y/Z of the data)\n"); fprintf(stderr," * cosmology = flag to pick-up the cosmology combination to use (set as an array of combinations in ../utils/cosmology_params.c)\n"); + fprintf(stderr," * mu_max = Max. 
value of the cosine of the angle to the LOS (must be within [0.0, 1.0])\n"); + fprintf(stderr," * nmu_bins = Number of linear bins to create (the bins themselves range from [0.0, mu_max]\n"); #if defined(USE_OMP) && defined(_OPENMP) fprintf(stderr," * numthreads = number of threads to use\n"); #endif @@ -61,7 +64,9 @@ int main(int argc, char **argv) DOUBLE pimax; int cosmology=1; int nthreads=1; - + int nmu_bins; + DOUBLE mu_max; + struct config_options options = get_config_options(); options.verbose=1; options.periodic=0; @@ -70,9 +75,9 @@ int main(int argc, char **argv) #if defined(_OPENMP) nthreads=4;//default to 4 threads - const char argnames[][30]={"file","format","binfile","pimax","cosmology","Nthreads"}; + const char argnames[][30]={"file","format","binfile","pimax","cosmology","mu_max", "nmu_bins", "Nthreads"}; #else - const char argnames[][30]={"file","format","binfile","pimax","cosmology"}; + const char argnames[][30]={"file","format","binfile","pimax","cosmology", "mu_max", "nmu_bins"}; #endif int nargs=sizeof(argnames)/(sizeof(char)*30); @@ -89,8 +94,10 @@ int main(int argc, char **argv) my_snprintf(binfile,MAXLEN,"%s",argv[3]); pimax=atof(argv[4]); cosmology=atoi(argv[5]); + mu_max=atof(argv[6]); + nmu_bins=atoi(argv[7]); #if defined(_OPENMP) - nthreads = atoi(argv[6]); + nthreads = atoi(argv[8]); #endif } } else { @@ -99,6 +106,8 @@ int main(int argc, char **argv) my_snprintf(binfile, MAXLEN,"%s","../tests/bins"); pimax=40.0; cosmology=1; + mu_max=1.0; + nmu_bins=10; } fprintf(stderr,ANSI_COLOR_BLUE "Running `%s' with the parameters \n",argv[0]); @@ -108,8 +117,10 @@ int main(int argc, char **argv) fprintf(stderr,"\t\t %-10s = %s \n",argnames[2],binfile); fprintf(stderr,"\t\t %-10s = %10.4lf\n",argnames[3],pimax); fprintf(stderr,"\t\t %-10s = %d\n",argnames[4],cosmology); + fprintf(stderr,"\t\t %-10s = %10.4lf\n",argnames[5],mu_max); + fprintf(stderr,"\t\t %-10s = %dlf\n",argnames[6],nmu_bins); #if defined(_OPENMP) - fprintf(stderr,"\t\t %-10s = 
%d\n",argnames[5],nthreads); + fprintf(stderr,"\t\t %-10s = %d\n",argnames[7],nthreads); #endif fprintf(stderr,"\t\t -------------------------------------" ANSI_COLOR_RESET "\n"); @@ -135,10 +146,10 @@ int main(int argc, char **argv) gettimeofday(&t0,NULL); #if defined(_OPENMP) fprintf(stderr,ANSI_COLOR_MAGENTA "Command-line for running equivalent DD(rp,pi) calculation would be:\n `%s %s %s %s %s %s %lf %d %d'" ANSI_COLOR_RESET "\n", - "../DDrppi/DDrppi_mocks",file,fileformat,file,fileformat,binfile,pimax,cosmology,nthreads); + "../DDrppi_mocks/DDrppi_mocks",file,fileformat,file,fileformat,binfile,pimax,cosmology,nthreads); #else fprintf(stderr,ANSI_COLOR_MAGENTA "Command-line for running equivalent DD(rp,pi) calculation would be:\n `%s %s %s %s %s %s %lf %d'" ANSI_COLOR_RESET "\n", - "../DDrppi/DDrppi_mocks",file,fileformat,file,fileformat,binfile,pimax,cosmology); + "../DDrppi_mocks/DDrppi_mocks",file,fileformat,file,fileformat,binfile,pimax,cosmology); #endif results_countpairs_mocks results; @@ -177,15 +188,62 @@ int main(int argc, char **argv) - //Do the w(theta) counts + //Do the DD(s, mu) counts + { + gettimeofday(&t0,NULL); +#if defined(_OPENMP) + fprintf(stderr,ANSI_COLOR_MAGENTA "Command-line for running equivalent DD(s,mu) calculation would be:\n `%s %s %s %s %s %s %lf %d %d %d'"ANSI_COLOR_RESET"\n", + "../DDsmu_mocks/DDsmu_mocks",file,fileformat,file,fileformat,binfile,mu_max,nmu_bins,cosmology,nthreads); +#else + fprintf(stderr,ANSI_COLOR_MAGENTA "Command-line for running equivalent DD(s,mu) calculation would be:\n `%s %s %s %s %s %s %lf %d %d'"ANSI_COLOR_RESET"\n", + "../DDsmu_mocks/DDsmu_mocks",file,fileformat,file,fileformat,binfile,mu_max,nmu_bins,cosmology); +#endif + + results_countpairs_mocks_s_mu results; + int status = countpairs_mocks_s_mu(ND1,ra1,dec1,cz1, + ND2,ra2,dec2,cz2, + nthreads, + autocorr, + binfile, + mu_max, + nmu_bins, + cosmology, + &results, + &options, NULL); + if(status != EXIT_SUCCESS) { + return status; + } + + 
gettimeofday(&t1,NULL); + double pair_time = ADD_DIFF_TIME(t0,t1); +#if 0 + const DOUBLE dmu = mu_max/(DOUBLE)results.nmu_bins ; + const int nmubin = results.nmu_bins; + for(int i=1;i LasDamas cosmology. Om=0.25, Ol=0.75 (other values are not used)\n" + " 2 -> Planck cosmology. Om=0.302, Ol=0.698 \n" + " To setup a new cosmology, add an entry to the function, `init_cosmology` in \n" + " `ROOT/utils/cosmology_params.c` and recompile the package.\n" + "\n" + "nthreads: integer\n" + " The number of OpenMP threads to use. Has no effect if OpenMP was not used\n" + " during library compilation. \n" + "\n" + "mu_max: double \n" + " The maximum mu value to use; must be > 0 and <= 1.0\n" + "\n" + "nmu_bins: int \n" + " The number of "MU_CHAR" bins to use, binning from [0.0, mumax)\n" + "\n" + "binfile: filename\n" + " Filename containing the radial bins for the correlation function. The file\n" + " is expected to contain white-space separated ``smin smax`` with the bin\n" + " edges. Units must be Mpc/h (see the ``bins`` file in the tests directory\n" + " for a sample). For usual logarithmic bins, ``logbins``in the root directory\n" + " of this package will create a compatible ``binfile``.\n" + "\n" + "RA1: array-like, float/double (default double)\n" + " The right-ascension of the galaxy, in the range [0, 360]. If there are\n" + " negative RA's in the supplied array (input RA in the range [-180, 180]),\n" + " then the code will shift the entire array by 180 to put RA's in the\n" + " [0, 360] range.\n" + "\n" + "DEC1: array-like, float/double (default double)\n" + " The declination of the galaxy, in the range [-90, 90]. If there are\n" + " declinations > 90 in the supplied array (input dec in the range [0, 180]),\n" + " then the code will shift the entire array by -90 to put declinations in\n" + " the [-90, 90] range. 
If the code finds declinations more than 180, then\n" + " it assumes RA and DEC have been swapped and aborts with that message.\n" + "\n" + "CZ1: array-like, float/double (default double)\n" + " The redshift multiplied by speed of light for the galaxies. The code will\n" + " checks that cz has been supplied by comparing with a threshold (currently\n" + " set to 10, defined in function check_ra_dec_cz in file\n" + " `DDrppi/countpairs_rp_pi_mocks_impl.c.src`) and multiplies by the speed of light if\n" + " max z is less than that threshold. If you really want to change the speed\n" + " of light, then edit the macro in `ROOT/utils/set_cosmo_dist.h`.\n" + "\n" + + "weights1 : array-like, real (float/double), shape (n_particles,) or (n_weights_per_particle,n_particles), optional\n" + " Weights for computing a weighted pair count.\n\n" + + "weight_type : str, optional\n" + " The type of pair weighting to apply.\n" + " Options: \"pair_product\", None\n" + " Default: None.\n\n" + + "RA2/DEC2/CZ2: float/double (default double)\n" + " Same as for RA1/DEC1/CZ1\n" + "\n" + + "weights2\n : array-like, real (float/double), shape (n_particles,) or (n_weights_per_particle,n_particles), optional\n" + " Weights for computing a weighted pair count." + + "is_comoving_dist: boolean (default false)\n" + " Boolean flag to indicate that ``cz`` values have already been\n" + " converted into co-moving distances. This flag allows arbitrary\n" + " cosmologies to be used in ``Corrfunc``.\n" + "\n" + "verbose : boolean (default false)\n" + " Boolean flag to control output of informational messages\n" + "\n" + "output_savg : boolean (default false)\n" + " Boolean flag to output the average ``s`` for each bin. Code will\n" + " run slightly slower if you set this flag. Also, note, if you are calculating\n" + " in single-precision, ``savg`` will suffer from numerical loss of\n" + " precision and can not be trusted. 
If you need accurate ``savg``\n" + " values, then pass in double precision arrays for the particle positions.\n" + "\n" + "fast_divide: boolean (default false)\n" + " Boolean flag to replace the division in ``AVX`` implementation with an\n" + " approximate reciprocal, followed by a Newton-Raphson step. Improves\n" + " runtime by ~15-20%. Loss of precision is at the 5-6th decimal place.\n" + "\n" + "(xyz)bin_refine_factor: integer (default (2,2,1) typical values in [1-3]) \n" + " Controls the refinement on the cell sizes. Can have up to a 20% impact \n" + " on runtime. \n" + "\n" + "max_cells_per_dim: integer (default 100, typical values in [50-300]) \n" + " Controls the maximum number of cells per dimension. Total number of cells \n" + " can be up to (max_cells_per_dim)^3. Only increase if ``rmax`` is too small \n" + " relative to the boxsize (and increasing helps the runtime). \n" + "\n" + "c_api_timer : boolean (default false)\n" + " Boolean flag to measure actual time spent in the C libraries. Here\n" + " to allow for benchmarking and scaling studies.\n" + "\n" + "isa : integer (default -1)\n" + " Controls the runtime dispatch for the instruction set to use. Possible\n" + " options are: [-1, AVX, SSE42, FALLBACK]\n\n" + " Setting isa to -1 will pick the fastest available instruction\n" + " set on the current computer. However, if you set ``isa`` to, say,\n" + " ``AVX`` and ``AVX`` is not available on the computer, then the code will\n" + " revert to using ``FALLBACK`` (even though ``SSE42`` might be available).\n\n" + + " Unless you are benchmarking the different instruction sets, you should\n" + " always leave ``isa`` to the default value. 
And if you *are* benchmarking,\n" + " then the integer values correspond to the ``enum`` for the instruction set\n" + " defined in ``utils/defs.h``.\n" + "\n" + "Returns\n" + "--------\n" + "\n" + "a Python list containing [smin, smax, savg, "MU_CHAR", npairs, weightavg] \n" + "for each "MU_CHAR"-bin (up to 1.0) for each radial bin specified in\n" + "the ``binfile``.\n" + "\n" + "Example\n" + "-------\n" + ">>> import numpy as np\n" + ">>> from Corrfunc._countpairs_mocks import countpairs_s_mu_mocks\n" + ">>> ra,dec,cz = np.genfromtxt('../mocks/tests/data/Mr19_mock_northonly.rdcz.dat',dtype=np.float,unpack=True)\n" + ">>> cosmology=1\n" + ">>> autocorr=1\n" + ">>> nthreads=4\n" + ">>> binfile='../mocks/tests/bins'\n" + ">>> nmu_bins=10\n" + ">>> mu_max=1.0\n" + ">>> (DDsmu, time) = countpairs_s_mu_mocks(autocorr, cosmology, nthreads, mu_max, nmu_bins, binfile,\n" + " ra,dec,cz,ra,dec,cz,\n" + " verbose=True)\n" + "\n" + }, {"countpairs_theta_mocks" ,(PyCFunction) countpairs_countpairs_theta_mocks ,METH_VARARGS | METH_KEYWORDS, "countpairs_theta_mocks(autocorr, nthreads, binfile,\n" " RA1, DEC1, weights1=None, weight_type=None,\n" @@ -282,21 +440,21 @@ static PyMethodDef module_methods[] = { " then the code will shift the entire array by -90 to put declinations in\n" " the [-90, 90] range. 
If the code finds declinations more than 180, then\n" " it assumes RA and DEC have been swapped and aborts with that message.\n" - + "weights1 : array-like, real (float/double), shape (n_particles,) or (n_weights_per_particle,n_particles), optional\n" " Weights for computing a weighted pair count.\n\n" - + "weight_type : str, optional\n" " The type of pair weighting to apply.\n" - " Options: \"pair_product\", None\n" + " Options: \"pair_product\", None\n" " Default: None.\n\n" - + "RA2/DEC2: float/double (default double)\n" " Same as for RA1/DEC1\n" - + "weights2\n : array-like, real (float/double), shape (n_particles,) or (n_weights_per_particle,n_particles), optional\n" " Weights for computing a weighted pair count." - + "verbose : boolean (default false)\n" " Boolean flag to control output of informational messages\n" "\n" @@ -554,7 +712,7 @@ static PyMethodDef module_methods[] = { " a sphere of radius ``rmax`` contains *exactly* ``N`` galaxies. For \n" " example, pN[0] (p0, the void probibility function) is the probability\n" " that a sphere of radius ``rmax`` contains 0 galaxies.\n" - "\n" + "\n" "time : double\n" " if ``c_api_timer`` is set, then the return value contains the time spent\n" " in the API; otherwise time is set to 0.0\n" @@ -593,7 +751,7 @@ static PyMethodDef module_methods[] = { " RA, DEC, CZ,\n" " verbose=True,\n" " is_comoving_dist=True)\n" - "\n" + "\n" }, {NULL, NULL, 0, NULL} }; @@ -604,7 +762,7 @@ static PyObject *countpairs_mocks_error_out(PyObject *module, const char *msg) #if PY_MAJOR_VERSION < 3 (void) module;//to avoid unused warning with python2 #endif - + struct module_state *st = GETSTATE(module); PyErr_SetString(st->error, msg); PyErr_Print(); @@ -667,7 +825,7 @@ PyObject *PyInit__countpairs_mocks(void) import_array(); highest_isa_mocks = instrset_detect(); - + #if PY_MAJOR_VERSION >= 3 return module; #endif @@ -677,18 +835,18 @@ PyObject *PyInit__countpairs_mocks(void) static int print_kwlist_into_msg(char *msg, const size_t 
totsize, size_t len, char *kwlist[], const size_t nitems) { for(size_t i=0;i= totsize-2) { return EXIT_FAILURE; } - + memcpy(msg+len, kwlist[i], strlen(kwlist[i])); len += strlen(kwlist[i]); msg[len] = ','; msg[len+1] = ' '; len += 2; } - + msg[len]='\0'; return EXIT_SUCCESS; } @@ -699,24 +857,24 @@ static int print_kwlist_into_msg(char *msg, const size_t totsize, size_t len, ch static int64_t check_dims_and_datatype(PyObject *module, PyArrayObject *x1_obj, PyArrayObject *y1_obj, PyArrayObject *z1_obj, PyArrayObject *weights1_obj, size_t *element_size) { char msg[1024]; - + const int check_weights = weights1_obj != NULL; /* All the position arrays should be 1-D*/ const int nxdims = PyArray_NDIM(x1_obj); const int nydims = PyArray_NDIM(y1_obj); const int nzdims = PyArray_NDIM(z1_obj); - + if(nxdims != 1 || nydims != 1 || nzdims != 1) { snprintf(msg, 1024, "ERROR: Expected 1-D numpy arrays.\nFound (nxdims, nydims, nzdims) = (%d, %d, %d) instead", nxdims, nydims, nzdims); countpairs_mocks_error_out(module, msg); return -1; } - + /* The weights array can be 1-D or 2-D of shape (n_weights, n_particles) */ const int n_weight_dims = check_weights ? 
PyArray_NDIM(weights1_obj) : 1; - + if(n_weight_dims != 1 && n_weight_dims != 2) { snprintf(msg, 1024, "ERROR: Expected 1-D or 2-D weight array.\nFound n_weight_dims = %d instead", n_weight_dims); countpairs_mocks_error_out(module, msg); @@ -750,7 +908,7 @@ static int64_t check_dims_and_datatype(PyObject *module, PyArrayObject *x1_obj, countpairs_mocks_error_out(module, msg); return -1; } - + // Current version of the code only supports weights of the same dtype as positions if( x_type != y_type || y_type != z_type || (check_weights && z_type != weights_type)) { PyArray_Descr *x_descr = PyArray_DescrFromType(x_type); @@ -770,12 +928,12 @@ static int64_t check_dims_and_datatype(PyObject *module, PyArrayObject *x1_obj, countpairs_mocks_error_out(module, msg); return -1; } - + /* Check if the number of elements in the 3 Python arrays are identical */ const int64_t nx1 = (int64_t)PyArray_SIZE(x1_obj); const int64_t ny1 = (int64_t)PyArray_SIZE(y1_obj); const int64_t nz1 = (int64_t)PyArray_SIZE(z1_obj); - + if(nx1 != ny1 || ny1 != nz1) { snprintf(msg, 1024, "ERROR: Expected arrays to have the same number of elements in all 3-dimensions.\nFound (nx, ny, nz) = (%"PRId64", %"PRId64", %"PRId64") instead", nx1, ny1, nz1); @@ -800,7 +958,7 @@ static int64_t check_dims_and_datatype(PyObject *module, PyArrayObject *x1_obj, } else { *element_size = sizeof(double); } - + return nx1; } @@ -859,7 +1017,7 @@ static int64_t check_dims_and_datatype_ra_dec(PyObject *module, PyArrayObject *x countpairs_mocks_error_out(module, msg); return -1; } - + /* Check if the number of elements in the 3 Python arrays are identical */ const int64_t nx1 = (int64_t)PyArray_SIZE(x1_obj); const int64_t ny1 = (int64_t)PyArray_SIZE(y1_obj); @@ -878,7 +1036,7 @@ static int64_t check_dims_and_datatype_ra_dec(PyObject *module, PyArrayObject *x } else { *element_size = sizeof(double); } - + return nx1; } @@ -888,11 +1046,11 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg 
//Error-handling is global in python2 -> stored in struct module_state _struct declared at the top of this file #if PY_MAJOR_VERSION < 3 (void) self; - PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. + PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. #else //In python3, self is simply the module object that was returned earlier by init PyObject *module = self; -#endif +#endif //x1->ra (phi), y1-> declination (theta1), z1->cz (cz1) //x2->ra (ph2), y2-> declination (theta2), z2->cz (cz2) @@ -971,16 +1129,16 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg char msg[1024]; int len=snprintf(msg, 1024,"ArgumentError: In DDrppi_mocks> Could not parse the arguments. Input parameters are: \n"); - + /* How many keywords do we have? Subtract 1 because of the last NULL */ const size_t nitems = sizeof(kwlist)/sizeof(*kwlist) - 1; int status = print_kwlist_into_msg(msg, 1024, len, kwlist, nitems); if(status != EXIT_SUCCESS) { fprintf(stderr,"Error message does not contain all of the keywords\n"); } - + countpairs_mocks_error_out(module,msg); - + Py_RETURN_NONE; } @@ -998,16 +1156,16 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg } - + /* We have numpy arrays and all the required inputs*/ /* How many data points are there? 
And are they all of floating point type */ size_t element_size; const int64_t ND1 = check_dims_and_datatype(module, x1_obj, y1_obj, z1_obj, weights1_obj, &element_size); if(ND1 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } - + /* Ensure the weights are of the right shape (n_weights, n_particles) */ if(weights1_obj != NULL){ // A numpy dimension of length -1 will be expanded to n_weights @@ -1015,7 +1173,7 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; weights1_obj = (PyArrayObject *) PyArray_Newshape(weights1_obj, &pdims, NPY_CORDER); } - + /* Validate the user's choice of weighting method */ weight_method_t weighting_method; int wstatus = get_weight_method_by_name(weighting_method_str, &weighting_method); @@ -1034,7 +1192,7 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg countpairs_mocks_error_out(module, msg); Py_RETURN_NONE; } - + if(extra.weights0.num_weights > 0 && found_weights > MAX_NUM_WEIGHTS){ char msg[1024]; snprintf(msg, 1024, "ValueError: In %s: Provided %d weights-per-particle, but the code was compiled with MAX_NUM_WEIGHTS=%d.\n", @@ -1058,11 +1216,11 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg countpairs_mocks_error_out(module, msg); Py_RETURN_NONE; } - + size_t element_size2; ND2 = check_dims_and_datatype(module, x2_obj, y2_obj, z2_obj, weights2_obj, &element_size2); if(ND2 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } /* Ensure the weights are of the right shape (n_weights, n_particles) */ @@ -1071,7 +1229,7 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; weights2_obj = (PyArrayObject *) PyArray_Newshape(weights2_obj, &pdims, NPY_CORDER); } - + 
if(element_size != element_size2) { snprintf(msg, 1024, "TypeError: In %s: The two arrays must have the same data-type. First array is of type %s while second array is of type %s\n", __FUNCTION__, element_size == 4 ? "floats":"doubles", element_size2 == 4 ? "floats":"doubles"); @@ -1138,7 +1296,7 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg } } options.float_type = element_size; - + /* Pack the weights into extra_options */ for(int64_t w = 0; w < extra.weights0.num_weights; w++){ extra.weights0.weights[w] = (char *) weights1 + w*ND1*element_size; @@ -1146,7 +1304,7 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg extra.weights1.weights[w] = (char *) weights2 + w*ND2*element_size; } } - + NPY_BEGIN_THREADS_DEF; NPY_BEGIN_THREADS; @@ -1166,7 +1324,7 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg c_api_time = options.c_api_time; } NPY_END_THREADS; - + /* Clean up. */ Py_DECREF(x1_array);Py_DECREF(y1_array);Py_DECREF(z1_array);Py_XDECREF(weights1_array);//x1 should absolutely not be NULL Py_XDECREF(x2_array);Py_XDECREF(y2_array);Py_XDECREF(z2_array);Py_XDECREF(weights2_array);//x2 might be NULL depending on value of autocorr @@ -1174,8 +1332,8 @@ static PyObject *countpairs_countpairs_rp_pi_mocks(PyObject *self, PyObject *arg if(status != EXIT_SUCCESS) { Py_RETURN_NONE; } - - + + #if 0 /* Output pairs*/ for(int i=1;i stored in struct module_state _struct declared at the top of this file +#if PY_MAJOR_VERSION < 3 + (void) self; + PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. 
+#else + //In python3, self is simply the module object that was returned earlier by init + PyObject *module = self; +#endif + + //x1->ra (phi), y1-> declination (theta1), z1->cz (cz1) + //x2->ra (ph2), y2-> declination (theta2), z2->cz (cz2) + PyArrayObject *x1_obj=NULL, *y1_obj=NULL, *z1_obj=NULL, *weights1_obj=NULL; + PyArrayObject *x2_obj=NULL, *y2_obj=NULL, *z2_obj=NULL, *weights2_obj=NULL; + + struct config_options options = get_config_options(); + options.is_comoving_dist = 0; + options.verbose = 0; + options.instruction_set = -1; + options.periodic = 0; + options.fast_divide=0; + options.c_api_timer = 0; + int8_t xbin_ref=options.bin_refine_factors[0], + ybin_ref=options.bin_refine_factors[1], + zbin_ref=options.bin_refine_factors[2]; + + int autocorr=1; + int nthreads=4; + int cosmology=1; + int nmu_bins=10; + double mu_max=1.0; + char *binfile, *weighting_method_str = NULL; + + static char *kwlist[] = { + "autocorr", + "cosmology", + "nthreads", + "mu_max", + "nmu_bins", + "binfile", + "RA1", + "DEC1", + "CZ1", + "weights1", + "RA2", + "DEC2", + "CZ2", + "weights2", + "is_comoving_dist", + "verbose", /* keyword verbose -> print extra info at runtime + progressbar */ + "output_savg", + "fast_divide", + "xbin_refine_factor", + "ybin_refine_factor", + "zbin_refine_factor", + "max_cells_per_dim", + "c_api_timer", + "isa",/* instruction set to use of type enum isa; valid values are AVX, SSE, FALLBACK (enum) */ + "weight_type", + NULL + }; + + if ( ! 
PyArg_ParseTupleAndKeywords(args, kwargs, "iiidisO!O!O!|O!O!O!O!O!bbbbbbbhbis", kwlist, + &autocorr,&cosmology,&nthreads,&mu_max,&nmu_bins,&binfile, + &PyArray_Type,&x1_obj, + &PyArray_Type,&y1_obj, + &PyArray_Type,&z1_obj, + &PyArray_Type,&weights1_obj, + &PyArray_Type,&x2_obj,//optional parameters -> if autocorr == 1, not checked; required if autocorr=0 + &PyArray_Type,&y2_obj, + &PyArray_Type,&z2_obj, + &PyArray_Type,&weights2_obj, + &(options.is_comoving_dist), + &(options.verbose), + &(options.need_avg_sep), + &(options.fast_divide), + &xbin_ref, &ybin_ref, &zbin_ref, + &(options.max_cells_per_dim), + &(options.c_api_timer), + &(options.instruction_set), + &weighting_method_str) + + ) { + + PyObject_Print(kwargs, stdout, 0); + fprintf(stdout, "\n"); + + char msg[1024]; + int len=snprintf(msg, 1024,"ArgumentError: In DDsmu_mocks> Could not parse the arguments. Input parameters are: \n"); + + /* How many keywords do we have? Subtract 1 because of the last NULL */ + const size_t nitems = sizeof(kwlist)/sizeof(*kwlist) - 1; + int status = print_kwlist_into_msg(msg, 1024, len, kwlist, nitems); + if(status != EXIT_SUCCESS) { + fprintf(stderr,"Error message does not contain all of the keywords\n"); + } + + countpairs_mocks_error_out(module,msg); + + Py_RETURN_NONE; + } + + /*This is for the fastest isa */ + if(options.instruction_set == -1) { + options.instruction_set = highest_isa_mocks; + } + if(xbin_ref != options.bin_refine_factors[0] || + ybin_ref != options.bin_refine_factors[1] || + zbin_ref != options.bin_refine_factors[2]) { + options.bin_refine_factors[0] = xbin_ref; + options.bin_refine_factors[1] = ybin_ref; + options.bin_refine_factors[2] = zbin_ref; + set_bin_refine_scheme(&options, BINNING_CUST);//custom binning -> code will honor requested binning scheme + } + /* We have numpy arrays and all the required inputs*/ + /* How many data points are there? 
And are they all of floating point type */ + size_t element_size; + const int64_t ND1 = check_dims_and_datatype(module, x1_obj, y1_obj, z1_obj, weights1_obj, &element_size); + if(ND1 == -1) { + //Error has already been set -> simply return + Py_RETURN_NONE; + } + + /* Ensure the weights are of the right shape (n_weights, n_particles) */ + if(weights1_obj != NULL){ + // A numpy dimension of length -1 will be expanded to n_weights + npy_intp dims[2] = {-1, ND1}; + PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; + weights1_obj = (PyArrayObject *) PyArray_Newshape(weights1_obj, &pdims, NPY_CORDER); + } + + /* Validate the user's choice of weighting method */ + weight_method_t weighting_method; + int wstatus = get_weight_method_by_name(weighting_method_str, &weighting_method); + if(wstatus != EXIT_SUCCESS){ + char msg[1024]; + snprintf(msg, 1024, "ValueError: In %s: unknown weight_type %s!", __FUNCTION__, weighting_method_str); + countpairs_mocks_error_out(module, msg); + Py_RETURN_NONE; + } + int found_weights = weights1_obj == NULL ? 
0 : PyArray_SHAPE(weights1_obj)[0]; + struct extra_options extra = get_extra_options(weighting_method); + if(extra.weights0.num_weights > 0 && extra.weights0.num_weights != found_weights){ + char msg[1024]; + snprintf(msg, 1024, "ValueError: In %s: specified weighting method %s which requires %"PRId64" weight(s)-per-particle, but found %d weight(s) instead!\n", + __FUNCTION__, weighting_method_str, extra.weights0.num_weights, found_weights); + countpairs_mocks_error_out(module, msg); + Py_RETURN_NONE; + } + + if(extra.weights0.num_weights > 0 && found_weights > MAX_NUM_WEIGHTS){ + char msg[1024]; + snprintf(msg, 1024, "ValueError: In %s: Provided %d weights-per-particle, but the code was compiled with MAX_NUM_WEIGHTS=%d.\n", + __FUNCTION__, found_weights, MAX_NUM_WEIGHTS); + countpairs_mocks_error_out(module, msg); + Py_RETURN_NONE; + } + + int64_t ND2 = ND1; + if(autocorr == 0) { + char msg[1024]; + if(x2_obj == NULL || y2_obj == NULL || z2_obj == NULL) { + snprintf(msg, 1024, "ValueError: In %s: If autocorr is 0, need to pass the second set of positions (X2=numpy array, Y2=numpy array, Z2=numpy array).\n", + __FUNCTION__); + countpairs_mocks_error_out(module, msg); + Py_RETURN_NONE; + } + if((weights1_obj == NULL) != (weights2_obj == NULL)){ + snprintf(msg, 1024, "ValueError: In %s: If autocorr is 0, must pass either zero or two sets of weights.\n", + __FUNCTION__); + countpairs_mocks_error_out(module, msg); + Py_RETURN_NONE; + } + + size_t element_size2; + ND2 = check_dims_and_datatype(module, x2_obj, y2_obj, z2_obj, weights2_obj, &element_size2); + if(ND2 == -1) { + //Error has already been set -> simply return + Py_RETURN_NONE; + } + /* Ensure the weights are of the right shape (n_weights, n_particles) */ + if(weights2_obj != NULL){ + npy_intp dims[2] = {-1, ND2}; + PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; + weights2_obj = (PyArrayObject *) PyArray_Newshape(weights2_obj, &pdims, NPY_CORDER); + } + + if(element_size != element_size2) { + 
snprintf(msg, 1024, "TypeError: In %s: The two arrays must have the same data-type. First array is of type %s while second array is of type %s\n", + __FUNCTION__, element_size == 4 ? "floats":"doubles", element_size2 == 4 ? "floats":"doubles"); + countpairs_mocks_error_out(module, msg); + Py_RETURN_NONE; + } + } + + /* Interpret the input objects as numpy arrays. */ + const int requirements = NPY_ARRAY_IN_ARRAY; + PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; + PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; + x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + if(weights1_obj != NULL){ + weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); + } + + if(autocorr == 0) { + x2_array = PyArray_FromArray(x2_obj, NOTYPE_DESCR, requirements); + y2_array = PyArray_FromArray(y2_obj, NOTYPE_DESCR, requirements); + z2_array = PyArray_FromArray(z2_obj, NOTYPE_DESCR, requirements); + if(weights2_obj != NULL){ + weights2_array = PyArray_FromArray(weights2_obj, NOTYPE_DESCR, requirements); + } + } + + if (x1_array == NULL || y1_array == NULL || z1_array == NULL || + (autocorr == 0 && (x2_array == NULL || y2_array == NULL || z2_array == NULL))) { + Py_XDECREF(x1_array); + Py_XDECREF(y1_array); + Py_XDECREF(z1_array); + Py_XDECREF(weights1_array); + + Py_XDECREF(x2_array); + Py_XDECREF(y2_array); + Py_XDECREF(z2_array); + Py_XDECREF(weights2_array); + char msg[1024]; + snprintf(msg, 1024, "TypeError: In %s: Could not convert input to arrays of allowed floating point types (doubles or floats). Are you passing numpy arrays?", + __FUNCTION__); + countpairs_mocks_error_out(module, msg); + Py_RETURN_NONE; + } + + /* Get pointers to the data as C-types. 
*/ + void *phiD1=NULL, *thetaD1=NULL, *czD1=NULL, *weights1=NULL; + void *phiD2=NULL, *thetaD2=NULL, *czD2=NULL, *weights2=NULL; + + phiD1 = PyArray_DATA((PyArrayObject *)x1_array); + thetaD1 = PyArray_DATA((PyArrayObject *)y1_array); + czD1 = PyArray_DATA((PyArrayObject *)z1_array); + if(weights1_array != NULL){ + weights1 = PyArray_DATA((PyArrayObject *) weights1_array); + } + + if(autocorr == 0) { + phiD2 = PyArray_DATA((PyArrayObject *) x2_array); + thetaD2 = PyArray_DATA((PyArrayObject *) y2_array); + czD2 = PyArray_DATA((PyArrayObject *) z2_array); + if(weights2_array != NULL){ + weights2 = PyArray_DATA((PyArrayObject *) weights2_array); + } + } + options.float_type = element_size; + + /* Pack the weights into extra_options */ + for(int64_t w = 0; w < extra.weights0.num_weights; w++){ + extra.weights0.weights[w] = (char *) weights1 + w*ND1*element_size; + if(autocorr == 0){ + extra.weights1.weights[w] = (char *) weights2 + w*ND2*element_size; + } + } + + NPY_BEGIN_THREADS_DEF; + NPY_BEGIN_THREADS; + + results_countpairs_mocks_s_mu results; + double c_api_time = 0.0; + int status = countpairs_mocks_s_mu(ND1,phiD1,thetaD1,czD1, + ND2,phiD2,thetaD2,czD2, + nthreads, + autocorr, + binfile, + mu_max, + nmu_bins, + cosmology, + &results, + &options, + &extra); + if(options.c_api_timer) { + c_api_time = options.c_api_time; + } + NPY_END_THREADS; + + /* Clean up. */ + Py_DECREF(x1_array);Py_DECREF(y1_array);Py_DECREF(z1_array);Py_XDECREF(weights1_array);//x1 should absolutely not be NULL + Py_XDECREF(x2_array);Py_XDECREF(y2_array);Py_XDECREF(z2_array);Py_XDECREF(weights2_array);//x2 might be NULL depending on value of autocorr + + if(status != EXIT_SUCCESS) { + Py_RETURN_NONE; + } + + +#if 0 + /* Output pairs*/ + for(int i=1;i stored in struct module_state _struct declared at the top of this file #if PY_MAJOR_VERSION < 3 (void) self; - PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. 
+ PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. #else //In python3, self is simply the module object that was returned earlier by init PyObject *module = self; -#endif +#endif PyArrayObject *x1_obj=NULL, *y1_obj=NULL, *weights1_obj=NULL; PyArrayObject *x2_obj=NULL, *y2_obj=NULL, *weights2_obj=NULL; @@ -1272,7 +1756,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg &(options.verbose), &(options.need_avg_sep), &(options.fast_acos), - &ra_bin_ref, &dec_bin_ref, + &ra_bin_ref, &dec_bin_ref, &(options.max_cells_per_dim), &(options.c_api_timer), &(options.instruction_set), @@ -1284,7 +1768,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg char msg[1024]; int len=snprintf(msg, 1024,"ArgumentError: In DDtheta_mocks> Could not parse the arguments. Input parameters are: \n"); - + /* How many keywords do we have? Subtract 1 because of the last NULL */ const size_t nitems = sizeof(kwlist)/sizeof(*kwlist) - 1; int status = print_kwlist_into_msg(msg, 1024, len, kwlist, nitems); @@ -1308,7 +1792,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg set_bin_refine_scheme(&options, BINNING_CUST);//custom binning -> code will honor requested binning scheme } - + size_t element_size; /* We have numpy arrays and all the required inputs*/ /* How many data points are there? 
And are they all of floating point type */ @@ -1317,7 +1801,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg //Error has already been set -> simply return Py_RETURN_NONE; } - + /* Ensure the weights are of the right shape (n_weights, n_particles) */ if(weights1_obj != NULL){ // A numpy dimension of length -1 will be expanded to n_weights @@ -1325,7 +1809,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; weights1_obj = (PyArrayObject *) PyArray_Newshape(weights1_obj, &pdims, NPY_CORDER); } - + /* Validate the user's choice of weighting method */ weight_method_t weighting_method; int wstatus = get_weight_method_by_name(weighting_method_str, &weighting_method); @@ -1344,7 +1828,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg countpairs_mocks_error_out(module, msg); Py_RETURN_NONE; } - + if(extra.weights0.num_weights > 0 && found_weights > MAX_NUM_WEIGHTS){ char msg[1024]; snprintf(msg, 1024, "ValueError: In %s: Provided %d weights-per-particle, but the code was compiled with MAX_NUM_WEIGHTS=%d.\n", @@ -1368,11 +1852,11 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg countpairs_mocks_error_out(module, msg); Py_RETURN_NONE; } - + size_t element_size2; ND2 = check_dims_and_datatype_ra_dec(module, x2_obj, y2_obj,&element_size2); if(ND2 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } /* Ensure the weights are of the right shape (n_weights, n_particles) */ @@ -1381,7 +1865,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; weights2_obj = (PyArrayObject *) PyArray_Newshape(weights2_obj, &pdims, NPY_CORDER); } - + if(element_size != element_size2) { snprintf(msg, 1024, "TypeError: In %s: The two arrays must have the same 
data-type. First array is of type %s while second array is of type %s\n", __FUNCTION__, element_size == 4 ? "floats":"doubles", element_size2 == 4 ? "floats":"doubles"); @@ -1389,7 +1873,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg Py_RETURN_NONE; } } - + /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; PyObject *x1_array = NULL, *y1_array = NULL, *weights1_array = NULL; @@ -1408,7 +1892,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg } } - if (x1_array == NULL || y1_array == NULL || + if (x1_array == NULL || y1_array == NULL || (autocorr == 0 && (x2_array == NULL || y2_array == NULL))) { Py_XDECREF(x1_array); Py_XDECREF(y1_array); @@ -1427,7 +1911,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg /* Get pointers to the data as C-types. */ void *phiD1 = NULL, *thetaD1 = NULL, *weights1=NULL; void *phiD2 = NULL, *thetaD2 = NULL, *weights2=NULL; - phiD1 = PyArray_DATA((PyArrayObject *) x1_array); + phiD1 = PyArray_DATA((PyArrayObject *) x1_array); thetaD1 = PyArray_DATA((PyArrayObject *) y1_array); if(weights1_array != NULL){ weights1 = PyArray_DATA((PyArrayObject *) weights1_array); @@ -1440,7 +1924,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg weights2 = PyArray_DATA((PyArrayObject *) weights2_array); } } - + /* Pack the weights into extra_options */ for(int64_t w = 0; w < extra.weights0.num_weights; w++){ extra.weights0.weights[w] = (char *) weights1 + w*ND1*element_size; @@ -1468,7 +1952,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg } NPY_END_THREADS; - + /* Clean up. 
*/ Py_DECREF(x1_array);Py_DECREF(y1_array);Py_XDECREF(weights1_array);//x1/y1 (representing ra1,dec1) should not be NULL Py_XDECREF(x2_array);Py_XDECREF(y2_array);Py_XDECREF(weights2_array);//x2/y2 may be NULL (in case of autocorr) @@ -1476,7 +1960,7 @@ static PyObject *countpairs_countpairs_theta_mocks(PyObject *self, PyObject *arg if(status != EXIT_SUCCESS) { Py_RETURN_NONE; } - + #if 0 /*---Output-Pairs-------------------------------------*/ double theta_low = results.theta_upp[0]; @@ -1509,11 +1993,11 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg //Error-handling is global in python2 -> stored in struct module_state _struct declared at the top of this file #if PY_MAJOR_VERSION < 3 (void) self; - PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. + PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. #else //In python3, self is simply the module object that was returned earlier by init PyObject *module = self; -#endif +#endif //x1->ra (phi), y1-> declination (theta1), z1->cz (cz1) //x2->ra (ph2), y2-> declination (theta2), z2->cz (cz2) @@ -1536,7 +2020,7 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg int8_t xbin_ref=options.bin_refine_factors[0], ybin_ref=options.bin_refine_factors[1], zbin_ref=options.bin_refine_factors[2]; - + static char *kwlist[] = { "rmax", "nbins", @@ -1585,7 +2069,7 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg char msg[1024]; int len=snprintf(msg, 1024,"ArgumentError: In vpf_mocks> Could not parse the arguments. Input parameters are: \n"); - + /* How many keywords do we have? 
Subtract 1 because of the last NULL */ const size_t nitems = sizeof(kwlist)/sizeof(*kwlist) - 1; int status = print_kwlist_into_msg(msg, 1024, len, kwlist, nitems); @@ -1593,7 +2077,7 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg fprintf(stderr,"Error message does not contain all of the keywords\n"); } countpairs_mocks_error_out(module,msg); - + Py_RETURN_NONE; } /*This is for the fastest isa */ @@ -1609,20 +2093,20 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg options.bin_refine_factors[2] = zbin_ref; set_bin_refine_scheme(&options, BINNING_CUST);//custom binning -> code will honor requested binning scheme } - + size_t element_size; /* We have numpy arrays and all the required inputs*/ /* How many data points are there? And are they all of floating point type */ const int64_t ND1 = check_dims_and_datatype(module, x1_obj, y1_obj, z1_obj, NULL, &element_size); if(ND1 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } size_t element_size2; const int64_t ND2 = check_dims_and_datatype(module, x2_obj, y2_obj, z2_obj, NULL, &element_size2); if(ND2 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } @@ -1634,7 +2118,7 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg Py_RETURN_NONE; } - + /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL; @@ -1666,7 +2150,7 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg /* Get pointers to the data as C-types. 
*/ void *phiD1=NULL, *thetaD1=NULL,*czD1=NULL; void *phiD2=NULL, *thetaD2=NULL,*czD2=NULL; - + phiD1 = PyArray_DATA((PyArrayObject *) x1_array); thetaD1 = PyArray_DATA((PyArrayObject *) y1_array); czD1 = PyArray_DATA((PyArrayObject *) z1_array); @@ -1677,7 +2161,7 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg NPY_BEGIN_THREADS_DEF; NPY_BEGIN_THREADS; - + results_countspheres_mocks results; options.float_type = element_size; double c_api_time = 0.0; @@ -1702,7 +2186,7 @@ static PyObject *countpairs_countspheres_vpf_mocks(PyObject *self, PyObject *arg if(status != EXIT_SUCCESS) { Py_RETURN_NONE; } - + #if 0 // Output the results const double rstep = rmax/(double)nbin ; diff --git a/mocks/python_bindings/call_correlation_functions_mocks.py b/mocks/python_bindings/call_correlation_functions_mocks.py index 5fe5d344..ede6fa47 100644 --- a/mocks/python_bindings/call_correlation_functions_mocks.py +++ b/mocks/python_bindings/call_correlation_functions_mocks.py @@ -15,7 +15,8 @@ from _countpairs_mocks import \ countpairs_rp_pi_mocks as rp_pi_mocks,\ countpairs_theta_mocks as theta_mocks,\ - countspheres_vpf_mocks as vpf_mocks + countspheres_vpf_mocks as vpf_mocks, \ + countpairs_s_mu_mocks as s_mu_mocks try: @@ -80,7 +81,7 @@ def main(): autocorr = 1 numbins_to_print = 5 cosmology = 1 - + print("\nRunning 2-D correlation function xi(rp,pi)") results_DDrppi, _ = rp_pi_mocks(autocorr, cosmology, nthreads, pimax, binfile, @@ -115,9 +116,29 @@ def main(): items = results_DDrppi[ibin] print("{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d}" .format(items[0], items[1], items[2], items[3], items[4])) - + print("-----------------------------------------------------------") - + + nmu_bins = 10 + mu_max = 1.0 + + print("\nRunning 2-D correlation function xi(s,mu)") + results_DDsmu, _ = s_mu_mocks(autocorr, cosmology, nthreads, + mu_max, nmu_bins, binfile, + ra, dec, cz, weights1=weights, + output_savg=True, verbose=True, + 
weight_type='pair_product') + print("\n# ****** DD(s,mu): first {0} bins ******* " + .format(numbins_to_print)) + print("# smin smax savg mu_upper npairs weight_avg") + print("##########################################################################") + for ibin in range(numbins_to_print): + items = results_DDsmu[ibin] + print("{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d} {5:12.4f}" + .format(items[0], items[1], items[2], items[3], items[4], items[5])) + + print("--------------------------------------------------------------------------") + binfile = pjoin(dirname(abspath(__file__)), "../tests/", "angular_bins") print("\nRunning angular correlation function w(theta)") diff --git a/mocks/tests/Makefile b/mocks/tests/Makefile index 6300a11c..05c04d7d 100644 --- a/mocks/tests/Makefile +++ b/mocks/tests/Makefile @@ -8,10 +8,12 @@ IO_DIR := $(ROOT_DIR)/io MOCKS_DIR := $(ROOT_DIR)/mocks DDrppi_mocks_DIR := $(MOCKS_DIR)/DDrppi_mocks +DDsmu_mocks_DIR := $(MOCKS_DIR)/DDsmu_mocks DDtheta_mocks_DIR := $(MOCKS_DIR)/DDtheta_mocks VPF_mocks_DIR := $(MOCKS_DIR)/vpf_mocks DDrppi_mocks_LIB := countpairs_rp_pi_mocks +DDsmu_mocks_LIB := countpairs_s_mu_mocks DDtheta_mocks_LIB := countpairs_theta_mocks VPF_mocks_LIB := countspheres_mocks @@ -28,14 +30,15 @@ endif TARGETSRC := tests_mocks.c $(IO_DIR)/io.c $(IO_DIR)/ftread.c $(UTILS_DIR)/utils.c $(UTILS_DIR)/cosmology_params.c TARGETOBJS := $(TARGETSRC:.c=.o) C_LIBRARIES := $(DDrppi_mocks_DIR)/lib$(DDrppi_mocks_LIB).a $(DDtheta_mocks_DIR)/lib$(DDtheta_mocks_LIB).a \ - $(VPF_mocks_DIR)/lib$(VPF_mocks_LIB).a -INCL := $(IO_DIR)/io.h $(IO_DIR)/ftread.h $(UTILS_DIR)/utils.h \ - $(DDrppi_mocks_DIR)/$(DDrppi_mocks_LIB).h $(DDtheta_mocks_DIR)/$(DDtheta_mocks_LIB).h $(VPF_mocks_DIR)/$(VPF_mocks_LIB).h + $(VPF_mocks_DIR)/lib$(VPF_mocks_LIB).a $(DDsmu_mocks_DIR)/lib$(DDsmu_mocks_LIB).a +INCL := $(IO_DIR)/io.h $(IO_DIR)/ftread.h $(UTILS_DIR)/utils.h $(UTILS_DIR)/tests_common.h \ + $(DDrppi_mocks_DIR)/$(DDrppi_mocks_LIB).h 
$(DDtheta_mocks_DIR)/$(DDtheta_mocks_LIB).h $(VPF_mocks_DIR)/$(VPF_mocks_LIB).h \ + $(DDsmu_mocks_DIR)/$(DDsmu_mocks_LIB).h -EXTRA_INCL:=-DDOUBLE_PREC -I$(DDrppi_mocks_DIR) -I$(DDtheta_mocks_DIR) -I$(VPF_mocks_DIR) $(GSL_CFLAGS) +EXTRA_INCL:=-DDOUBLE_PREC -I$(DDrppi_mocks_DIR) -I$(DDtheta_mocks_DIR) -I$(VPF_mocks_DIR) -I$(DDsmu_mocks_DIR) $(GSL_CFLAGS) EXTRA_LINK := $(GSL_LINK) -OPT := +OPT := all: tests $(TARGETS) $(INCL) uncompress $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk Makefile @@ -45,6 +48,9 @@ UTILS_SRC := $(UTILS_DIR)/*.[ch] $(UTILS_DIR)/*.c.src $(UTILS_DIR)/*.h.src $(DDrppi_mocks_DIR)/lib$(DDrppi_mocks_LIB).a: $(DDrppi_mocks_DIR)/*.c $(DDrppi_mocks_DIR)/*.c.src $(DDrppi_mocks_DIR)/*.h.src $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk $(UTILS_SRC) $(MAKE) -C $(DDrppi_mocks_DIR) libs +$(DDsmu_mocks_DIR)/lib$(DDsmu_mocks_LIB).a: $(DDsmu_mocks_DIR)/*.c $(DDsmu_mocks_DIR)/*.c.src $(DDsmu_mocks_DIR)/*.h.src $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk $(UTILS_SRC) + $(MAKE) -C $(DDsmu_mocks_DIR) libs + $(DDtheta_mocks_DIR)/lib$(DDtheta_mocks_LIB).a: $(DDtheta_mocks_DIR)/*.c $(DDtheta_mocks_DIR)/*.c.src $(DDtheta_mocks_DIR)/*.h.src $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk $(UTILS_SRC) $(MAKE) -C $(DDtheta_mocks_DIR) libs @@ -52,9 +58,9 @@ $(VPF_mocks_DIR)/lib$(VPF_mocks_LIB).a: $(VPF_mocks_DIR)/*.c $(VPF_mocks_DIR)/*. $(MAKE) -C $(VPF_mocks_DIR) libs python_lib: tests $(TARGETOBJS) $(INCL) $(ROOT_DIR)/mocks.options $(ROOT_DIR)/common.mk Makefile | $(ROOT_DIR)/lib - @echo + @echo @echo "All MOCKS tests are done. Now checking that the C extensions work." 
- @echo + @echo $(MAKE) -C ../python_bindings tests tests: $(TARGET) @@ -69,14 +75,17 @@ uncompress: | data cd ..; \ } DDrppi_mocks: $(TARGET) - ./$(TARGET) 0 3 + ./$(TARGET) 0 3 DDtheta_mocks: tests_mocks - ./$(TARGET) 1 4 + ./$(TARGET) 1 4 vpf_mocks: tests_mocks ./$(TARGET) 2 5 +DDsmu_mocks: tests_mocks + ./$(TARGET) 6 7 + clean: $(RM) $(TARGETS) $(TARGETOBJS) $(RM) -R *.dSYM diff --git a/mocks/tests/Mr19_mock_DDsmu.DR b/mocks/tests/Mr19_mock_DDsmu.DR new file mode 100644 index 00000000..5dc7f136 --- /dev/null +++ b/mocks/tests/Mr19_mock_DDsmu.DR @@ -0,0 +1,140 @@ + 16 0.12731059 -0.82871857 0.10000000 0.20633790 + 11 0.12515353 -0.82871857 0.20000000 0.15051830 + 18 0.12961133 -0.82871857 0.30000000 0.29915851 + 11 0.12919907 -0.82871857 0.40000000 0.36693781 + 8 0.13127004 -0.82871857 0.50000000 0.18077128 + 11 0.12755128 -0.82871857 0.60000000 0.19729862 + 15 0.12823310 -0.82871857 0.70000000 0.30941661 + 8 0.13026533 -0.82871857 0.80000000 0.38308526 + 11 0.12652695 -0.82871857 0.90000000 0.27457505 + 11 0.12088876 -0.82871857 1.00000000 0.27030041 + 44 0.18988933 -0.65743714 0.10000000 0.25046043 + 40 0.18934067 -0.65743714 0.20000000 0.25819683 + 46 0.18927409 -0.65743714 0.30000000 0.19757136 + 23 0.18814162 -0.65743714 0.40000000 0.28705438 + 44 0.18967457 -0.65743714 0.50000000 0.23617978 + 37 0.18986317 -0.65743714 0.60000000 0.22119408 + 33 0.18751866 -0.65743714 0.70000000 0.21886382 + 49 0.19007635 -0.65743714 0.80000000 0.21821130 + 54 0.18674075 -0.65743714 0.90000000 0.27237927 + 36 0.19187276 -0.65743714 1.00000000 0.24887600 + 144 0.27963534 -0.48615571 0.10000000 0.25038225 + 155 0.28616186 -0.48615571 0.20000000 0.23707988 + 163 0.28191365 -0.48615571 0.30000000 0.26649402 + 120 0.27891034 -0.48615571 0.40000000 0.24197924 + 135 0.28152441 -0.48615571 0.50000000 0.27956477 + 166 0.28448131 -0.48615571 0.60000000 0.25379995 + 124 0.27656690 -0.48615571 0.70000000 0.22831096 + 149 0.27609419 -0.48615571 0.80000000 0.24063340 + 128 0.27998106 
-0.48615571 0.90000000 0.27801332 + 143 0.27705729 -0.48615571 1.00000000 0.26451608 + 508 0.41566032 -0.31487428 0.10000000 0.25759224 + 445 0.41393342 -0.31487428 0.20000000 0.25868560 + 444 0.41368517 -0.31487428 0.30000000 0.25914849 + 474 0.41476120 -0.31487428 0.40000000 0.24918414 + 447 0.41821551 -0.31487428 0.50000000 0.24930410 + 458 0.41612023 -0.31487428 0.60000000 0.26149722 + 401 0.41942983 -0.31487428 0.70000000 0.24346060 + 476 0.41491533 -0.31487428 0.80000000 0.26839799 + 450 0.41631213 -0.31487428 0.90000000 0.25158405 + 456 0.41624403 -0.31487428 1.00000000 0.25803716 + 1505 0.61603708 -0.14359285 0.10000000 0.25254030 + 1504 0.61662759 -0.14359285 0.20000000 0.25054681 + 1440 0.61864632 -0.14359285 0.30000000 0.25218452 + 1475 0.61395625 -0.14359285 0.40000000 0.24692343 + 1464 0.61714052 -0.14359285 0.50000000 0.25008302 + 1358 0.61534298 -0.14359285 0.60000000 0.25381702 + 1460 0.61634313 -0.14359285 0.70000000 0.25965402 + 1463 0.61748002 -0.14359285 0.80000000 0.24329196 + 1486 0.61420989 -0.14359285 0.90000000 0.25247011 + 1535 0.61364964 -0.14359285 1.00000000 0.25148934 + 4840 0.91599811 0.02768858 0.10000000 0.25299581 + 4778 0.91322204 0.02768858 0.20000000 0.24907554 + 4892 0.91413760 0.02768858 0.30000000 0.25405492 + 4755 0.91489691 0.02768858 0.40000000 0.24244488 + 4861 0.91404203 0.02768858 0.50000000 0.24703680 + 4849 0.91502518 0.02768858 0.60000000 0.25107696 + 4905 0.91465004 0.02768858 0.70000000 0.25486661 + 4808 0.91361794 0.02768858 0.80000000 0.24922259 + 4731 0.91424215 0.02768858 0.90000000 0.25047345 + 4779 0.91412279 0.02768858 1.00000000 0.25095504 + 15677 1.35610305 0.19897000 0.10000000 0.24670153 + 15526 1.35514701 0.19897000 0.20000000 0.24716871 + 15417 1.35534215 0.19897000 0.30000000 0.24755408 + 15465 1.35677595 0.19897000 0.40000000 0.25058873 + 15516 1.35614667 0.19897000 0.50000000 0.25110275 + 15343 1.35248044 0.19897000 0.60000000 0.24583133 + 15448 1.35441951 0.19897000 0.70000000 0.24819576 + 15314 
1.35850655 0.19897000 0.80000000 0.24877819 + 15363 1.35733116 0.19897000 0.90000000 0.25047418 + 15323 1.35480003 0.19897000 1.00000000 0.24727734 + 50173 2.01189660 0.37025143 0.10000000 0.24871628 + 50333 2.01226591 0.37025143 0.20000000 0.25029286 + 49672 2.00924038 0.37025143 0.30000000 0.24951873 + 50105 2.01148289 0.37025143 0.40000000 0.25016451 + 50514 2.01265806 0.37025143 0.50000000 0.24985593 + 50145 2.01417889 0.37025143 0.60000000 0.24999501 + 49984 2.01375145 0.37025143 0.70000000 0.25029335 + 50498 2.01440817 0.37025143 0.80000000 0.25006703 + 50131 2.01264557 0.37025143 0.90000000 0.24950389 + 50712 2.01214364 0.37025143 1.00000000 0.24930654 + 163798 2.98425196 0.54153286 0.10000000 0.24966735 + 163542 2.98354034 0.54153286 0.20000000 0.24918066 + 162483 2.98380717 0.54153286 0.30000000 0.25002588 + 163085 2.98434898 0.54153286 0.40000000 0.24952674 + 162497 2.98289432 0.54153286 0.50000000 0.24977827 + 161895 2.98420528 0.54153286 0.60000000 0.24968099 + 161978 2.98355442 0.54153286 0.70000000 0.25013319 + 162020 2.98299090 0.54153286 0.80000000 0.25021899 + 162310 2.98442359 0.54153286 0.90000000 0.24973058 + 162560 2.98411141 0.54153286 1.00000000 0.24988413 + 528585 4.42722780 0.71281429 0.10000000 0.25005917 + 526245 4.42650446 0.71281429 0.20000000 0.24960208 + 523747 4.42770388 0.71281429 0.30000000 0.25009919 + 524827 4.42799042 0.71281429 0.40000000 0.24995922 + 523306 4.42656072 0.71281429 0.50000000 0.24934124 + 520819 4.42697410 0.71281429 0.60000000 0.24973925 + 522375 4.42750903 0.71281429 0.70000000 0.24927569 + 519933 4.42624285 0.71281429 0.80000000 0.24964784 + 519286 4.42590936 0.71281429 0.90000000 0.24948366 + 523998 4.42625008 0.71281429 1.00000000 0.24982650 + 1701198 6.56722610 0.88409572 0.10000000 0.24981610 + 1695386 6.56615839 0.88409572 0.20000000 0.24980648 + 1689614 6.56582680 0.88409572 0.30000000 0.24970345 + 1680288 6.56634252 0.88409572 0.40000000 0.24980833 + 1676873 6.56555037 0.88409572 0.50000000 0.24982931 + 
1672186 6.56526366 0.88409572 0.60000000 0.25029098 + 1669435 6.56508268 0.88409572 0.70000000 0.24980584 + 1667573 6.56444316 0.88409572 0.80000000 0.24978917 + 1666891 6.56512622 0.88409572 0.90000000 0.24970372 + 1681090 6.56461210 0.88409572 1.00000000 0.24942779 + 5469907 9.73975928 1.05537715 0.10000000 0.24980874 + 5427364 9.73869893 1.05537715 0.20000000 0.25010323 + 5392991 9.73928456 1.05537715 0.30000000 0.24976474 + 5362761 9.73865074 1.05537715 0.40000000 0.25003968 + 5336546 9.73801205 1.05537715 0.50000000 0.24989101 + 5309251 9.73660850 1.05537715 0.60000000 0.24982844 + 5290896 9.73708529 1.05537715 0.70000000 0.25010795 + 5281317 9.73624504 1.05537715 0.80000000 0.24995278 + 5275222 9.73499203 1.05537715 0.90000000 0.24979796 + 5314662 9.73553057 1.05537715 1.00000000 0.24971061 + 17319173 14.44207574 1.22665858 0.10000000 0.24983025 + 17152369 14.43989969 1.22665858 0.20000000 0.24972279 + 16982951 14.43981743 1.22665858 0.30000000 0.24975144 + 16852909 14.43816283 1.22665858 0.40000000 0.24973681 + 16728133 14.43656638 1.22665858 0.50000000 0.24975790 + 16615986 14.43514019 1.22665858 0.60000000 0.24969757 + 16520907 14.43442137 1.22665858 0.70000000 0.24973680 + 16439429 14.43367852 1.22665858 0.80000000 0.24977122 + 16409485 14.43252612 1.22665858 0.90000000 0.24970919 + 16544115 14.43317964 1.22665858 1.00000000 0.24992302 + 53981727 21.41207942 1.39794001 0.10000000 0.24972265 + 53169238 21.40834689 1.39794001 0.20000000 0.24970470 + 52467621 21.40406125 1.39794001 0.30000000 0.24966018 + 51795544 21.40083803 1.39794001 0.40000000 0.24964408 + 51210464 21.39821490 1.39794001 0.50000000 0.24965978 + 50699567 21.39559861 1.39794001 0.60000000 0.24966266 + 50268928 21.39291448 1.39794001 0.70000000 0.24956103 + 49994143 21.39486900 1.39794001 0.80000000 0.24963318 + 49913169 21.39462559 1.39794001 0.90000000 0.24972569 + 50385465 21.39529041 1.39794001 1.00000000 0.24966321 diff --git a/mocks/tests/Mr19_mock_DDsmu.RR 
b/mocks/tests/Mr19_mock_DDsmu.RR new file mode 100644 index 00000000..89c3e16e --- /dev/null +++ b/mocks/tests/Mr19_mock_DDsmu.RR @@ -0,0 +1,140 @@ + 160 0.12628979 -0.82871857 0.10000000 0.21743459 + 128 0.12837521 -0.82871857 0.20000000 0.22917674 + 132 0.12957688 -0.82871857 0.30000000 0.24310458 + 192 0.12604276 -0.82871857 0.40000000 0.22955525 + 168 0.13041355 -0.82871857 0.50000000 0.24816036 + 144 0.12665805 -0.82871857 0.60000000 0.25716712 + 150 0.12622404 -0.82871857 0.70000000 0.24221825 + 150 0.12508100 -0.82871857 0.80000000 0.24376147 + 146 0.12730893 -0.82871857 0.90000000 0.23014688 + 176 0.12536130 -0.82871857 1.00000000 0.27060385 + 482 0.18942787 -0.65743714 0.10000000 0.23813889 + 474 0.18732210 -0.65743714 0.20000000 0.24467089 + 490 0.18882176 -0.65743714 0.30000000 0.24940495 + 454 0.18995518 -0.65743714 0.40000000 0.22977090 + 442 0.19047925 -0.65743714 0.50000000 0.25555259 + 488 0.19030407 -0.65743714 0.60000000 0.23769962 + 428 0.18830197 -0.65743714 0.70000000 0.26006574 + 422 0.18943326 -0.65743714 0.80000000 0.25545760 + 478 0.19054173 -0.65743714 0.90000000 0.25111605 + 474 0.18991154 -0.65743714 1.00000000 0.25105800 + 1484 0.27946766 -0.48615571 0.10000000 0.25082409 + 1438 0.28084465 -0.48615571 0.20000000 0.24232445 + 1460 0.27912751 -0.48615571 0.30000000 0.25310631 + 1506 0.27976848 -0.48615571 0.40000000 0.25017242 + 1490 0.28071458 -0.48615571 0.50000000 0.25109175 + 1546 0.28270309 -0.48615571 0.60000000 0.24910759 + 1468 0.28041406 -0.48615571 0.70000000 0.23917503 + 1490 0.28144473 -0.48615571 0.80000000 0.24959235 + 1492 0.28068602 -0.48615571 0.90000000 0.24560202 + 1352 0.27975949 -0.48615571 1.00000000 0.26223559 + 4768 0.41541954 -0.31487428 0.10000000 0.24492959 + 4956 0.41445371 -0.31487428 0.20000000 0.24907868 + 4932 0.41585921 -0.31487428 0.30000000 0.24676711 + 4808 0.41517653 -0.31487428 0.40000000 0.24578592 + 4978 0.41643178 -0.31487428 0.50000000 0.25628214 + 4816 0.41488549 -0.31487428 0.60000000 0.24999633 
+ 4854 0.41539651 -0.31487428 0.70000000 0.25658033 + 4898 0.41408620 -0.31487428 0.80000000 0.24757377 + 4838 0.41570765 -0.31487428 0.90000000 0.24527180 + 4902 0.41619938 -0.31487428 1.00000000 0.24264299 + 15732 0.61643753 -0.14359285 0.10000000 0.25292104 + 15504 0.61570594 -0.14359285 0.20000000 0.25387814 + 15904 0.61623943 -0.14359285 0.30000000 0.24902701 + 15700 0.61557887 -0.14359285 0.40000000 0.25024467 + 15624 0.61583671 -0.14359285 0.50000000 0.25071424 + 15570 0.61572044 -0.14359285 0.60000000 0.24510102 + 15746 0.61665016 -0.14359285 0.70000000 0.24785056 + 15804 0.61562741 -0.14359285 0.80000000 0.25135713 + 15830 0.61613368 -0.14359285 0.90000000 0.25063374 + 15718 0.61683780 -0.14359285 1.00000000 0.25100202 + 51812 0.91488229 0.02768858 0.10000000 0.25160493 + 51628 0.91454455 0.02768858 0.20000000 0.25105392 + 51392 0.91383275 0.02768858 0.30000000 0.25054706 + 51458 0.91474181 0.02768858 0.40000000 0.25359633 + 51510 0.91360431 0.02768858 0.50000000 0.24926571 + 51262 0.91525552 0.02768858 0.60000000 0.25006126 + 51450 0.91358300 0.02768858 0.70000000 0.24833332 + 51728 0.91458395 0.02768858 0.80000000 0.25058864 + 51704 0.91366245 0.02768858 0.90000000 0.24943424 + 51702 0.91548296 0.02768858 1.00000000 0.25058458 + 167810 1.35672520 0.19897000 0.10000000 0.24946337 + 167526 1.35600111 0.19897000 0.20000000 0.25016147 + 166978 1.35523003 0.19897000 0.30000000 0.25021493 + 167926 1.35605349 0.19897000 0.40000000 0.24952661 + 166768 1.35587951 0.19897000 0.50000000 0.24994968 + 168016 1.35681133 0.19897000 0.60000000 0.25027993 + 167298 1.35605243 0.19897000 0.70000000 0.25044787 + 167312 1.35605190 0.19897000 0.80000000 0.25107847 + 167906 1.35544064 0.19897000 0.90000000 0.25092785 + 168506 1.35656696 0.19897000 1.00000000 0.24894104 + 548394 2.01171852 0.37025143 0.10000000 0.24953462 + 546334 2.01199335 0.37025143 0.20000000 0.25077123 + 545538 2.01137662 0.37025143 0.30000000 0.25072295 + 543310 2.01199925 0.37025143 0.40000000 0.24916730 
+ 543920 2.01212113 0.37025143 0.50000000 0.25095361 + 542280 2.01140194 0.37025143 0.60000000 0.25080692 + 541306 2.01175064 0.37025143 0.70000000 0.25040784 + 541920 2.01151853 0.37025143 0.80000000 0.25023523 + 542020 2.01248167 0.37025143 0.90000000 0.24994036 + 545146 2.01203497 0.37025143 1.00000000 0.25020849 + 1768270 2.98415267 0.54153286 0.10000000 0.25006886 + 1762308 2.98429495 0.54153286 0.20000000 0.24985701 + 1763662 2.98471507 0.54153286 0.30000000 0.25041549 + 1757430 2.98413600 0.54153286 0.40000000 0.24999589 + 1751516 2.98453668 0.54153286 0.50000000 0.25001992 + 1749338 2.98420092 0.54153286 0.60000000 0.24972907 + 1748874 2.98467465 0.54153286 0.70000000 0.25019810 + 1749772 2.98389924 0.54153286 0.80000000 0.25010485 + 1750794 2.98361099 0.54153286 0.90000000 0.24981918 + 1757564 2.98452942 0.54153286 1.00000000 0.24969792 + 5707512 4.42654360 0.71281429 0.10000000 0.25031193 + 5690564 4.42654992 0.71281429 0.20000000 0.25020124 + 5669400 4.42686268 0.71281429 0.30000000 0.24997916 + 5653936 4.42656855 0.71281429 0.40000000 0.24999317 + 5639318 4.42601444 0.71281429 0.50000000 0.24992769 + 5628352 4.42607913 0.71281429 0.60000000 0.25015125 + 5620152 4.42638127 0.71281429 0.70000000 0.24997355 + 5618500 4.42564675 0.71281429 0.80000000 0.24994699 + 5625284 4.42559179 0.71281429 0.90000000 0.24989487 + 5649710 4.42599173 0.71281429 1.00000000 0.24991835 + 18367070 6.56656610 0.88409572 0.10000000 0.25007834 + 18265436 6.56639756 0.88409572 0.20000000 0.24998823 + 18187802 6.56562711 0.88409572 0.30000000 0.25012303 + 18108114 6.56540420 0.88409572 0.40000000 0.25021503 + 18036148 6.56545726 0.88409572 0.50000000 0.25010607 + 17968856 6.56496478 0.88409572 0.60000000 0.24997142 + 17937552 6.56470001 0.88409572 0.70000000 0.25005255 + 17909324 6.56448033 0.88409572 0.80000000 0.25011287 + 17917216 6.56436568 0.88409572 0.90000000 0.24998481 + 18012276 6.56435986 0.88409572 1.00000000 0.24995248 + 58768132 9.73925834 1.05537715 0.10000000 
0.25010973 + 58317582 9.73862576 1.05537715 0.20000000 0.25010661 + 57885266 9.73757417 1.05537715 0.30000000 0.25005669 + 57491968 9.73672770 1.05537715 0.40000000 0.25001275 + 57177014 9.73627117 1.05537715 0.50000000 0.25006929 + 56896508 9.73533655 1.05537715 0.60000000 0.25009712 + 56640136 9.73511776 1.05537715 0.70000000 0.25001196 + 56498546 9.73470899 1.05537715 0.80000000 0.25000736 + 56459498 9.73377809 1.05537715 0.90000000 0.24999043 + 56883826 9.73435806 1.05537715 1.00000000 0.25002170 + 186432122 14.44310499 1.22665858 0.10000000 0.25004933 + 184233678 14.44135371 1.22665858 0.20000000 0.25002489 + 182290138 14.43972295 1.22665858 0.30000000 0.24994850 + 180433056 14.43783328 1.22665858 0.40000000 0.24998143 + 178833330 14.43601495 1.22665858 0.50000000 0.25003258 + 177435368 14.43435874 1.22665858 0.60000000 0.25001640 + 176203780 14.43298254 1.22665858 0.70000000 0.25001632 + 175369724 14.43202719 1.22665858 0.80000000 0.25000012 + 175082112 14.43180162 1.22665858 0.90000000 0.24996348 + 176546470 14.43210484 1.22665858 1.00000000 0.24994666 + 582460056 21.41432667 1.39794001 0.10000000 0.25001249 + 572504788 21.41001671 1.39794001 0.20000000 0.24998329 + 563314804 21.40574110 1.39794001 0.30000000 0.25001709 + 554941466 21.40174330 1.39794001 0.40000000 0.25000411 + 547346358 21.39784700 1.39794001 0.50000000 0.24997044 + 540582506 21.39487204 1.39794001 0.60000000 0.24997578 + 535047050 21.39250791 1.39794001 0.70000000 0.24997567 + 530980264 21.39004662 1.39794001 0.80000000 0.24997634 + 529103668 21.38854786 1.39794001 0.90000000 0.25001065 + 534167788 21.38951334 1.39794001 1.00000000 0.25001138 diff --git a/mocks/tests/tests_mocks.c b/mocks/tests/tests_mocks.c index f86770d4..2f5a15cd 100644 --- a/mocks/tests/tests_mocks.c +++ b/mocks/tests/tests_mocks.c @@ -6,36 +6,27 @@ directory at https://github.com/manodeep/Corrfunc/ */ -#include -#include -#include -#include -#include -#include - -#ifndef MAXLEN -#define MAXLEN 500 -#endif +#include 
"tests_common.h" +#include "io.h" +#include "utils.h" +#include "cosmology_params.h" #if !(defined(__INTEL_COMPILER)) && defined(USE_AVX) #warning Test suite for mocks are faster with Intel compiler, icc, AVX libraries. #endif -#include "defs.h" -#include "io.h" -#include "utils.h" -#include "cosmology_params.h" - #include "../DDrppi_mocks/countpairs_rp_pi_mocks.h" +#include "../DDsmu_mocks/countpairs_s_mu_mocks.h" #include "../DDtheta_mocks/countpairs_theta_mocks.h" #include "../vpf_mocks/countspheres_mocks.h" char tmpoutputfile[]="../tests/tests_mocks_output.txt"; int test_DDrppi_mocks(const char *correct_outputfile); -int test_wtheta_mocks(const char *correct_outputfile); +int test_DDtheta_mocks(const char *correct_outputfile); int test_vpf_mocks(const char *correct_outputfile); +int test_DDsmu_mocks(const char *correct_outputfile); void read_data_and_set_globals(const char *firstfilename, const char *firstformat,const char *secondfilename,const char *secondformat); @@ -46,38 +37,19 @@ double *RA1=NULL,*DEC1=NULL,*CZ1=NULL,*weights1=NULL; int ND2; double *RA2=NULL,*DEC2=NULL,*CZ2=NULL,*weights2=NULL; -char binfile[]="../tests/bins"; -char angular_binfile[]="../tests/angular_bins"; -double pimax=40.0; -double boxsize=420.0; -#if defined(_OPENMP) -const int nthreads=4; -#else -const int nthreads=1; -#endif const int cosmology_flag=1; char current_file1[MAXLEN],current_file2[MAXLEN]; -const double maxdiff = 1e-9; -const double maxreldiff = 1e-6; - struct config_options options; -const isa instruction_sets[] = {FALLBACK -#if defined(__SSE4_2__) - , SSE42 -#endif -#if defined(__AVX__) - , AVX -#endif -}; -const int num_isets = sizeof(instruction_sets)/sizeof(instruction_sets[0]); //end of global variables int test_DDrppi_mocks(const char *correct_outputfile) { + results_countpairs_mocks results; + int ret = EXIT_FAILURE; assert(RA1 != NULL && DEC1 != NULL && CZ1 != NULL && "ERROR: In test suite for DDrppi ra/dec/cz can not be NULL pointers"); int autocorr = 
(RA1==RA2) ? 1:0; - + // Set up the weights pointers weight_method_t weight_method = PAIR_PRODUCT; struct extra_options extra = get_extra_options(weight_method); @@ -85,66 +57,67 @@ int test_DDrppi_mocks(const char *correct_outputfile) extra.weights1.weights[0] = weights2; //Do DD(rp,pi) counts - results_countpairs_mocks results; - int status = countpairs_mocks(ND1,RA1,DEC1,CZ1, - ND2,RA2,DEC2,CZ2, - nthreads, - autocorr, - binfile, - pimax, - cosmology_flag, - &results, - &options, - &extra); - if(status != EXIT_SUCCESS) { - return status; - } + BEGIN_INTEGRATION_TEST_SECTION + int status = countpairs_mocks(ND1,RA1,DEC1,CZ1, + ND2,RA2,DEC2,CZ2, + nthreads, + autocorr, + binfile, + pimax, + cosmology_flag, + &results, + &options, + &extra); + if(status != EXIT_SUCCESS) { + return status; + } - int ret = EXIT_FAILURE; - FILE *fp=my_fopen(correct_outputfile,"r"); - if(fp == NULL) { - free_results_mocks(&results); - return EXIT_FAILURE; - } - const double dpi = pimax/(double)results.npibin ; - const int npibin = results.npibin; - for(int i=1;i brute-force // (dec_link, ra_link) == (1, 0) -> dec-linking only // (dec_link, ra_link) == (1, 1) -> dec + ra linking - for(int dec_link=0;dec_link<=1;dec_link++) { - for(int ra_link=0;ra_link <= dec_link; ra_link++) { - options.link_in_dec=dec_link; - options.link_in_ra=ra_link; - for(int bf=min_bin_ref;bf<=max_bin_ref;bf++) { - if((dec_link + ra_link) == 0 && bf > min_bin_ref) continue;//bin refine factor has no impact on brute-force - options.bin_refine_factors[0] = bf; - options.bin_refine_factors[1] = bf; - options.bin_refine_factors[2] = bf; - - // Check the specific implementations for each instruction set - for(int iset=0;iset only check brute-force once + if(dec_link == 0 && ra_link == 0 && (dec_bin_ref != min_bin_ref || ra_bin_ref != min_bin_ref)) continue; + + const int bf[] = {ra_bin_ref, dec_bin_ref, -1}; + set_custom_bin_refine_factors(&options, bf); + + options.link_in_dec=dec_link; + options.link_in_ra=ra_link; + 
+ fprintf(stderr,"Running with dec-linking = %d ra-linking = %d bin-ref = (%d, %d) and instruction set = %s ", + dec_link, ra_link, + options.bin_refine_factors[0], + options.bin_refine_factors[1], + isa_name[iset]); + + current_utc_time(&t0); #else - options.link_in_dec = 1; - options.link_in_ra = 1; -#endif - - // Set up the weights pointers - weight_method_t weight_method = PAIR_PRODUCT; - struct extra_options extra = get_extra_options(weight_method); - extra.weights0.weights[0] = weights1; - extra.weights1.weights[0] = weights2; - int status = countpairs_theta_mocks(ND1,RA1,DEC1, - ND2,RA2,DEC2, - nthreads, - autocorr, - angular_binfile, - &results, - &options, - &extra); -#ifdef DEVELOPER_TESTS - struct timeval t1; - gettimeofday(&t1, NULL); - fprintf(stderr,"bf = %d dec = %d ra = %d (iset, isa) = (%d,%d) status = %d. Time taken = %0.3g sec\n", - bf, dec_link, ra_link, iset, instruction_sets[iset], status, ADD_DIFF_TIME(t0, t1)); + { + options.link_in_dec = 1; + options.link_in_ra = 1; #endif - - if(status != EXIT_SUCCESS) { - return status; - } - - /*---Output-Pairs-------------------------------------*/ - FILE *fp=my_fopen(correct_outputfile,"r"); - if(fp == NULL) { - free_results_countpairs_theta(&results); - return EXIT_FAILURE; - } - for(int i=1;iDDrppi, 1->wtheta, 2->vpf + const int function_pointer_index[] = {0,1,2,0,1,2,3,3};//0->DDrppi, 1->wtheta, 2->vpf, 3->DDsmu assert(sizeof(function_pointer_index)/sizeof(int) == ntests && "Number of tests should equal the number of functions"); const char correct_outputfiles[][MAXLEN] = {"../tests/Mr19_mock.DD", /* Test 0 Mr19 DD */ @@ -461,26 +579,32 @@ int main(int argc, char **argv) "../tests/Mr19_mock_vpf", /* Test 2 Mr19 mocks vpf */ "../tests/Mr19_mock.DR", /* Test 3 Mr19 DR */ "../tests/Mr19_mock_wtheta.DR", /* Test 4 Mr19 wtheta DR */ - "../tests/Mr19_randoms_vpf"}; /* Test 5 Mr19 randoms vpf */ + "../tests/Mr19_randoms_vpf", /* Test 5 Mr19 randoms vpf */ + "../tests/Mr19_mock_DDsmu.RR", /* Test 6 Mr19 RR 
smu */ + "../tests/Mr19_mock_DDsmu.DR"}; /* Test 7 Mr19 DR smu */ const char firstfilename[][MAXLEN] = {"../tests/data/Mr19_mock_northonly.rdcz.dat", "../tests/data/Mr19_mock_northonly.rdcz.dat", "../tests/data/Mr19_mock_northonly.rdcz.dat", "../tests/data/Mr19_randoms_northonly.rdcz.ff", "../tests/data/Mr19_randoms_northonly.rdcz.ff", + "../tests/data/Mr19_randoms_northonly.rdcz.ff", + "../tests/data/Mr19_randoms_northonly.rdcz.ff", "../tests/data/Mr19_randoms_northonly.rdcz.ff"}; - const char firstfiletype[][MAXLEN] = {"a","a","a","f","f","f"}; + const char firstfiletype[][MAXLEN] = {"a","a","a","f","f","f","f","f"}; const char secondfilename[][MAXLEN] = {"../tests/data/Mr19_mock_northonly.rdcz.dat", "../tests/data/Mr19_mock_northonly.rdcz.dat", "../tests/data/Mr19_mock_northonly.rdcz.dat", "../tests/data/Mr19_mock_northonly.rdcz.dat", "../tests/data/Mr19_mock_northonly.rdcz.dat", - "../tests/data/Mr19_randoms_northonly.rdcz.ff"}; - const char secondfiletype[][MAXLEN] = {"a","a","a","a","a","f"}; - - const double allpimax[] = {40.0,40.0,40.0,40.0,40.0,40.0}; + "../tests/data/Mr19_randoms_northonly.rdcz.ff", + "../tests/data/Mr19_randoms_northonly.rdcz.ff", + "../tests/data/Mr19_mock_northonly.rdcz.dat"}; + const char secondfiletype[][MAXLEN] = {"a","a","a","a","a","f","f","a"}; + + const double allpimax[] = {40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0}; - int (*allfunctions[]) (const char *) = {test_DDrppi_mocks,test_wtheta_mocks,test_vpf_mocks}; - const int numfunctions=3;//3 functions total + int (*allfunctions[]) (const char *) = {test_DDrppi_mocks,test_DDtheta_mocks,test_vpf_mocks,test_DDsmu_mocks}; + const int numfunctions=4;//4 functions total int total_tests=0,skipped=0; @@ -508,7 +632,7 @@ int main(int argc, char **argv) char execstring[MAXLEN]; my_snprintf(execstring,MAXLEN,"rm -f %s",tmpoutputfile); run_system_call(execstring);//can ignore the status here - + } else { fprintf(stderr,ANSI_COLOR_RED "FAILED: " ANSI_COLOR_MAGENTA "%s" ANSI_COLOR_RED ". 
Time taken = %8.2lf seconds " ANSI_COLOR_RESET "\n", testname,pair_time); failed++; diff --git a/theory/DDsmu/DDsmu.c b/theory/DDsmu/DDsmu.c new file mode 100644 index 00000000..edebb837 --- /dev/null +++ b/theory/DDsmu/DDsmu.c @@ -0,0 +1,346 @@ +/* File: DDsmu.c */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + +/* PROGRAM DDsmu + +--- DDsmu file1 format1 file2 format2 binfile mu_max nmu_bins numthreads [weight_method weights_file1 weights_format1 [weights_file2 weights_format2]] > DDfile +--- Measure the cross-correlation function xi(s, mu) for two different + data files (or autocorrelation if file1=file1). + * file1 = name of first data file + * format1 = format of first data file (a=ascii, c=csv, f=fast-food) + * file2 = name of second data file + * format2 = format of second data file (a=ascii, c=csv, f=fast-food) + * binfile = name of ascii file containing the r-bins (rmin rmax for each bin) + * mu_max = maximum of the cosine of the angle to the line-of-sight (LOS is taken to be along the z-direction) + * nmu_bins = number of bins for mu + * numthreads = number of threads to use +--- OPTIONAL ARGS: + * weight_method = the type of pair weighting to apply. Options are: 'pair_product', 'none'. Default: 'none'. 
+ * weights_file1 = name of file containing the weights corresponding to the first data file + * weights_format1 = format of file containing the weights corresponding to the first data file + * weights_file2 = name of file containing the weights corresponding to the second data file + * weights_format2 = format of file containing the weights corresponding to the second data file +---OUTPUT: + > DDfile = name of output file + ---------------------------------------------------------------------------------- +*/ + +#include +#include +#include +#include + +#include "defs.h" //for ADD_DIFF_TIME +#include "function_precision.h" //definition of DOUBLE +#include "countpairs_s_mu.h" //function proto-type for countpairs +#include "io.h" //function proto-type for file input +#include "utils.h" //general utilities + + +void Printhelp(void); + +int main(int argc, char *argv[]) +{ + + /*---Arguments-------------------------*/ + char *file1=NULL,*file2=NULL,*weights_file1=NULL,*weights_file2=NULL; + char *fileformat1=NULL,*fileformat2=NULL,*weights_fileformat1=NULL,*weights_fileformat2=NULL; + char *sbinfile=NULL; + double mu_max; + int nmu_bins; + char *weight_method_str=NULL; + + weight_method_t weight_method = NONE; + int num_weights = 0; + + /*---Data-variables--------------------*/ + int64_t ND1=0,ND2=0; + + DOUBLE *x1=NULL,*y1=NULL,*z1=NULL,*weights1[MAX_NUM_WEIGHTS]={NULL}; + DOUBLE *x2=NULL,*y2=NULL,*z2=NULL,*weights2[MAX_NUM_WEIGHTS]={NULL};//will point to x1/y1/z1 in case of auto-corr + + int nthreads=1; + /*---Corrfunc-variables----------------*/ +#if !(defined(USE_OMP) && defined(_OPENMP)) + const char argnames[][30]={"file1","format1","file2","format2","sbinfile","mu_max", "nmu_bins"}; +#else + const char argnames[][30]={"file1","format1","file2","format2","sbinfile","mu_max", "nmu_bins","Nthreads"}; +#endif + const char optargnames[][30]={"weight_method", "weights_file1","weights_format1","weights_file2","weights_format2"}; + + int 
nargs=sizeof(argnames)/(sizeof(char)*30); + int noptargs=sizeof(optargnames)/(sizeof(char)*30); + + struct timeval t_end,t_start,t0,t1; + double read_time=0.0; + gettimeofday(&t_start,NULL); + + /*---Read-arguments-----------------------------------*/ + if(argc< (nargs+1)) { + Printhelp() ; + fprintf(stderr,"\nFound: %d parameters\n ",argc-1); + int i; + for(i=1;i = `%s' \n",argv[i]); + } + fprintf(stderr,"\nMissing required parameters \n"); + for(i=argc;i<=nargs;i++) + fprintf(stderr,"\t\t %s = `?'\n",argnames[i-1]); + return EXIT_FAILURE; + } + + /* Validate optional arguments */ + int noptargs_given = argc - (nargs + 1); + if(noptargs_given != 0 && noptargs_given != 3 && noptargs_given != 5){ + Printhelp(); + fprintf(stderr,"\nFound: %d optional arguments; must be 0 (no weights), 3 (for one set of weights) or 5 (for two sets)\n ", noptargs_given); + int i; + for(i=nargs+1;i = `%s' \n",argv[i]); + } + return EXIT_FAILURE; + } + + file1=argv[1]; + fileformat1=argv[2]; + file2=argv[3]; + fileformat2=argv[4]; + sbinfile=argv[5]; + sscanf(argv[6],"%lf",&mu_max) ; + nmu_bins=atoi(argv[7]); + +#if defined(_OPENMP) + nthreads=atoi(argv[8]); + if(nthreads < 1 ) { + fprintf(stderr, "Nthreads = %d must be at least 1. 
Exiting...\n", nthreads); + return EXIT_FAILURE; + } +#endif + + if(noptargs_given >= 3){ + weight_method_str = argv[nargs + 1]; + int wstatus = get_weight_method_by_name(weight_method_str, &weight_method); + if(wstatus != EXIT_SUCCESS){ + fprintf(stderr, "Error: Unknown weight method \"%s\"\n", weight_method_str); + return EXIT_FAILURE; + } + num_weights = get_num_weights_by_method(weight_method); + + weights_file1 = argv[nargs + 2]; + weights_fileformat1 = argv[nargs + 3]; + } + if(noptargs_given >= 5){ + weights_file2 = argv[nargs + 4]; + weights_fileformat2 = argv[nargs + 5]; + } + + int autocorr=0; + if(strcmp(file1,file2)==0) { + autocorr=1; + } + + fprintf(stderr,"Running `%s' with the parameters \n",argv[0]); + fprintf(stderr,"\n\t\t -------------------------------------\n"); + for(int i=1;i = `%s' \n",argv[i]); + } + } + fprintf(stderr,"\t\t -------------------------------------\n"); + + + gettimeofday(&t0,NULL); + /*---Read-data1-file----------------------------------*/ + ND1=read_positions(file1,fileformat1,sizeof(DOUBLE), 3, &x1, &y1, &z1); + gettimeofday(&t1,NULL); + read_time += ADD_DIFF_TIME(t0,t1); + gettimeofday(&t0,NULL); + + /* Read weights file 1 */ + if(weights_file1 != NULL){ + gettimeofday(&t0,NULL); + int64_t wND1 = read_columns_into_array(weights_file1,weights_fileformat1, sizeof(DOUBLE), num_weights, (void **) weights1); + gettimeofday(&t1,NULL); + read_time += ADD_DIFF_TIME(t0,t1); + + if(wND1 != ND1){ + fprintf(stderr, "Error: read %"PRId64" lines from %s, but read %"PRId64" from %s\n", wND1, weights_file1, ND1, file1); + return EXIT_FAILURE; + } + } + + if (autocorr==0) { + /*---Read-data2-file----------------------------------*/ + ND2=read_positions(file2,fileformat2,sizeof(DOUBLE), 3, &x2, &y2, &z2); + gettimeofday(&t1,NULL); + read_time += ADD_DIFF_TIME(t0,t1); + + if(weights_file2 != NULL){ + gettimeofday(&t0,NULL); + int64_t wND2 = read_columns_into_array(weights_file2,weights_fileformat2, sizeof(DOUBLE), num_weights, (void **) 
weights2); + gettimeofday(&t1,NULL); + read_time += ADD_DIFF_TIME(t0,t1); + + if(wND2 != ND2){ + fprintf(stderr, "Error: read %"PRId64" lines from %s, but read %"PRId64" from %s\n", wND2, weights_file2, ND2, file2); + return EXIT_FAILURE; + } + } + } else { + //None of these are required. But I prefer to preserve the possibility + ND2 = ND1; + x2 = x1; + y2 = y1; + z2 = z1; + for(int w = 0; w < MAX_NUM_WEIGHTS; w++){ + weights2[w] = weights1[w]; + } + } + + /*---Count-pairs--------------------------------------*/ + gettimeofday(&t0,NULL); + results_countpairs_s_mu results; + struct config_options options = get_config_options(); + + /* Pack weights into extra options */ + struct extra_options extra = get_extra_options(weight_method); + for(int w = 0; w < num_weights; w++){ + extra.weights0.weights[w] = (void *) weights1[w]; + extra.weights1.weights[w] = (void *) weights2[w]; + } + + /* If you want to change the bin refine factors */ + /* const int bf[] = {2, 2, 1}; */ + /* set_bin_refine_factors(&options, bf); */ + int status = countpairs_s_mu(ND1,x1,y1,z1, + ND2,x2,y2,z2, + nthreads, + autocorr, + sbinfile, + mu_max, + nmu_bins, + &results, + &options, + &extra); + + free(x1);free(y1);free(z1); + for(int w = 0; w < num_weights; w++){ + free(weights1[w]); + } + if(autocorr == 0) { + free(x2);free(y2);free(z2); + for(int w = 0; w < num_weights; w++){ + free(weights2[w]); + } + } + + if(status != EXIT_SUCCESS) { + return status; + } + + gettimeofday(&t1,NULL); + double pair_time = ADD_DIFF_TIME(t0,t1); + double smin = results.supp[0]; + const double dmu = mu_max/(double) nmu_bins; + for(int i=1;i Done - ND1=%12"PRId64" ND2=%12"PRId64". Time taken = %6.2lf seconds. 
read-in time = %6.2lf seconds pair-counting time = %6.2lf sec\n", + ND1,ND2,ADD_DIFF_TIME(t_start,t_end),read_time,pair_time); + return EXIT_SUCCESS; +} + +/*---Print-help-information---------------------------*/ +void Printhelp(void) +{ + fprintf(stderr,"=========================================================================\n") ; +#if defined(USE_OMP) && defined(_OPENMP) + fprintf(stderr," --- DDsmu file1 format1 file2 format2 sbinfile mu_max nmu_bins numthreads [weight_method weights_file1 weights_format1 [weights_file2 weights_format2]] > DDfile\n"); +#else + fprintf(stderr," --- DDsmu file1 format1 file2 format2 sbinfile mu_max nmu_bins [weight_method weights_file1 weights_format1 [weights_file2 weights_format2]] > DDfile\n") ; +#endif + + fprintf(stderr," --- Measure the cross-correlation function xi(s, mu) for two different\n") ; + fprintf(stderr," data files (or autocorrelation if data1=data2).\n") ; + fprintf(stderr," * file1 = name of first data file\n") ; + fprintf(stderr," * format1 = format of first data file (a=ascii, c=csv, f=fast-food)\n") ; + fprintf(stderr," * file2 = name of second data file\n") ; + fprintf(stderr," * format2 = format of second data file (a=ascii, c=csv, f=fast-food)\n") ; + fprintf(stderr," * sbinfile = name of ascii file containing the s-bins (smin smax for each bin)\n") ; + fprintf(stderr," * mu_max = maximum of the cosine of the angle to the line-of-sight (LOS is taken to be along the z-direction). Valid values are in: (0.0, 1.0]\n"); + fprintf(stderr," * nmu_bins = number of bins for mu (must be >= 1)\n"); +#if defined(USE_OMP) && defined(_OPENMP) + fprintf(stderr," * numthreads = number of threads to use (must be >= 1)\n"); +#endif + fprintf(stderr," --- OPTIONAL ARGS:\n"); + fprintf(stderr," * weight_method = the type of pair weighting to apply. Options are: 'pair_product', 'none'. 
Default: 'none'.\n"); + fprintf(stderr," * weights_file1 = name of file containing the weights corresponding to the first data file\n"); + fprintf(stderr," * weights_format1 = format of file containing the weights corresponding to the first data file\n"); + fprintf(stderr," * weights_file2 = name of file containing the weights corresponding to the second data file\n"); + fprintf(stderr," * weights_format2 = format of file containing the weights corresponding to the second data file\n"); + fprintf(stderr," ---OUTPUT:\n") ; +#ifdef OUTPUT_RPAVG + fprintf(stderr," > DD(s, mu) file = name of output file \n") ; +#else + fprintf(stderr," > DD(s, mu) file = name of output file \n") ; +#endif + fprintf(stderr,"\n\tCompile options: \n"); +#ifdef PERIODIC + fprintf(stderr,"\tPeriodic = True\n"); +#else + fprintf(stderr,"\tPeriodic = False\n"); +#endif + +#ifdef OUTPUT_RPAVG + fprintf(stderr,"\tOutput SAVG = True\n"); +#else + fprintf(stderr,"\tOutput SAVG = False\n"); +#endif + +#ifdef DOUBLE_PREC + fprintf(stderr,"\tPrecision = double\n"); +#else + fprintf(stderr,"\tPrecision = float\n"); +#endif + +#if defined(__AVX__) + fprintf(stderr,"\tUse AVX = True\n"); +#else + fprintf(stderr,"\tUse AVX = False\n"); +#endif + +#if defined(USE_OMP) && defined(_OPENMP) + fprintf(stderr,"\tUse OMP = True\n"); +#else + fprintf(stderr,"\tUse OMP = False\n"); +#endif + + fprintf(stderr,"=========================================================================\n") ; +} diff --git a/theory/DDsmu/Makefile b/theory/DDsmu/Makefile new file mode 100644 index 00000000..ca63851a --- /dev/null +++ b/theory/DDsmu/Makefile @@ -0,0 +1,56 @@ +ROOT_DIR := ../.. 
+INSTALL_HEADERS_DIR := $(ROOT_DIR)/include +INSTALL_LIB_DIR := $(ROOT_DIR)/lib +INSTALL_BIN_DIR := $(ROOT_DIR)/bin +UTILS_DIR := $(ROOT_DIR)/utils +IO_DIR := $(ROOT_DIR)/io + +include $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk + +TARGET := DDsmu +TARGETS := $(TARGET) +LIBRARY := libcountpairs_s_mu.a +LIBSRC := countpairs_s_mu.c countpairs_s_mu_impl_double.c countpairs_s_mu_impl_float.c \ + $(UTILS_DIR)/gridlink_impl_double.c $(UTILS_DIR)/gridlink_impl_float.c \ + $(UTILS_DIR)/utils.c $(UTILS_DIR)/progressbar.c $(UTILS_DIR)/cpu_features.c +LIBRARY_HEADERS := countpairs_s_mu.h + +TARGETSRC := DDsmu.c $(IO_DIR)/ftread.c $(IO_DIR)/io.c $(LIBSRC) + +INCL := countpairs_s_mu_kernels_float.c countpairs_s_mu_kernels_double.c countpairs_s_mu_kernels.c.src countpairs_s_mu_impl.c.src countpairs_s_mu_impl.h.src \ + countpairs_s_mu.h countpairs_s_mu_impl_double.h countpairs_s_mu_impl_float.h \ + $(UTILS_DIR)/gridlink_impl_float.h $(UTILS_DIR)/gridlink_impl_double.h $(UTILS_DIR)/gridlink_impl.h.src \ + $(UTILS_DIR)/cellarray_float.h $(UTILS_DIR)/cellarray_double.h $(UTILS_DIR)/cellarray.h.src \ + $(UTILS_DIR)/function_precision.h $(UTILS_DIR)/avx_calls.h $(UTILS_DIR)/sse_calls.h \ + $(UTILS_DIR)/defs.h $(UTILS_DIR)/cpu_features.h \ + $(IO_DIR)/ftread.h $(IO_DIR)/io.h $(UTILS_DIR)/utils.h $(UTILS_DIR)/progressbar.h \ + $(UTILS_DIR)/weight_functions_double.h $(UTILS_DIR)/weight_functions_float.h $(UTILS_DIR)/weight_functions.h.src \ + $(UTILS_DIR)/weight_defs_double.h $(UTILS_DIR)/weight_defs_float.h $(UTILS_DIR)/weight_defs.h.src + +TARGETOBJS := $(TARGETSRC:.c=.o) +LIBOBJS := $(LIBSRC:.c=.o) +all: $(TARGETS) $(TARGETSRC) $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk Makefile + +countpairs_s_mu_impl_double.o:countpairs_s_mu_impl_double.c countpairs_s_mu_impl_double.h countpairs_s_mu_kernels_double.c $(UTILS_DIR)/gridlink_impl_double.h $(UTILS_DIR)/cellarray_double.h +countpairs_s_mu_impl_float.o:countpairs_s_mu_impl_float.c countpairs_s_mu_impl_float.h 
countpairs_s_mu_kernels_float.c $(UTILS_DIR)/gridlink_impl_float.h $(UTILS_DIR)/cellarray_float.h +countpairs_s_mu.o:countpairs_s_mu.c countpairs_s_mu_impl_double.h countpairs_s_mu_impl_float.h $(INCL) + +libs: lib +lib: $(LIBRARY) +install:$(INSTALL_BIN_DIR)/$(TARGET) $(INSTALL_LIB_DIR)/$(LIBRARY) $(INSTALL_HEADERS_DIR)/$(LIBRARY_HEADERS) + +clean: + $(RM) $(TARGETOBJS) $(TARGET) $(LIBRARY) countpairs_s_mu_kernels_float.c countpairs_s_mu_kernels_double.c countpairs_s_mu_impl_double.[ch] countpairs_s_mu_impl_float.[ch] + $(RM) -R *.dSYM + +distclean:clean + cd $(INSTALL_HEADERS_DIR) && $(RM) $(LIBRARY_HEADERS) + cd $(INSTALL_LIB_DIR) && $(RM) $(LIBRARY) + cd $(INSTALL_BIN_DIR) && $(RM) $(TARGET) + +tests: + $(MAKE) -C ../tests DDsmu + +include $(ROOT_DIR)/rules.mk + + diff --git a/theory/DDsmu/countpairs_s_mu.c b/theory/DDsmu/countpairs_s_mu.c new file mode 100644 index 00000000..b577a9ad --- /dev/null +++ b/theory/DDsmu/countpairs_s_mu.c @@ -0,0 +1,74 @@ +/* File: countpairs_s_mu.c */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. 
See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + +#include +#include +#include + +#include "countpairs_s_mu.h" //function proto-type for API +#include "countpairs_s_mu_impl_double.h"//actual implementations for double +#include "countpairs_s_mu_impl_float.h"//actual implementations for float + +void free_results_s_mu(results_countpairs_s_mu *results) +{ + if(results==NULL) + return; + + free(results->npairs); + free(results->supp); + free(results->savg); + free(results->weightavg); +} + + +int countpairs_s_mu(const int64_t ND1, void *X1, void *Y1, void *Z1, + const int64_t ND2, void *X2, void *Y2, void *Z2, + const int numthreads, + const int autocorr, + const char *sbinfile, + const double mu_max, + const int nmu_bins, + results_countpairs_s_mu *results, + struct config_options *options, + struct extra_options *extra) +{ + if( ! (options->float_type == sizeof(float) || options->float_type == sizeof(double))){ + fprintf(stderr,"ERROR: In %s> Can only handle doubles or floats. Got an array of size = %zu\n", + __FUNCTION__, options->float_type); + return EXIT_FAILURE; + } + + if( strncmp(options->version, STR(VERSION), sizeof(options->version)/sizeof(char)-1) != 0) { + fprintf(stderr,"Error: Do not know this API version = `%s'. 
Expected version = `%s'\n", options->version, STR(VERSION)); + return EXIT_FAILURE; + } + + if(options->float_type == sizeof(float)) { + return countpairs_s_mu_float(ND1, (float *) X1, (float *) Y1, (float *) Z1, + ND2, (float *) X2, (float *) Y2, (float *) Z2, + numthreads, + autocorr, + sbinfile, + mu_max, + nmu_bins, + results, + options, + extra); + } else { + return countpairs_s_mu_double(ND1, (double *) X1, (double *) Y1, (double *) Z1, + ND2, (double *) X2, (double *) Y2, (double *) Z2, + numthreads, + autocorr, + sbinfile, + mu_max, + nmu_bins, + results, + options, + extra); + } +} diff --git a/theory/DDsmu/countpairs_s_mu.h b/theory/DDsmu/countpairs_s_mu.h new file mode 100644 index 00000000..f1182a3d --- /dev/null +++ b/theory/DDsmu/countpairs_s_mu.h @@ -0,0 +1,45 @@ +/* File: countpairs_s_mu.h */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "defs.h" //for struct config_options +#include //for uint64_t + + //define the results structure + typedef struct{ + uint64_t *npairs; + double *supp; + double *savg; + double mu_max; + double mu_min;//not used -> assumed to be 0.0 + double *weightavg; + int nsbin; + int nmu_bins; + } results_countpairs_s_mu; + + extern int countpairs_s_mu(const int64_t ND1, void *X1, void *Y1, void *Z1, + const int64_t ND2, void *X2, void *Y2, void *Z2, + const int numthreads, + const int autocorr, + const char *sbinfile, + const double mu_max, + const int nmu_bins, + results_countpairs_s_mu *results, + struct config_options *options, + struct extra_options *extra); + + extern void free_results_s_mu(results_countpairs_s_mu *results); + +#ifdef __cplusplus +} +#endif diff --git a/theory/DDsmu/countpairs_s_mu_impl.c.src b/theory/DDsmu/countpairs_s_mu_impl.c.src new file mode 100644 
index 00000000..494a6aff --- /dev/null +++ b/theory/DDsmu/countpairs_s_mu_impl.c.src @@ -0,0 +1,681 @@ +// # -*- mode: c -*- +/* File: countpairs_s_mu_impl.c.src */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + + +#include +#include +#include +#include +#include + +#include "countpairs_s_mu_impl_DOUBLE.h" //function proto-type +#include "countpairs_s_mu_kernels_DOUBLE.c" + +#include "defs.h" +#include "utils.h" //all of the utilities +#include "progressbar.h" //for the progressbar +#include "cpu_features.h" //prototype instrset_detect required for runtime dispatch + +#include "cellarray_DOUBLE.h" //definition of struct cellarray* +#include "gridlink_impl_DOUBLE.h"//function proto-type for gridlink + +#if defined(_OPENMP) +#include +#endif + +int interrupt_status_DDsmu_DOUBLE=EXIT_SUCCESS; + +void interrupt_handler_countpairs_s_mu_DOUBLE(int signo) +{ + fprintf(stderr,"Received signal = `%s' (signo = %d). 
Aborting \n",strsignal(signo), signo); + interrupt_status_DDsmu_DOUBLE = EXIT_FAILURE; +} + +countpairs_s_mu_func_ptr_DOUBLE countpairs_s_mu_driver_DOUBLE(const struct config_options *options) +{ + static countpairs_s_mu_func_ptr_DOUBLE function = NULL; + static isa old_isa=-1; + if(old_isa == options->instruction_set) { + return function; + } + + + /* Array of function pointers */ + countpairs_s_mu_func_ptr_DOUBLE allfunctions[] = { +#ifdef __AVX__ + countpairs_s_mu_avx_intrinsics_DOUBLE, +#endif +#ifdef __SSE4_2__ + countpairs_s_mu_sse_intrinsics_DOUBLE, +#endif + countpairs_s_mu_fallback_DOUBLE + }; + + const int num_functions = sizeof(allfunctions)/sizeof(void *); + const int fallback_offset = num_functions - 1; +#if defined(__AVX__) || defined __SSE4_2__ + const int highest_isa = instrset_detect(); +#endif + int curr_offset = 0; + + /* Now check if AVX is supported by the CPU */ + int avx_offset = fallback_offset; +#ifdef __AVX__ + avx_offset = highest_isa >= 7 ? curr_offset:fallback_offset; + curr_offset++; +#endif + + /* Is the SSE function supported at runtime and enabled at compile-time?*/ + int sse_offset = fallback_offset; +#ifdef __SSE4_2__ + sse_offset = highest_isa >= 6 ? 
curr_offset:fallback_offset; + curr_offset++; +#endif + if( curr_offset != fallback_offset) { + fprintf(stderr,"ERROR: Bug in code (current offset = %d *should equal* fallback function offset = %d)\n", + curr_offset, fallback_offset); + return NULL; + } + + int function_dispatch=0; + /* Check that cpu supports feature */ + if(options->instruction_set >= 0) { + switch(options->instruction_set) { + case(AVX512F): + case(AVX2): + case(AVX):function_dispatch=avx_offset;break; + case(SSE42):function_dispatch=sse_offset;break; + default:function_dispatch=fallback_offset;break; + } + } + + if(function_dispatch >= num_functions) { + fprintf(stderr,"In %s> ERROR: Could not resolve the correct function.\n Function index = %d must lie between [0, %d)\n", + __FUNCTION__, function_dispatch, num_functions); + return NULL; + } + function = allfunctions[function_dispatch]; + old_isa = options->instruction_set; + + if(options->verbose){ + // This must be first (AVX/SSE may be aliased to fallback) + if(function_dispatch == fallback_offset){ + fprintf(stderr,"Using fallback kernel\n"); + } else if(function_dispatch == avx_offset){ + fprintf(stderr, "Using AVX kernel\n"); + } else if(function_dispatch == sse_offset){ + fprintf(stderr, "Using SSE kernel\n"); + } else { + fprintf(stderr, "Unknown kernel!\n"); + } + } + + return function; +} + + +int countpairs_s_mu_DOUBLE(const int64_t ND1, DOUBLE *X1, DOUBLE *Y1, DOUBLE *Z1, + const int64_t ND2, DOUBLE *X2, DOUBLE *Y2, DOUBLE *Z2, + const int numthreads, + const int autocorr, + const char *sbinfile, + const double max_mu, + const int nmu_bins, + results_countpairs_s_mu *results, + struct config_options *options, + struct extra_options *extra) +{ + if(options->float_type != sizeof(DOUBLE)) { + fprintf(stderr,"ERROR: In %s> Can only handle arrays of size=%zu. 
Got an array of size = %zu\n", + __FUNCTION__, sizeof(DOUBLE), options->float_type); + return EXIT_FAILURE; + } + + // If no extra options were passed, create dummy options + // This allows us to pass arguments like "extra->weights0" below; + // they'll just be NULLs, which is the correct behavior + struct extra_options dummy_extra; + if(extra == NULL){ + weight_method_t dummy_method = NONE; + dummy_extra = get_extra_options(dummy_method); + extra = &dummy_extra; + } + + int need_weightavg = extra->weight_method != NONE; + + struct timeval t0; + if(options->c_api_timer) { + gettimeofday(&t0, NULL); + } + +#if defined(_OPENMP) + omp_set_num_threads(numthreads); +#else + (void) numthreads; +#endif + + options->sort_on_z = 1; + for(int i=0;i<3;i++) { + if(options->bin_refine_factors[i] < 1) { + fprintf(stderr,"Warning: bin refine factor along axis = %d *must* be >=1. Instead found bin refine factor =%d\n", + i, options->bin_refine_factors[i]); + reset_bin_refine_factors(options); + break;/* all factors have been reset -> no point continuing with the loop */ + } + } + if(options->max_cells_per_dim == 0) { + fprintf(stderr,"Warning: Max. cells per dimension is set to 0 - resetting to `NLATMAX' = %d\n", NLATMAX); + options->max_cells_per_dim = NLATMAX; + } + + /* setup interrupt handler -> mostly useful during the python execution. + Let's Ctrl-C abort the extension */ + SETUP_INTERRUPT_HANDLERS(interrupt_handler_countpairs_s_mu_DOUBLE); + + /*********************** + *initializing the bins + ************************/ + double *supp; + int nsbin; + double smin,smax; + setup_bins(sbinfile,&smin,&smax,&nsbin,&supp); + if( ! (smin >= 0.0 && smax > 0.0 && smin < smax && nsbin > 0)) { + fprintf(stderr,"Error: Could not setup with R bins correctly. (rmin = %lf, rmax = %lf, with nbins = %d). 
Expected non-zero rmin/rmax with rmax > rmin and nbins >=1 \n", + smin, smax, nsbin); + return EXIT_FAILURE; + } + + if(max_mu <= 0.0 || max_mu > 1.0) { + fprintf(stderr,"Error: max_mu (max. value for the cosine of the angle with line of sight) must be greater than 0 and at most 1).\n" + "The passed value is max_mu = %lf. Please change it to be > 0 and <= 1.0\n", max_mu); + return EXIT_FAILURE; + } + + if(nmu_bins < 1 ) { + fprintf(stderr,"Error: Number of mu bins = %d must be at least 1\n", nmu_bins); + return EXIT_FAILURE; + } + + DOUBLE supp_sqr[nsbin]; + const int64_t totnbins = (nmu_bins+1)*(nsbin+1); + for(int i=0; i < nsbin;i++) { + supp_sqr[i] = supp[i]*supp[i]; + } + + const DOUBLE sqr_smax=supp_sqr[nsbin-1]; + const DOUBLE sqr_smin=supp_sqr[0]; + const DOUBLE mu_max = (DOUBLE) max_mu; + const DOUBLE pimax = smax*mu_max; + + //Find the min/max of the data + DOUBLE xmin=1e10,ymin=1e10,zmin=1e10; + DOUBLE xmax=-1e10,ymax=-1e10,zmax=-1e10; + get_max_min_DOUBLE(ND1, X1, Y1, Z1, &xmin, &ymin, &zmin, &xmax, &ymax, &zmax); + + if(autocorr==0) { + if(options->verbose) { + fprintf(stderr,"ND1 = %12"PRId64" [xmin,ymin,zmin] = [%lf,%lf,%lf], [xmax,ymax,zmax] = [%lf,%lf,%lf]\n",ND1,xmin,ymin,zmin,xmax,ymax,zmax); + } + + get_max_min_DOUBLE(ND2, X2, Y2, Z2, &xmin, &ymin, &zmin, &xmax, &ymax, &zmax); + if(options->verbose) { + fprintf(stderr,"ND2 = %12"PRId64" [xmin,ymin,zmin] = [%lf,%lf,%lf], [xmax,ymax,zmax] = [%lf,%lf,%lf]\n",ND2,xmin,ymin,zmin,xmax,ymax,zmax); + } + } + + const DOUBLE xdiff = options->boxsize > 0 ? options->boxsize:(xmax-xmin); + const DOUBLE ydiff = options->boxsize > 0 ? options->boxsize:(ymax-ymin); + const DOUBLE zdiff = options->boxsize > 0 ? 
options->boxsize:(zmax-zmin); + if(options->verbose && options->periodic) { + fprintf(stderr,"Running with points in [xmin,xmax] = %lf,%lf with periodic wrapping = %lf\n",xmin,xmax,xdiff); + fprintf(stderr,"Running with points in [ymin,ymax] = %lf,%lf with periodic wrapping = %lf\n",ymin,ymax,ydiff); + fprintf(stderr,"Running with points in [zmin,zmax] = %lf,%lf with periodic wrapping = %lf\n",zmin,zmax,zdiff); + } + + if(get_bin_refine_scheme(options) == BINNING_DFL) { + if(smax < 0.05*xdiff) { + options->bin_refine_factors[0] = 1; + } + if(smax < 0.05*ydiff) { + options->bin_refine_factors[1] = 1; + } + if(pimax < 0.05*zdiff) { + options->bin_refine_factors[2] = 1; + } + } + + + /*---Create 3-D lattice--------------------------------------*/ + int nmesh_x=0,nmesh_y=0,nmesh_z=0; + cellarray_index_particles_DOUBLE *lattice1 = gridlink_index_particles_DOUBLE(ND1, X1, Y1, Z1, &(extra->weights0), + xmin, xmax, ymin, ymax, zmin, zmax, + smax, smax, pimax, + options->bin_refine_factors[0], options->bin_refine_factors[1], options->bin_refine_factors[2], + &nmesh_x, &nmesh_y, &nmesh_z, options); + if(lattice1 == NULL) { + return EXIT_FAILURE; + } + + /* If there too few cells (BOOST_CELL_THRESH is ~10), and the number of cells can be increased, then boost bin refine factor (by 2x)*/ + if(nmesh_x <= BOOST_CELL_THRESH && nmesh_y <= BOOST_CELL_THRESH && nmesh_z <= BOOST_CELL_THRESH && options->max_cells_per_dim >= BOOST_BIN_REF*BOOST_CELL_THRESH) { + if(get_bin_refine_scheme(options) == BINNING_DFL) { + fprintf(stderr,"%s> gridlink seems inefficient nmesh = (%d, %d, %d). 
Boosting bin refine factor - should lead to better performance\n", __FUNCTION__, nmesh_x, nmesh_y, nmesh_z); + fprintf(stderr,"xmin = %lf xmax=%lf smax = %lf\n", xmin, xmax, smax); + free_cellarray_index_particles_DOUBLE(lattice1, nmesh_x * (int64_t) nmesh_y * nmesh_z); + for(int i=0;i<3;i++) { + options->bin_refine_factors[i] *= BOOST_BIN_REF; + } + lattice1 = gridlink_index_particles_DOUBLE(ND1, X1, Y1, Z1, &(extra->weights0), + xmin, xmax, ymin, ymax, zmin, zmax, + smax, smax, pimax, + options->bin_refine_factors[0], options->bin_refine_factors[1], options->bin_refine_factors[2], + &nmesh_x, &nmesh_y, &nmesh_z, options); + if(lattice1 == NULL) { + return EXIT_FAILURE; + } + + } else { + fprintf(stderr,"%s> gridlink seems inefficient nmesh = (%d, %d, %d), boosting bin refine factor could have helped. However, since custom bin refine factors " + "= (%d, %d, %d) are being used - continuing with inefficient mesh\n", __FUNCTION__, nmesh_x, nmesh_y, nmesh_z, options->bin_refine_factors[0], + options->bin_refine_factors[1], options->bin_refine_factors[2]); + } + } + + cellarray_index_particles_DOUBLE *lattice2 = NULL; + if(autocorr==0) { + int ngrid2_x=0,ngrid2_y=0,ngrid2_z=0; + lattice2 = gridlink_index_particles_DOUBLE(ND2, X2, Y2, Z2, &(extra->weights1), + xmin, xmax, ymin, ymax, zmin, zmax, + smax, smax, pimax, + options->bin_refine_factors[0], options->bin_refine_factors[1], options->bin_refine_factors[2], + &ngrid2_x, &ngrid2_y, &ngrid2_z, options); + if(lattice2 == NULL) { + return EXIT_FAILURE; + } + if( ! (nmesh_x == ngrid2_x && nmesh_y == ngrid2_y && nmesh_z == ngrid2_z) ) { + fprintf(stderr,"Error: The two sets of 3-D lattices do not have identical bins. 
First has dims (%d, %d, %d) while second has (%d, %d, %d)\n", + nmesh_x, nmesh_y, nmesh_z, ngrid2_x, ngrid2_y, ngrid2_z); + return EXIT_FAILURE; + } + } else { + lattice2 = lattice1; + } + const int64_t totncells = (int64_t) nmesh_x * (int64_t) nmesh_y * (int64_t) nmesh_z; + + //Generate the unique set of neighbouring cells to count over. + { + int status = assign_ngb_cells_index_particles_DOUBLE(lattice1, lattice2, totncells, + options->bin_refine_factors[0], options->bin_refine_factors[1], options->bin_refine_factors[2], + nmesh_x, nmesh_y, nmesh_z, xdiff, ydiff, zdiff, autocorr, options->periodic); + if(status != EXIT_SUCCESS) { + free_cellarray_index_particles_DOUBLE(lattice1, totncells); + if(autocorr == 0) { + free_cellarray_index_particles_DOUBLE(lattice2, totncells); + } + free(supp); + return status; + } + } + + /* runtime dispatch - get the function pointer */ + countpairs_s_mu_func_ptr_DOUBLE countpairs_s_mu_function_DOUBLE = countpairs_s_mu_driver_DOUBLE(options); + if(countpairs_s_mu_function_DOUBLE == NULL) { + free_cellarray_index_particles_DOUBLE(lattice1, totncells); + if(autocorr == 0) { + free_cellarray_index_particles_DOUBLE(lattice2, totncells); + } + free(supp); + return EXIT_FAILURE; + } + + +#if defined(_OPENMP) + uint64_t **all_npairs = (uint64_t **) matrix_calloc(sizeof(uint64_t), numthreads, totnbins); + DOUBLE **all_savg = NULL; + if(options->need_avg_sep) { + all_savg = (DOUBLE **) matrix_calloc(sizeof(DOUBLE),numthreads,totnbins); + } + DOUBLE **all_weightavg = NULL; + if(need_weightavg) { + all_weightavg = (DOUBLE **) matrix_calloc(sizeof(DOUBLE),numthreads,totnbins); + } + + if(all_npairs == NULL || + (options->need_avg_sep && all_savg == NULL) || + (need_weightavg && all_weightavg == NULL)) { + free_cellarray_index_particles_DOUBLE(lattice1, totncells); + if(autocorr == 0) { + free_cellarray_index_particles_DOUBLE(lattice2, totncells); + } + matrix_free((void **)all_npairs, numthreads); + if(options->need_avg_sep) { + 
matrix_free((void **)all_savg, numthreads); + } + if(need_weightavg) { + matrix_free((void**) all_weightavg, numthreads); + } + free(supp); + return EXIT_FAILURE; + } +#else + uint64_t npairs[totnbins]; + DOUBLE savg[totnbins], weightavg[totnbins]; + for(int ibin=0;ibinneed_avg_sep) { + savg[ibin] = ZERO; + } + if(need_weightavg) { + weightavg[ibin] = ZERO; + } + } +#endif//OMP + + + + int interrupted=0, abort_status = EXIT_SUCCESS; + int64_t numdone=0; + if(options->verbose) { + init_my_progressbar(totncells,&interrupted); + } + +#if defined(_OPENMP) +#pragma omp parallel shared(numdone, abort_status, interrupt_status_DDsmu_DOUBLE) + { + const int tid = omp_get_thread_num(); + uint64_t npairs[totnbins]; + DOUBLE savg[totnbins], weightavg[totnbins]; + for(int i=0;ineed_avg_sep) { + savg[i] = ZERO; + } + if(need_weightavg) { + weightavg[i] = ZERO; + } + } + +#pragma omp for schedule(dynamic) nowait +#endif + /*---Loop-over-lattice1--------------------*/ + for(int64_t index1=0;index1verbose) { +#if defined(_OPENMP) + if (omp_get_thread_num() == 0) +#endif + my_progressbar(numdone,&interrupted); + + +#if defined(_OPENMP) +#pragma omp atomic +#endif + numdone++; + } + + + /* Calculate over all ngb cells */ + const cellarray_index_particles_DOUBLE *first = &(lattice1[index1]); + if(first->nelements == 0) { + continue; + } + DOUBLE *x1 = first->x; + DOUBLE *y1 = first->y; + DOUBLE *z1 = first->z; + const weight_struct_DOUBLE *weights1 = &(first->weights); + const int64_t N1 = first->nelements; + if(autocorr == 1) { + int same_cell = 1; + DOUBLE *this_savg = NULL; + DOUBLE *this_weightavg = NULL; + if(options->need_avg_sep) { + this_savg = savg; + } + if(need_weightavg) { + this_weightavg = weightavg; + } + const int status = countpairs_s_mu_function_DOUBLE(N1, x1, y1, z1, weights1, + N1, x1, y1, z1, weights1, + same_cell + ,sqr_smax, sqr_smin, nsbin, nmu_bins, supp_sqr, mu_max, pimax + ,ZERO, ZERO, ZERO + ,this_savg, npairs, + this_weightavg, extra->weight_method); + /* 
This actually causes a race condition under OpenMP - but mostly + I care that an error occurred - rather than the exact value of + the error status */ + abort_status |= status; + } + for(int64_t ngb=0;ngbnum_ngb;ngb++){ + const cellarray_index_particles_DOUBLE *second = first->ngb_cells[ngb]; + if(second->nelements == 0) { + continue; + } + const int same_cell = 0; + DOUBLE *x2 = second->x; + DOUBLE *y2 = second->y; + DOUBLE *z2 = second->z; + const weight_struct_DOUBLE *weights2 = &(second->weights); + DOUBLE off_xwrap = 0.0, off_ywrap = 0.0, off_zwrap = 0.0; + if(options->periodic) { + off_xwrap = first->xwrap[ngb]; + off_ywrap = first->ywrap[ngb]; + off_zwrap = first->zwrap[ngb]; + } + const int64_t N2 = second->nelements; + DOUBLE *this_savg = NULL; + DOUBLE *this_weightavg = NULL; + if(options->need_avg_sep) { + this_savg = savg; + } + if(need_weightavg) { + this_weightavg = weightavg; + } + const int status = countpairs_s_mu_function_DOUBLE(N1, x1, y1, z1, weights1, + N2, x2, y2, z2, weights2, same_cell, + sqr_smax, sqr_smin, nsbin, nmu_bins, supp_sqr, mu_max, pimax, + off_xwrap, off_ywrap, off_zwrap, + this_savg, npairs, + this_weightavg, extra->weight_method); + /* This actually causes a race condition under OpenMP - but mostly + I care that an error occurred - rather than the exact value of + the error status */ + abort_status |= status; + }//loop over ngb cells + } + }//index1 loop over totncells + +#if defined(_OPENMP) + for(int i=0;ineed_avg_sep) { + all_savg[tid][i] = savg[i]; + } + if(need_weightavg) { + all_weightavg[tid][i] = weightavg[i]; + } + } + }//close the omp parallel region +#endif + + free_cellarray_index_particles_DOUBLE(lattice1,totncells); + if(autocorr == 0) { + free_cellarray_index_particles_DOUBLE(lattice2,totncells); + } + if(abort_status != EXIT_SUCCESS || interrupt_status_DDsmu_DOUBLE != EXIT_SUCCESS) { + /* Cleanup memory here if aborting */ + free(supp); +#if defined(_OPENMP) + matrix_free((void **) all_npairs, numthreads); + 
if(options->need_avg_sep) { + matrix_free((void **) all_savg, numthreads); + } + if(need_weightavg) { + matrix_free((void **) all_weightavg, numthreads); + } +#endif + return EXIT_FAILURE; + } + + if(options->verbose) { + finish_myprogressbar(&interrupted); + } + +#if defined(_OPENMP) + uint64_t npairs[totnbins]; + DOUBLE savg[totnbins]; + DOUBLE weightavg[totnbins]; + + for(int i=0;ineed_avg_sep) { + savg[i] = 0.0; + } + if(need_weightavg) { + weightavg[i] = 0.0; + } + } + + for(int i=0;ineed_avg_sep) { + savg[j] += all_savg[i][j]; + } + if(need_weightavg) { + weightavg[j] += all_weightavg[i][j]; + } + } + } + matrix_free((void **) all_npairs, numthreads); + if(options->need_avg_sep) { + matrix_free((void **) all_savg, numthreads); + } + if(need_weightavg) { + matrix_free((void **) all_weightavg, numthreads); + } +#endif + + + //The code does not double count for autocorrelations + //which means the npairs and savg values need to be doubled; + if(autocorr == 1) { + const uint64_t int_fac = 2; + const DOUBLE dbl_fac = (DOUBLE) 2.0; + for(int i=0;ineed_avg_sep) { + savg[i] *= dbl_fac; + } + if(need_weightavg) { + weightavg[i] *= dbl_fac; + } + } + + /* Is the min. requested separation 0.0 ?*/ + /* The comparison is '<=' rather than '==' only to silence + the compiler */ + if(supp[0] <= 0.0) { + int index = (nmu_bins + 1);//first valid s bin (with 0-dpi depth in pi) + /* Then, add all the self-pairs. This ensures that + a cross-correlation with two identical datasets + produces the same result as the auto-correlation */ + npairs[index] += ND1; + + // Increasing npairs affects savg and weightavg. + // We don't need to add anything to savg; all the self-pairs have 0 separation! + // The self-pairs have non-zero weight, though. So, fix that here. + if(need_weightavg){ + // Keep in mind this is an autocorrelation (i.e. 
only one particle set to consider) + weight_func_t_DOUBLE weight_func = get_weight_func_by_method_DOUBLE(extra->weight_method); + pair_struct_DOUBLE pair = {.num_weights = extra->weights0.num_weights, + .dx.d=0., .dy.d=0., .dz.d=0., // always 0 separation + .parx.d=0., .pary.d=0., .parz.d=0.}; + for(int j = 0; j < ND1; j++){ + for(int w = 0; w < pair.num_weights; w++){ + pair.weights0[w].d = ((DOUBLE *) extra->weights0.weights[w])[j]; + pair.weights1[w].d = ((DOUBLE *) extra->weights0.weights[w])[j]; + } + weightavg[1] += weight_func(&pair); + } + } + } + } + + + for(int i=0;i 0) { + if(options->need_avg_sep) { + savg[i] /= (DOUBLE) npairs[i] ; + } + if(need_weightavg) { + weightavg[i] /= (DOUBLE) npairs[i]; + } + } + } + + + //Pack in the results + results->nsbin = nsbin; + results->nmu_bins = nmu_bins; + results->mu_max = max_mu;//NOTE max_mu which is double and not mu_max (which might be float) + results->mu_min = ZERO; + results->npairs = my_malloc(sizeof(uint64_t), totnbins); + results->supp = my_malloc(sizeof(double) , nsbin); + results->savg = my_malloc(sizeof(double) , totnbins); + results->weightavg = my_calloc(sizeof(double) , totnbins); + if(results->npairs == NULL || results->supp == NULL || + results->savg == NULL || results->weightavg == NULL) { + free_results_s_mu(results); + free(supp); + return EXIT_FAILURE; + } + + for(int i=0;isupp[i] = supp[i]; + for(int j=0;j= totnbins) { + fprintf(stderr,"ERROR: In %s> Bin index = %d must lie within range [0, %"PRId64") (possible int overflow)\n", + __FUNCTION__, index, totnbins); + return EXIT_FAILURE; + } + + results->npairs[index] = npairs[index]; + results->savg[index] = 0.0; + results->weightavg[index] = 0.0; + if(options->need_avg_sep){ + results->savg[index] = savg[index]; + } + if(need_weightavg) { + results->weightavg[index] = weightavg[index]; + } + } + } + free(supp); + + /* reset interrupt handlers to default */ + RESET_INTERRUPT_HANDLERS(); + reset_bin_refine_factors(options); + + 
if(options->c_api_timer) { + struct timeval t1; + gettimeofday(&t1, NULL); + options->c_api_time = ADD_DIFF_TIME(t0, t1); + } + + return EXIT_SUCCESS; +} diff --git a/theory/DDsmu/countpairs_s_mu_impl.h.src b/theory/DDsmu/countpairs_s_mu_impl.h.src new file mode 100644 index 00000000..7194b827 --- /dev/null +++ b/theory/DDsmu/countpairs_s_mu_impl.h.src @@ -0,0 +1,48 @@ +// # -*- mode: c -*- +/* File: countpairs_s_mu_impl.h.src */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "defs.h" //for struct config_options +#include "weight_defs_DOUBLE.h" +#include //for uint64_t + +#include "countpairs_s_mu.h"//for struct results_countpairs_s_mu + + extern void interrupt_handler_countpairs_s_mu_DOUBLE(int signo); + + typedef int (*countpairs_s_mu_func_ptr_DOUBLE)(const int64_t N0, DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, const weight_struct_DOUBLE *weights0, + const int64_t N1, DOUBLE *x1, DOUBLE *y1, DOUBLE *z1, const weight_struct_DOUBLE *weights1, const int same_cell, + const DOUBLE sqr_smax, const DOUBLE sqr_smin, const int nsbin, const int nmu_bins, + const DOUBLE *supp_sqr, const DOUBLE mu_max, const DOUBLE pimax, + const DOUBLE off_xwrap, const DOUBLE off_ywrap, const DOUBLE off_zwrap, + DOUBLE *src_savg, uint64_t *src_npairs, + DOUBLE *src_weightavg, const weight_method_t weight_method); + + + extern countpairs_s_mu_func_ptr_DOUBLE countpairs_s_mu_driver_DOUBLE(const struct config_options *options) __attribute__((warn_unused_result)); + + extern int countpairs_s_mu_DOUBLE(const int64_t ND1, DOUBLE *X1, DOUBLE *Y1, DOUBLE *Z1, + const int64_t ND2, DOUBLE *X2, DOUBLE *Y2, DOUBLE *Z2, + const int numthreads, + const int autocorr, + const char *sbinfile, + const double mu_max, + const int nmu_bins, + results_countpairs_s_mu 
*results, + struct config_options *options, + struct extra_options *extra); + +#ifdef __cplusplus +} +#endif diff --git a/theory/DDsmu/countpairs_s_mu_kernels.c.src b/theory/DDsmu/countpairs_s_mu_kernels.c.src new file mode 100644 index 00000000..72bf8044 --- /dev/null +++ b/theory/DDsmu/countpairs_s_mu_kernels.c.src @@ -0,0 +1,787 @@ +// # -*- mode: c -*- +/* File: countpairs_s_mu_kernels.c.src */ +/* + This file is a part of the Corrfunc package + Copyright (C) 2015-- Manodeep Sinha (manodeep@gmail.com) + License: MIT LICENSE. See LICENSE file under the top-level + directory at https://github.com/manodeep/Corrfunc/ +*/ + + +#include +#include +#include +#include + +#include "function_precision.h" +#include "utils.h" + +#include "weight_functions_DOUBLE.h" + + +#if defined(__AVX__) +#include "avx_calls.h" + +static inline int countpairs_s_mu_avx_intrinsics_DOUBLE(const int64_t N0, DOUBLE *x0, DOUBLE *y0, DOUBLE *z0, const weight_struct_DOUBLE *weights0, + const int64_t N1, DOUBLE *x1, DOUBLE *y1, DOUBLE *z1, const weight_struct_DOUBLE *weights1, const int same_cell, + const DOUBLE sqr_smax, const DOUBLE sqr_smin, const int nsbin, + const int nmu_bins, const DOUBLE *supp_sqr, const DOUBLE mu_max, const DOUBLE pimax, + const DOUBLE off_xwrap, const DOUBLE off_ywrap, const DOUBLE off_zwrap, + DOUBLE *src_savg, uint64_t *src_npairs, + DOUBLE *src_weightavg, const weight_method_t weight_method) +{ + if(N0 == 0 || N1 == 0) { + return EXIT_SUCCESS; + } + + if(src_npairs == NULL) { + return EXIT_FAILURE; + } + + const int32_t need_savg = src_savg != NULL; + const int32_t need_weightavg = src_weightavg != NULL; + + const int64_t totnbins = (nmu_bins+1)*(nsbin+1); + uint64_t npairs[totnbins]; + DOUBLE savg[totnbins], weightavg[totnbins]; + for(int64_t i=0;i -pimax) break; + z1++; n_off++; + } + if(prev_j == N1) { + i = N0; + break; + } + j = prev_j; + } + DOUBLE *localz1 = z1; + DOUBLE *localx1 = x1 + n_off; + DOUBLE *localy1 = y1 + n_off; + for(int w = 0; w < 
local_w1.num_weights; w++){ + local_w1.weights[w] = weights1->weights[w] + n_off; + } + + for(;j<=(N1 - AVX_NVEC);j+=AVX_NVEC) { + const AVX_FLOATS m_xpos = AVX_SET_FLOAT(xpos); + const AVX_FLOATS m_ypos = AVX_SET_FLOAT(ypos); + const AVX_FLOATS m_zpos = AVX_SET_FLOAT(zpos); + + union int8 { + AVX_INTS m_ibin; + int ibin[AVX_NVEC]; + }; + union int8 union_finalbin; + union float8{ + AVX_FLOATS m_Dperp; + DOUBLE Dperp[AVX_NVEC]; + }; + union float8 union_mDperp; + + + const AVX_FLOATS m_x1 = AVX_LOAD_FLOATS_UNALIGNED(localx1); + const AVX_FLOATS m_y1 = AVX_LOAD_FLOATS_UNALIGNED(localy1); + const AVX_FLOATS m_z1 = AVX_LOAD_FLOATS_UNALIGNED(localz1); + + localx1 += AVX_NVEC;//this might actually exceed the allocated range but we will never dereference that + localy1 += AVX_NVEC; + localz1 += AVX_NVEC; + + for(int w = 0; w < pair.num_weights; w++){ + pair.weights1[w].a = AVX_LOAD_FLOATS_UNALIGNED(local_w1.weights[w]); + local_w1.weights[w] += AVX_NVEC; + } + + union float8_weights{ + AVX_FLOATS m_weights; + DOUBLE weights[NVEC]; + }; + union float8_weights union_mweight; + + const AVX_FLOATS m_pimax = AVX_SET_FLOAT((DOUBLE) pimax); + const AVX_FLOATS m_sqr_smax = m_supp_sqr[nsbin-1]; + const AVX_FLOATS m_sqr_smin = m_supp_sqr[0]; + const AVX_FLOATS m_inv_dmu = AVX_SET_FLOAT(inv_dmu); + const AVX_FLOATS m_sqr_mumax = AVX_SET_FLOAT(sqr_mumax); + + const AVX_FLOATS m_zero = AVX_SET_FLOAT(ZERO); + const AVX_FLOATS m_nmu_bins = AVX_SET_FLOAT((DOUBLE) nmu_bins); + const AVX_FLOATS m_one = AVX_SET_FLOAT((DOUBLE) 1); + + const AVX_FLOATS m_xdiff = AVX_SUBTRACT_FLOATS(m_x1, m_xpos); //(x[j] - x0) + const AVX_FLOATS m_ydiff = AVX_SUBTRACT_FLOATS(m_y1, m_ypos); //(y[j] - y0) + AVX_FLOATS m_zdiff = AVX_SUBTRACT_FLOATS(m_z1, m_zpos); //z2[j:j+NVEC-1] - z1 + + const AVX_FLOATS m_sqr_xdiff = AVX_SQUARE_FLOAT(m_xdiff); //(x0 - x[j])^2 + const AVX_FLOATS m_sqr_ydiff = AVX_SQUARE_FLOAT(m_ydiff); //(y0 - y[j])^2 + const AVX_FLOATS m_sqr_zdiff = AVX_SQUARE_FLOAT(m_zdiff); //(z0 - z[j])^2 
+ + AVX_FLOATS s2 = AVX_ADD_FLOATS(m_sqr_zdiff, AVX_ADD_FLOATS(m_sqr_xdiff, m_sqr_ydiff));//s^2 = dz^2 + dx^2 + dy^2 + m_zdiff = AVX_MAX_FLOATS(m_zdiff,AVX_SUBTRACT_FLOATS(m_zero,m_zdiff));//dz = fabs(dz) => dz = max(dz, -dz); + + AVX_FLOATS m_mask_left; + AVX_FLOATS max_sqr_dz = AVX_MULTIPLY_FLOATS(s2, m_sqr_mumax); + + //Do all the distance cuts using masks here in new scope + { + //the z2 arrays are sorted in increasing order. which means + //the z2 value will increase in any future iteration of j. + //that implies the zdiff values are also monotonically increasing + //Therefore, if none of the zdiff values are less than pimax, then + //no future iteration in j can produce a zdiff value less than pimax. + AVX_FLOATS m_mask_pimax = AVX_COMPARE_FLOATS(m_zdiff,m_pimax,_CMP_LT_OS); + if(AVX_TEST_COMPARISON(m_mask_pimax) == 0) { + j=N1; + break; + } + + const AVX_FLOATS m_mu_mask = AVX_COMPARE_FLOATS(m_sqr_zdiff, max_sqr_dz, _CMP_LT_OS); + const AVX_FLOATS m_smax_mask = AVX_COMPARE_FLOATS(s2, m_sqr_smax, _CMP_LT_OS);//check for s2 < sqr_smax + const AVX_FLOATS m_smin_mask = AVX_COMPARE_FLOATS(s2, m_sqr_smin, _CMP_GE_OS);//check for s2 >= sqr_smin + const AVX_FLOATS m_s2_mask = AVX_BITWISE_AND(m_smax_mask,m_smin_mask); + + //Create a combined mask by bitwise and of m1 and m_mask_left. 
+ //This gives us the mask for all sqr_smin <= s2 < sqr_smax + // + mu_min <= mu < mu_max + m_mask_left = AVX_BITWISE_AND(m_mu_mask, m_s2_mask); + + //If not, continue with the next iteration of j-loop + if(AVX_TEST_COMPARISON(m_mask_left) == 0) { + continue; + } + + } + + //There is some s2 that satisfies sqr_smin <= s2 < sqr_smax && mu_min <= |dz| < mu_max + s2 = AVX_BLEND_FLOATS_WITH_MASK(m_sqr_smax, s2, m_mask_left); + /*m_mu := sqrt(s2/dz^2) (with masked elements set to mu_max */ + const AVX_FLOATS m_mu = AVX_SQRT_FLOAT(AVX_BLEND_FLOATS_WITH_MASK(m_sqr_mumax, AVX_DIVIDE_FLOATS(m_sqr_zdiff, s2), m_mask_left)); + + if(need_savg) { + union_mDperp.m_Dperp = AVX_SQRT_FLOAT(s2); + } + if(need_weightavg){ + pair.dx.a = m_xdiff; + pair.dy.a = m_ydiff; + pair.dz.a = m_zdiff; + + union_mweight.m_weights = avx_weight_func(&pair); + } + + const AVX_FLOATS m_mubin = AVX_MULTIPLY_FLOATS(m_mu,m_inv_dmu); + AVX_FLOATS m_sbin = AVX_SET_FLOAT((DOUBLE) 0); + //AVX_FLOATS m_all_ones = AVX_CAST_INT_TO_FLOAT(AVX_SET_INT(-1)); + for(int kbin=nsbin-1;kbin>=1;kbin--) { + const AVX_FLOATS m_mask_low = AVX_COMPARE_FLOATS(s2,m_supp_sqr[kbin-1],_CMP_GE_OS); + const AVX_FLOATS m_bin_mask = AVX_BITWISE_AND(m_mask_low,m_mask_left); + m_sbin = AVX_BLEND_FLOATS_WITH_MASK(m_sbin,m_kbin[kbin], m_bin_mask); + m_mask_left = AVX_COMPARE_FLOATS(s2, m_supp_sqr[kbin-1],_CMP_LT_OS); + //m_mask_left = AVX_XOR_FLOATS(m_mask_low, m_all_ones);//XOR with 0xFFFF... 
gives the bins that are smaller than m_supp_sqr[kbin] (and is faster than cmp_p(s/d) in theory) + const int test = AVX_TEST_COMPARISON(m_mask_left); + if(test==0) { + break; + } + } + const AVX_FLOATS m_nmu_bins_p1 = AVX_ADD_FLOATS(m_nmu_bins,m_one); + const AVX_FLOATS m_binproduct = AVX_ADD_FLOATS(AVX_MULTIPLY_FLOATS(m_sbin,m_nmu_bins_p1),m_mubin); + union_finalbin.m_ibin = AVX_TRUNCATE_FLOAT_TO_INT(m_binproduct); + + //update the histograms +#if defined(__ICC) || defined(__INTEL_COMPILER) +#pragma unroll(AVX_NVEC) +#endif + for(int jj=0;jj= pimax) { + break; + } + + const DOUBLE sqr_dx_dy = dx*dx + dy*dy; + const DOUBLE sqr_dz = dz*dz; + const DOUBLE s2 = sqr_dx_dy + sqr_dz; + if(s2 >= sqr_smax || s2 < sqr_smin) + continue; + if(sqr_dz >= s2 * sqr_mumax) continue; + const DOUBLE mu = SQRT(sqr_dz/s2); + + DOUBLE s, pairweight; + if(need_savg) { + s = SQRT(s2); + } + if(need_weightavg){ + pair.dx.d = dx; + pair.dy.d = dy; + pair.dz.d = dz; + pairweight = fallback_weight_func(&pair); + } + + int mu_bin = (int) (mu*inv_dmu); + mu_bin = mu_bin > nmu_bins ? 
nmu_bins:mu_bin; + for(int kbin=nsbin-1;kbin>=1;kbin--) { + if(s2 >= supp_sqr[kbin-1]) { + const int ibin = kbin*(nmu_bins+1) + mu_bin; + npairs[ibin]++; + if(need_savg) { + savg[ibin] += s; + } + if(need_weightavg){ + weightavg[ibin] += pairweight; + } + break; + } + } + }//remainder loop over second set of particles + }//loop over first set of particles + + for(int i=0;i -pimax) break; + z1++; n_off++; + } + if(prev_j == N1) { + i = N0; + break; + } + j = prev_j; + } + DOUBLE *localz1 = z1; + DOUBLE *localx1 = x1 + n_off; + DOUBLE *localy1 = y1 + n_off; + for(int w = 0; w < local_w1.num_weights; w++){ + local_w1.weights[w] = weights1->weights[w] + n_off; + } + + for(;j<=(N1 - SSE_NVEC);j+=SSE_NVEC){ + + union int4{ + SSE_INTS m_ibin; + int ibin[SSE_NVEC]; + }; + union int4 union_finalbin; + + union float4{ + SSE_FLOATS m_Dperp; + DOUBLE Dperp[SSE_NVEC]; + }; + union float4 union_mDperp; + + const SSE_FLOATS m_xpos = SSE_SET_FLOAT(xpos); + const SSE_FLOATS m_ypos = SSE_SET_FLOAT(ypos); + const SSE_FLOATS m_zpos = SSE_SET_FLOAT(zpos); + + const SSE_FLOATS m_x1 = SSE_LOAD_FLOATS_UNALIGNED(localx1); + const SSE_FLOATS m_y1 = SSE_LOAD_FLOATS_UNALIGNED(localy1); + const SSE_FLOATS m_z1 = SSE_LOAD_FLOATS_UNALIGNED(localz1); + + localx1 += SSE_NVEC; + localy1 += SSE_NVEC; + localz1 += SSE_NVEC; + + for(int w = 0; w < pair.num_weights; w++){ + pair.weights1[w].s = SSE_LOAD_FLOATS_UNALIGNED(local_w1.weights[w]); + local_w1.weights[w] += SSE_NVEC; + } + + union float4_weights{ + SSE_FLOATS m_weights; + DOUBLE weights[SSE_NVEC]; + }; + union float4_weights union_mweight; + + const SSE_FLOATS m_pimax = SSE_SET_FLOAT((DOUBLE) pimax); + const SSE_FLOATS m_sqr_smax = m_supp_sqr[nsbin-1]; + const SSE_FLOATS m_sqr_smin = m_supp_sqr[0]; + const SSE_FLOATS m_sqr_mumax = SSE_SET_FLOAT(sqr_mumax); + const SSE_FLOATS m_inv_dmu = SSE_SET_FLOAT(inv_dmu); + const SSE_FLOATS m_zero = SSE_SET_FLOAT(ZERO); + const SSE_FLOATS m_nmu_bins = SSE_SET_FLOAT((DOUBLE) nmu_bins); + const SSE_FLOATS 
m_one = SSE_SET_FLOAT((DOUBLE) 1); + + const SSE_FLOATS m_xdiff = SSE_SUBTRACT_FLOATS(m_x1, m_xpos); //(x[j] - x0) + const SSE_FLOATS m_ydiff = SSE_SUBTRACT_FLOATS(m_y1, m_ypos); //(y[j] - y0) + SSE_FLOATS m_zdiff = SSE_SUBTRACT_FLOATS(m_z1, m_zpos); //z2[j:j+NVEC-1] - z1 + + const SSE_FLOATS m_sqr_xdiff = SSE_SQUARE_FLOAT(m_xdiff); + const SSE_FLOATS m_sqr_ydiff = SSE_SQUARE_FLOAT(m_ydiff); + const SSE_FLOATS m_sqr_zdiff = SSE_SQUARE_FLOAT(m_zdiff); + + SSE_FLOATS s2 = SSE_ADD_FLOATS(m_sqr_zdiff, SSE_ADD_FLOATS(m_sqr_xdiff, m_sqr_ydiff));//s^2 = dx^2 + dy^2 + dz^2 + m_zdiff = SSE_MAX_FLOATS(m_zdiff,SSE_SUBTRACT_FLOATS(m_zero,m_zdiff));//dz = fabs(dz) => dz = max(dz, -dz); + + SSE_FLOATS m_mask_left; + SSE_FLOATS max_sqr_dz = SSE_MULTIPLY_FLOATS(s2, m_sqr_mumax); + + //Do all the distance cuts using masks here in new scope + { + //the z2 arrays are sorted in increasing order. which means + //the z2 value will increase in any future iteration of j. + //that implies the zdiff values are also monotonically increasing + //Therefore, if none of the zdiff values are less than pimax, then + //no future iteration in j can produce a zdiff value less than pimax. + SSE_FLOATS m_mask_pimax = SSE_COMPARE_FLOATS_LT(m_zdiff,m_pimax); + if(SSE_TEST_COMPARISON(m_mask_pimax) == 0) { + j=N1; + break; + } + + const SSE_FLOATS m_mu_mask = SSE_COMPARE_FLOATS_LT(m_sqr_zdiff, max_sqr_dz); + const SSE_FLOATS m_smax_mask = SSE_COMPARE_FLOATS_LT(s2, m_sqr_smax); + const SSE_FLOATS m_smin_mask = SSE_COMPARE_FLOATS_GE(s2, m_sqr_smin); + const SSE_FLOATS m_s2_mask = SSE_BITWISE_AND(m_smax_mask,m_smin_mask); + + //Create a combined mask by bitwise and of m1 and m_mask_left. 
+ //This gives us the mask for all sqr_smin <= s2 < sqr_smax + // + mu_min <= mu < mu_max + m_mask_left = SSE_BITWISE_AND(m_mu_mask, m_s2_mask); + + //If not, continue with the next iteration of j-loop + if(SSE_TEST_COMPARISON(m_mask_left) == 0) { + continue; + } + + } + + //There is some s2 that satisfies sqr_smin <= s2 < sqr_smax && mu_min <= |dz| < mu_max + s2 = SSE_BLEND_FLOATS_WITH_MASK(m_sqr_smax, s2, m_mask_left); + const SSE_FLOATS m_mu = SSE_SQRT_FLOAT(SSE_BLEND_FLOATS_WITH_MASK(m_sqr_mumax, SSE_DIVIDE_FLOATS(m_sqr_zdiff, s2), m_mask_left)); + + if(need_savg) { + union_mDperp.m_Dperp = SSE_SQRT_FLOAT(s2); + } + if(need_weightavg){ + pair.dx.s = m_xdiff; + pair.dy.s = m_ydiff; + pair.dz.s = m_zdiff; + + union_mweight.m_weights = sse_weight_func(&pair); + } + + const SSE_FLOATS m_mubin = SSE_MULTIPLY_FLOATS(m_mu,m_inv_dmu); + SSE_FLOATS m_sbin = SSE_SET_FLOAT((DOUBLE) 0); + //SSE_FLOATS m_all_ones = SSE_CAST_INT_TO_FLOAT(SSE_SET_INT(-1)); + for(int kbin=nsbin-1;kbin>=1;kbin--) { + const SSE_FLOATS m_mask_low = SSE_COMPARE_FLOATS_GE(s2,m_supp_sqr[kbin-1]); + const SSE_FLOATS m_bin_mask = SSE_BITWISE_AND(m_mask_low,m_mask_left); + m_sbin = SSE_BLEND_FLOATS_WITH_MASK(m_sbin,m_kbin[kbin], m_bin_mask); + m_mask_left = SSE_COMPARE_FLOATS_LT(s2, m_supp_sqr[kbin-1]); + //XOR with 0xFFFF... 
gives the bins that are smaller than m_supp_sqr[kbin] (and is faster than cmp_p(s/d) in theory) + //m_mask_left = SSE_XOR_FLOATS(m_mask_low, m_all_ones); + const int test = SSE_TEST_COMPARISON(m_mask_left); + if(test==0) { + break; + } + } + const SSE_FLOATS m_nmu_bins_p1 = SSE_ADD_FLOATS(m_nmu_bins,m_one); + const SSE_FLOATS m_binproduct = SSE_ADD_FLOATS(SSE_MULTIPLY_FLOATS(m_sbin,m_nmu_bins_p1),m_mubin); + union_finalbin.m_ibin = SSE_TRUNCATE_FLOAT_TO_INT(m_binproduct); + + //update the histograms +#if defined(__ICC) || defined(__INTEL_COMPILER) +#pragma unroll(SSE_NVEC) +#endif + for(int jj=0;jj= pimax) break; + + const DOUBLE sqr_dx_dy = dx*dx + dy*dy; + const DOUBLE sqr_dz = dz*dz; + const DOUBLE s2 = sqr_dx_dy + sqr_dz; + if(s2 >= sqr_smax || s2 < sqr_smin) + continue; + if(sqr_dz >= s2 * sqr_mumax) continue; + const DOUBLE mu = SQRT(sqr_dz/s2); + + DOUBLE s, pairweight; + if(need_weightavg){ + pair.dx.d = dx; + pair.dy.d = dy; + pair.dz.d = dz; + pairweight = fallback_weight_func(&pair); + } + + if(need_savg) { + s = SQRT(s2); + } + + int mu_bin = (int) (mu*inv_dmu); + mu_bin = mu_bin > nmu_bins ? 
nmu_bins:mu_bin; + for(int kbin=nsbin-1;kbin>=1;kbin--) { + if(s2 >= supp_sqr[kbin-1]) { + const int ibin = kbin*(nmu_bins+1) + mu_bin; + npairs[ibin]++; + if(need_savg) { + savg[ibin] += s; + } + if(need_weightavg){ + weightavg[ibin] += pairweight; + } + break; + } + }//searching for kbin + } + } + + for(int i=0;i 0) { + /*Particles are sorted on 'z', in increasing order */ + const DOUBLE dz = *z1 - zpos; + if(dz > -pimax) break; + z1++; n_off++; + nleft--; + } + /*If no particle in the second cell satisfies distance constraints on 'dz' for the current 'i'th particle in first cell, + then there can be no more pairs from any particles in the first cell (since the first cell is also sorted in increasing order in 'z') + */ + if(nleft == 0) { + i=N0; + break; + } + } + DOUBLE *localz1 = z1; + DOUBLE *localx1 = x1 + n_off; + DOUBLE *localy1 = y1 + n_off; + for(int w = 0; w < pair.num_weights; w++){ + local_w1.weights[w] = weights1->weights[w] + n_off; + } + + for(int64_t j=0;j= pimax) break; + + const DOUBLE sqr_dx_dy = dx*dx + dy*dy; + const DOUBLE sqr_dz = dz*dz; + const DOUBLE s2 = sqr_dx_dy + sqr_dz; + if(s2 >= sqr_smax || s2 < sqr_smin) { + continue; + } + + if(sqr_dz >= s2 * sqr_mu_max) { + continue; + } + const DOUBLE mu = SQRT(sqr_dz/s2); + + DOUBLE s, pairweight; + if(need_savg) { + s = SQRT(s2); + } + + if(need_weightavg){ + pair.dx.d = dx; + pair.dy.d = dy; + pair.dz.d = dz; + pairweight = weight_func(&pair); + } + + int mu_bin = (int) (mu*inv_dmu); + mu_bin = mu_bin > nmu_bins ? 
nmu_bins:mu_bin; + for(int kbin=nsbin-1;kbin>=1;kbin--) { + if(s2 >= supp_sqr[kbin-1]) { + const int ibin = kbin*(nmu_bins+1) + mu_bin; + npairs[ibin]++; + if(need_savg) { + savg[ibin] += s; + } + if(need_weightavg){ + weightavg[ibin] += pairweight; + } + break; + } + } + } + } + for(int i=0;i>> from Corrfunc._countpairs import countpairs\n" - ">>> from Corrfunc.io import read_catalog\n" + ">>> from Corrfunc.io import read_catalog\n" ">>> x,y,z = read_catalog()\n" ">>> autocorr=1\n" ">>> nthreads=2\n" @@ -229,7 +232,7 @@ static PyMethodDef module_methods[] = { "\n" "autocorr: boolean, required\n" " Boolean flag for auto/cross-correlation. If autocorr is set to 1,\n" - " are not used (but must still be passed, perhaps again as X1/Y1/Z1).\n" + " are not used (but must still be passed, perhaps again as X1/Y1/Z1).\n" "\n" "nthreads: integer\n" " The number of OpenMP threads to use. Has no effect if OpenMP was not\n" @@ -248,17 +251,17 @@ static PyMethodDef module_methods[] = { " contain white-space separated values of (rpmin, rpmax) for each\n" " ``rp`` wanted. The bins do not need to be contiguous but must be in\n" " increasing order (smallest bins come first). \n\n" - + "X1/Y1/Z1 : array-like, real (float/double)\n" " The array of X/Y/Z positions for the first set of points.\n" " Calculations are done in the precision of the supplied arrays.\n" "\n" "weights1 : array-like, real (float/double), shape (n_particles,) or (n_weights_per_particle,n_particles), optional\n" " Weights for computing a weighted pair count.\n\n" - + "weight_type : str, optional\n" " The type of pair weighting to apply.\n" - " Options: \"pair_product\", None\n" + " Options: \"pair_product\", None\n" " Default: None.\n\n" "periodic : boolean\n" @@ -295,7 +298,7 @@ static PyMethodDef module_methods[] = { " Controls the maximum number of cells per dimension. Total number of cells \n" " can be up to (max_cells_per_dim)^3. 
Only increase if ``rmax`` is too small \n" " relative to the boxsize (and increasing helps the runtime). \n\n" - + "c_api_timer : boolean (default false)\n" " Boolean flag to measure actual time spent in the C libraries. Here\n" " to allow for benchmarking and scaling studies.\n" @@ -325,7 +328,7 @@ static PyMethodDef module_methods[] = { " will be set to 0.0 for all bins; similarly for ``weight_avg``. ``npairs`` contains the number of pairs\n" " in that bin and can be used to compute the actual wp("RP_CHAR") by\n" " combining with (DR, RR) counts.\n" - "\n" + "\n" "time : if ``c_api_timer`` is set, then the return value contains the time spent\n" " in the API; otherwise time is set to 0.0\n" "\n" @@ -333,7 +336,7 @@ static PyMethodDef module_methods[] = { "--------\n" "\n" ">>> from Corrfunc._countpairs import countpairs_rp_pi\n" - ">>> from Corrfunc.io import read_catalog\n" + ">>> from Corrfunc.io import read_catalog\n" ">>> x,y,z = read_catalog()\n" ">>> autocorr=1\n" ">>> nthreads=2\n" @@ -399,10 +402,10 @@ static PyMethodDef module_methods[] = { "\n" "weights : array-like, real (float/double), shape (n_particles,) or (n_weights_per_particle,n_particles), optional\n" " Weights for computing a weighted correlation function.\n\n" - + "weight_type : str, optional\n" " The type of pair weighting to apply.\n" - " Options: \"pair_product\", None\n" + " Options: \"pair_product\", None\n" " Default: None.\n\n" "verbose : boolean (default false)\n" @@ -461,7 +464,7 @@ static PyMethodDef module_methods[] = { " ``rpavg`` will be set to 0.0 for all bins; similarly for ``weight_avg``. ``wp`` contains the projected\n" " correlation function while ``npairs`` contains the number of unique pairs\n" " in that bin. 
If weight are used, then ``wp`` is weighted, while ``npairs`` is not.\n" - "\n" + "\n" "time : if ``c_api_timer`` is set, then the return value contains the time spent\n" " in the API; otherwise time is set to 0.0\n" "\n" @@ -476,7 +479,7 @@ static PyMethodDef module_methods[] = { "--------\n" "\n" ">>> from _countpairs import countpairs_wp\n" - ">>> from Corrfunc.io import read_catalog\n" + ">>> from Corrfunc.io import read_catalog\n" ">>> x,y,z = read_catalog()\n" ">>> nthreads=2\n" ">>> pimax=40.0\n" @@ -531,10 +534,10 @@ static PyMethodDef module_methods[] = { "\n" "weights : array-like, real (float/double), shape (n_particles,) or (n_weights_per_particle,n_particles), optional\n" " Weights for computing a weighted correlation function.\n\n" - + "weight_type : str, optional\n" " The type of pair weighting to apply.\n" - " Options: \"pair_product\", None\n" + " Options: \"pair_product\", None\n" " Default: None.\n\n" "verbose : boolean (default false)\n" @@ -585,7 +588,7 @@ static PyMethodDef module_methods[] = { " ``ravg`` will be set to 0.0 for all bins; similarly for ``weightavg``. ``xi`` contains the projected\n" " correlation function while ``npairs`` contains the number of unique pairs\n" " in that bin. 
If weights are used, then ``xi`` is weighted, while ``npairs`` is not.\n" - "\n" + "\n" "time : if ``c_api_timer`` is set, then the return value contains the time spent\n" " in the API; otherwise time is set to 0.0\n" "\n" @@ -593,7 +596,7 @@ static PyMethodDef module_methods[] = { "--------\n" "\n" ">>> from _countpairs import countpairs_xi\n" - ">>> from Corrfunc.io import read_catalog\n" + ">>> from Corrfunc.io import read_catalog\n" ">>> x,y,z = read_catalog()\n" ">>> nthreads=2\n" ">>> boxsize = 420.0\n" @@ -601,6 +604,153 @@ static PyMethodDef module_methods[] = { " x, y, z, verbose=True, output_ravg=True)\n" "\n" }, + {"countpairs_s_mu" ,(PyCFunction) countpairs_countpairs_s_mu ,METH_VARARGS | METH_KEYWORDS, + "countpairs_s_mu(autocorr, nthreads, binfile, mu_max, nmu_bins, X1, Y1, Z1, weights1=None, weight_type=None,\n" + " periodic=True, X2=None, Y2=None, Z2=None, weights2=None, verbose=False,\n" + " boxsize=0.0, output_savg=False, xbin_refine_factor=2, ybin_refine_factor=2,\n" + " zbin_refine_factor=1, max_cells_per_dim=100, c_api_timer=False, isa=-1)\n" + "\n" + "Calculate the 2-D pair-counts corresponding to the real-space correlation\n" + "function, "XI_CHAR"(s, "MU_CHAR"). Pairs which are separated\n" + "by less than the ``s`` bins (specified in ``binfile``) in the X-Y plane, and\n" + "less than ``s*mu_max`` in the Z-dimension are counted.\n\n" + + "Note, that this module only returns pair counts and not the actual\n" + "correlation function "XI_CHAR"(s, "MU_CHAR"). \n" + "Also note that the python wrapper for this extension: `Corrfunc.theory.DDsmu`\n" + "is more user-friendly.\n" + UNICODE_WARNING + "\n" + "Parameters\n" + "-----------\n" + "Every parameter can be passed as a keyword of the corresponding name.\n" + "\n" + "autocorr: boolean, required\n" + " Boolean flag for auto/cross-correlation. 
If autocorr is set to 1,\n" + " are not used (but must still be passed, perhaps again as X1/Y1/Z1).\n" + "\n" + "nthreads: integer\n" + " The number of OpenMP threads to use. Has no effect if OpenMP was not\n" + " enabled during library compilation.\n" + "\n" + "binfile : string\n" + " Filename specifying the ``s`` bins for ``DDsmu``. The file should\n" + " contain white-space separated values of (smin, smax) for each\n" + " ``s`` wanted. The bins must be contiguous and in\n" + " increasing order (smallest bins come first). \n" + "\n" + "mu_max: double. Must be in range (0.0, 1.0]\n" + " A double-precision value for the maximum cosine of the angular separation from\n" + " the line of sight (LOS). Here, LOS is taken to be along the Z direction.\n" + " Note that only pairs with ``0 <= cos("THETA_CHAR"_LOS) < mu_max``\n" + " are counted (no equality).\n\n" + "\n" + "nmu_bins: Integer. Must be at least 1\n" + " Number of bins for ``mu``\n\n" + "\n" + "X1/Y1/Z1 : array-like, real (float/double)\n" + " The array of X/Y/Z positions for the first set of points.\n" + " Calculations are done in the precision of the supplied arrays.\n" + "\n" + "weights1 : array-like, real (float/double), shape (n_particles,) or (n_weights_per_particle,n_particles), optional\n" + " Weights for computing a weighted pair count.\n\n" + + "weight_type : str, optional\n" + " The type of pair weighting to apply.\n" + " Options: \"pair_product\", None\n" + " Default: None.\n\n" + + "periodic : boolean\n" + " Boolean flag to indicate periodic boundary conditions.\n" + "\n" + + "X2/Y2/Z2 : array-like, real (float/double)\n" + " Array of XYZ positions for the second set of points. *Must* be the same\n" + " precision as the X1/Y1/Z1 arrays. Only required when ``autocorr==0``.\n" + "\n" + + "weights2\n : array-like, real (float/double), shape (n_particles,) or (n_weights_per_particle,n_particles), optional\n" + " Weights for computing a weighted pair count." 
+ + "verbose : boolean (default false)\n" + " Boolean flag to control output of informational messages\n" + "\n" + + "boxsize : double\n" + " The side-length of the cube in the cosmological simulation.\n" + " Present to facilitate exact calculations for periodic wrapping.\n" + " If boxsize is not supplied, then the wrapping is done based on\n" + " the maximum difference within each dimension of the X/Y/Z arrays.\n" + "\n" + + "output_savg : boolean (default false)\n" + " Boolean flag to output the average ``s`` for each bin. Code will\n" + " run slower if you set this flag. Also, note, if you are calculating\n" + " in single-precision, ``s`` will suffer from numerical loss of\n" + " precision and can not be trusted. If you need accurate ``s``\n" + " values, then pass in double precision arrays for the particle positions.\n" + "\n" + + "(xyz)bin_refine_factor: integer (default (2,2,1) typical values in [1-3]) \n" + " Controls the refinement on the cell sizes. Can have up to a 20% impact \n" + " on runtime. \n\n" + + "max_cells_per_dim: integer (default 100, typical values in [50-300]) \n" + " Controls the maximum number of cells per dimension. Total number of cells \n" + " can be up to (max_cells_per_dim)^3. Only increase if ``rmax`` is too small \n" + " relative to the boxsize (and increasing helps the runtime). \n\n" + + "c_api_timer : boolean (default false)\n" + " Boolean flag to measure actual time spent in the C libraries. Here\n" + " to allow for benchmarking and scaling studies.\n" + "\n" + + "isa : integer (default -1)\n" + " Controls the runtime dispatch for the instruction set to use. Possible\n" + " options are: [-1, AVX, SSE42, FALLBACK]\n" + "\n" + " Setting isa to -1 will pick the fastest available instruction\n" + " set on the current computer. 
However, if you set ``isa`` to, say,\n" + " ``AVX`` and ``AVX`` is not available on the computer, then the code will\n" + " revert to using ``FALLBACK`` (even though ``SSE42`` might be available).\n" + "\n" + " Unless you are benchmarking the different instruction sets, you should\n" + " always leave ``isa`` to the default value. And if you *are* benchmarking,\n" + " then the integer values correspond to the ``enum`` for the instruction set\n" + " defined in ``utils/defs.h``.\n" + "\n" + + "Returns\n" + "--------\n" + "\n" + "A tuple (results, time) \n" + "\n" + "results : A python list\n" + " A python list containing ``nmu_bins`` of [smin, smax, savg, mu_max, npairs, weightavg]\n" + " for each spatial bin specified in the ``binfile``. There will be a total of ``nmu_bins``\n" + " ranging from [0, ``mu_max``) *per* spatial bin. If ``output_savg`` is not set, then ``savg``\n" + " will be set to 0.0 for all bins; similarly for ``weight_avg``. ``npairs`` \n" + " contains the number of pairs in that bin.\n" + "\n" + "time : if ``c_api_timer`` is set, then the return value contains the time spent\n" + " in the API; otherwise time is set to 0.0\n" + "\n" + + "Example\n" + "--------\n" + "\n" + ">>> from Corrfunc._countpairs import countpairs_s_mu\n" + ">>> from Corrfunc.io import read_catalog\n" + ">>> x,y,z = read_catalog()\n" + ">>> autocorr=1\n" + ">>> nthreads=2\n" + ">>> mu_max=1.0\n" + ">>> nmu_bins=40\n" + ">>> (DDsmu, time) = countpairs_s_mu(autocorr, nthreads, '../tests/bins', mu_max, nmu_bins, \n" + " x, y, z, X2=x, Y2=y, Z2=z,\n" + " verbose=True, output_savg=True)\n" + "\n" + }, {"countspheres_vpf" ,(PyCFunction) countpairs_countspheres_vpf ,METH_VARARGS | METH_KEYWORDS, "countspheres_vpf(rmax, nbins, nspheres, numpN, seed,\n" " X, Y, Z, verbose=False, periodic=True,\n" @@ -730,7 +880,7 @@ static PyObject *countpairs_error_out(PyObject *module, const char *msg) { #if PY_MAJOR_VERSION < 3 (void) module;//to avoid unused warning with python2 -#endif +#endif 
struct module_state *st = GETSTATE(module); PyErr_SetString(st->error, msg); @@ -786,7 +936,7 @@ PyMODINIT_FUNC init_countpairs(void) Py_DECREF(module); INITERROR; } - + /* Load `numpy` functionality. */ import_array(); @@ -803,24 +953,24 @@ PyMODINIT_FUNC init_countpairs(void) static int64_t check_dims_and_datatype(PyObject *module, PyArrayObject *x1_obj, PyArrayObject *y1_obj, PyArrayObject *z1_obj, PyArrayObject *weights1_obj, size_t *element_size) { char msg[1024]; - + const int check_weights = weights1_obj != NULL; /* All the position arrays should be 1-D*/ const int nxdims = PyArray_NDIM(x1_obj); const int nydims = PyArray_NDIM(y1_obj); const int nzdims = PyArray_NDIM(z1_obj); - + if(nxdims != 1 || nydims != 1 || nzdims != 1) { snprintf(msg, 1024, "ERROR: Expected 1-D numpy arrays.\nFound (nxdims, nydims, nzdims) = (%d, %d, %d) instead", nxdims, nydims, nzdims); countpairs_error_out(module, msg); return -1; } - + /* The weights array can be 1-D or 2-D of shape (n_weights, n_particles) */ const int n_weight_dims = check_weights ? 
PyArray_NDIM(weights1_obj) : 1; - + if(n_weight_dims != 1 && n_weight_dims != 2) { snprintf(msg, 1024, "ERROR: Expected 1-D or 2-D weight array.\nFound n_weight_dims = %d instead", n_weight_dims); countpairs_error_out(module, msg); @@ -854,7 +1004,7 @@ static int64_t check_dims_and_datatype(PyObject *module, PyArrayObject *x1_obj, countpairs_error_out(module, msg); return -1; } - + // Current version of the code only supports weights of the same dtype as positions if( x_type != y_type || y_type != z_type || (check_weights && z_type != weights_type)) { PyArray_Descr *x_descr = PyArray_DescrFromType(x_type); @@ -874,12 +1024,12 @@ static int64_t check_dims_and_datatype(PyObject *module, PyArrayObject *x1_obj, countpairs_error_out(module, msg); return -1; } - + /* Check if the number of elements in the 3 Python arrays are identical */ const int64_t nx1 = (int64_t)PyArray_SIZE(x1_obj); const int64_t ny1 = (int64_t)PyArray_SIZE(y1_obj); const int64_t nz1 = (int64_t)PyArray_SIZE(z1_obj); - + if(nx1 != ny1 || ny1 != nz1) { snprintf(msg, 1024, "ERROR: Expected arrays to have the same number of elements in all 3-dimensions.\nFound (nx, ny, nz) = (%"PRId64", %"PRId64", %"PRId64") instead", nx1, ny1, nz1); @@ -904,25 +1054,25 @@ static int64_t check_dims_and_datatype(PyObject *module, PyArrayObject *x1_obj, } else { *element_size = sizeof(double); } - + return nx1; } static int print_kwlist_into_msg(char *msg, const size_t totsize, size_t len, char *kwlist[], const size_t nitems) { for(size_t i=0;i= totsize-2) { return EXIT_FAILURE; } - + memcpy(msg+len, kwlist[i], strlen(kwlist[i])); len += strlen(kwlist[i]); msg[len] = ','; msg[len+1] = ' '; len += 2; } - + msg[len]='\0'; return EXIT_SUCCESS; } @@ -933,11 +1083,11 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject //Error-handling is global in python2 -> stored in struct module_state _struct declared at the top of this file #if PY_MAJOR_VERSION < 3 (void) self; - PyObject *module = 
NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. + PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. #else //In python3, self is simply the module object that was returned earlier by init PyObject *module = self; -#endif +#endif PyArrayObject *x1_obj=NULL, *y1_obj=NULL, *z1_obj=NULL, *weights1_obj=NULL; PyArrayObject *x2_obj=NULL, *y2_obj=NULL, *z2_obj=NULL, *weights2_obj=NULL; @@ -951,7 +1101,7 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject options.periodic = 1; options.need_avg_sep = 0; options.c_api_timer = 0; - + int8_t xbin_ref=options.bin_refine_factors[0], ybin_ref=options.bin_refine_factors[1], zbin_ref=options.bin_refine_factors[2]; @@ -1004,7 +1154,7 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject &weighting_method_str) ) { - + PyObject_Print(kwargs, stdout, 0); fprintf(stdout, "\n"); @@ -1017,7 +1167,7 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject if(status != EXIT_SUCCESS) { fprintf(stderr,"Error message does not contain all of the keywords\n"); } - + countpairs_error_out(module,msg); Py_RETURN_NONE; } @@ -1036,16 +1186,16 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject set_bin_refine_scheme(&options, BINNING_CUST);//custom binning -> code will honor requested binning scheme } - + /* We have numpy arrays and all the required inputs*/ /* How many data points are there? 
And are they all of floating point type */ size_t element_size; const int64_t ND1 = check_dims_and_datatype(module, x1_obj, y1_obj, z1_obj, weights1_obj, &element_size); if(ND1 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } - + /* Ensure the weights are of the right shape (n_weights, n_particles) */ if(weights1_obj != NULL){ // A numpy dimension of length -1 will be expanded to n_weights @@ -1053,7 +1203,7 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; weights1_obj = (PyArrayObject *) PyArray_Newshape(weights1_obj, &pdims, NPY_CORDER); } - + /* Validate the user's choice of weighting method */ weight_method_t weighting_method; int wstatus = get_weight_method_by_name(weighting_method_str, &weighting_method); @@ -1072,7 +1222,7 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject countpairs_error_out(module, msg); Py_RETURN_NONE; } - + if(extra.weights0.num_weights > 0 && found_weights > MAX_NUM_WEIGHTS){ char msg[1024]; snprintf(msg, 1024, "ValueError: In %s: Provided %d weights-per-particle, but the code was compiled with MAX_NUM_WEIGHTS=%d.\n", @@ -1099,17 +1249,17 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject size_t element_size2; ND2 = check_dims_and_datatype(module, x2_obj, y2_obj, z2_obj, weights2_obj, &element_size2); if(ND2 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } - + /* Ensure the weights are of the right shape (n_weights, n_particles) */ if(weights2_obj != NULL){ npy_intp dims[2] = {-1, ND2}; PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; weights2_obj = (PyArrayObject *) PyArray_Newshape(weights2_obj, &pdims, NPY_CORDER); } - + if(element_size != element_size2) { snprintf(msg, 1024, "TypeError: In %s: The two arrays must have the same 
data-type. First array is of type %s while second array is of type %s\n", __FUNCTION__, element_size == 4 ? "floats":"doubles", element_size2 == 4 ? "floats":"doubles"); @@ -1118,9 +1268,9 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject } } - - /* - Interpret the input objects as numpy arrays (of whatever the input type the python object has). + + /* + Interpret the input objects as numpy arrays (of whatever the input type the python object has). NULL initialization is necessary since we might be calling XDECREF. The input objects can be converted into the required DOUBLE array. */ @@ -1132,7 +1282,7 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject if(weights1_obj != NULL){ weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } - + /* NULL initialization is necessary since we might be calling XDECREF*/ PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; if(autocorr == 0) { @@ -1143,14 +1293,14 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject weights2_array = PyArray_FromArray(weights2_obj, NOTYPE_DESCR, requirements); } } - + if (x1_array == NULL || y1_array == NULL || z1_array == NULL || (autocorr==0 && (x2_array == NULL || y2_array == NULL || z2_array == NULL))) { Py_XDECREF(x1_array); Py_XDECREF(y1_array); Py_XDECREF(z1_array); Py_XDECREF(weights1_array); - + Py_XDECREF(x2_array); Py_XDECREF(y2_array); Py_XDECREF(z2_array); @@ -1165,7 +1315,7 @@ static PyObject *countpairs_countpairs(PyObject *self, PyObject *args, PyObject /* Get pointers to the data */ void *X1 = NULL, *Y1=NULL, *Z1=NULL, *weights1=NULL; - X1 = PyArray_DATA((PyArrayObject *) x1_array); + X1 = PyArray_DATA((PyArrayObject *) x1_array); Y1 = PyArray_DATA((PyArrayObject *) y1_array); Z1 = PyArray_DATA((PyArrayObject *) z1_array); if(weights1_array != NULL){ @@ -1181,7 +1331,7 @@ static PyObject *countpairs_countpairs(PyObject *self, 
PyObject *args, PyObject weights2 = PyArray_DATA((PyArrayObject *) weights2_array); } } - + /* Pack the weights into extra_options */ for(int64_t w = 0; w < extra.weights0.num_weights; w++){ extra.weights0.weights[w] = (char *) weights1 + w*ND1*element_size; @@ -1239,16 +1389,16 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO { #if PY_MAJOR_VERSION < 3 (void) self; - PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. + PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. #else //In python3, self is simply the module object that was returned earlier by init PyObject *module = self; -#endif +#endif PyArrayObject *x1_obj=NULL, *y1_obj=NULL, *z1_obj=NULL, *weights1_obj=NULL; PyArrayObject *x2_obj=NULL, *y2_obj=NULL, *z2_obj=NULL, *weights2_obj=NULL; int autocorr=0; int nthreads=4; - + double pimax; char *binfile, *weighting_method_str = NULL; struct config_options options = get_config_options(); @@ -1259,7 +1409,7 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO int8_t xbin_ref=options.bin_refine_factors[0], ybin_ref=options.bin_refine_factors[1], zbin_ref=options.bin_refine_factors[2]; - + static char *kwlist[] = { "autocorr", "nthreads", @@ -1320,7 +1470,7 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO if(status != EXIT_SUCCESS) { fprintf(stderr,"Error message does not contain all of the keywords\n"); } - + countpairs_error_out(module,msg); Py_RETURN_NONE; } @@ -1343,10 +1493,10 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO /* How many data points are there? 
And are they all of floating point type */ const int64_t ND1 = check_dims_and_datatype(module, x1_obj, y1_obj, z1_obj, weights1_obj, &element_size); if(ND1 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } - + /* Ensure the weights are of the right shape (n_weights, n_particles) */ if(weights1_obj != NULL){ // A numpy dimension of length -1 will be expanded to n_weights @@ -1354,7 +1504,7 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; weights1_obj = (PyArrayObject *) PyArray_Newshape(weights1_obj, &pdims, NPY_CORDER); } - + /* Validate the user's choice of weighting method */ weight_method_t weighting_method; int wstatus = get_weight_method_by_name(weighting_method_str, &weighting_method); @@ -1373,7 +1523,7 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO countpairs_error_out(module, msg); Py_RETURN_NONE; } - + if(extra.weights0.num_weights > 0 && found_weights > MAX_NUM_WEIGHTS){ char msg[1024]; snprintf(msg, 1024, "ValueError: In %s: Provided %d weights-per-particle, but the code was compiled with MAX_NUM_WEIGHTS=%d.\n", @@ -1401,7 +1551,7 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO size_t element_size2; ND2 = check_dims_and_datatype(module, x2_obj, y2_obj, z2_obj, weights2_obj, &element_size2); if(ND2 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } /* Ensure the weights are of the right shape (n_weights, n_particles) */ @@ -1417,8 +1567,8 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO countpairs_error_out(module, msg); Py_RETURN_NONE; } - } - + } + /* Interpret the input objects as numpy arrays. 
*/ const int requirements = NPY_ARRAY_IN_ARRAY; PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; @@ -1429,7 +1579,7 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO if(weights1_obj != NULL){ weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } - + if(autocorr == 0) { x2_array = PyArray_FromArray(x2_obj, NOTYPE_DESCR, requirements); y2_array = PyArray_FromArray(y2_obj, NOTYPE_DESCR, requirements); @@ -1461,7 +1611,7 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO /* Get pointers to the data as C-types. */ void *X1 = NULL, *Y1 = NULL, *Z1 = NULL, *weights1=NULL; void *X2 = NULL, *Y2 = NULL, *Z2 = NULL, *weights2=NULL; - X1 = PyArray_DATA((PyArrayObject *) x1_array); + X1 = PyArray_DATA((PyArrayObject *) x1_array); Y1 = PyArray_DATA((PyArrayObject *) y1_array); Z1 = PyArray_DATA((PyArrayObject *) z1_array); if(weights1_array != NULL){ @@ -1487,7 +1637,7 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO NPY_BEGIN_THREADS_DEF; NPY_BEGIN_THREADS; - + options.float_type = element_size; results_countpairs_rp_pi results; double c_api_time = 0.0; @@ -1504,7 +1654,7 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO c_api_time = options.c_api_time; } NPY_END_THREADS; - + /* Clean up. 
*/ Py_DECREF(x1_array);Py_DECREF(y1_array);Py_DECREF(z1_array);Py_XDECREF(weights1_array);//x1 should absolutely not be NULL Py_XDECREF(x2_array);Py_XDECREF(y2_array);Py_XDECREF(z2_array);Py_XDECREF(weights2_array);//x2 might be NULL depending on value of autocorr @@ -1531,7 +1681,7 @@ static PyObject *countpairs_countpairs_rp_pi(PyObject *self, PyObject *args, PyO rlow=results.rupp[i]; } free_results_rp_pi(&results); - + return Py_BuildValue("(Od)", ret, c_api_time); } @@ -1539,11 +1689,11 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje { #if PY_MAJOR_VERSION < 3 (void) self;//to suppress the unused variable warning. Terrible hack - PyObject *module = NULL;//need not be used -> setting to NULL so any attempts to dereference will result in a crash. + PyObject *module = NULL;//need not be used -> setting to NULL so any attempts to dereference will result in a crash. #else //In python3, self is simply the module object that was returned earlier by init PyObject *module = self; -#endif +#endif PyArrayObject *x1_obj=NULL, *y1_obj=NULL, *z1_obj=NULL, *weights1_obj=NULL; double boxsize,pimax; int nthreads=1; @@ -1560,7 +1710,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje int8_t xbin_ref=options.bin_refine_factors[0], ybin_ref=options.bin_refine_factors[1], zbin_ref=options.bin_refine_factors[2]; - + static char *kwlist[] = { "boxsize", "pimax", @@ -1582,7 +1732,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje "isa",/* instruction set to use of type enum isa; valid values are AVX, SSE, FALLBACK */ NULL }; - + if( ! 
PyArg_ParseTupleAndKeywords(args, kwargs, "ddisO!O!O!|O!sbbbbbhbbi", kwlist, &boxsize,&pimax,&nthreads,&binfile, &PyArray_Type,&x1_obj, @@ -1597,7 +1747,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje &(options.c_api_timer), &(options.c_cell_timer), &(options.instruction_set)) - + ){ PyObject_Print(kwargs, stdout, 0); fprintf(stdout, "\n"); @@ -1611,7 +1761,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje if(status != EXIT_SUCCESS) { fprintf(stderr,"Error message does not contain all of the keywords\n"); } - + countpairs_error_out(module,msg); Py_RETURN_NONE; } @@ -1630,14 +1780,14 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje options.bin_refine_factors[2] = zbin_ref; set_bin_refine_scheme(&options, BINNING_CUST);//custom binning -> code will honor requested binning scheme } - + /* How many data points are there? And are they all of floating point type */ const int64_t ND1 = check_dims_and_datatype(module, x1_obj, y1_obj, z1_obj, weights1_obj, &element_size); if(ND1 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } - + /* Ensure the weights are of the right shape (n_weights, n_particles) */ if(weights1_obj != NULL){ // A numpy dimension of length -1 will be expanded to n_weights @@ -1645,7 +1795,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; weights1_obj = (PyArrayObject *) PyArray_Newshape(weights1_obj, &pdims, NPY_CORDER); } - + /* Validate the user's choice of weighting method */ weight_method_t weighting_method; int wstatus = get_weight_method_by_name(weighting_method_str, &weighting_method); @@ -1664,7 +1814,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje countpairs_error_out(module, msg); Py_RETURN_NONE; } - + if(extra.weights0.num_weights > 0 && 
found_weights > MAX_NUM_WEIGHTS){ char msg[1024]; snprintf(msg, 1024, "ValueError: In %s: Provided %d weights-per-particle, but the code was compiled with MAX_NUM_WEIGHTS=%d.\n", @@ -1672,7 +1822,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje countpairs_error_out(module, msg); Py_RETURN_NONE; } - + /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; @@ -1683,7 +1833,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); } - + if (x1_array == NULL || y1_array == NULL || z1_array == NULL) { Py_XDECREF(x1_array); Py_XDECREF(y1_array); @@ -1706,7 +1856,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje if(weights1_array != NULL){ weights1 = PyArray_DATA((PyArrayObject *) weights1_array); } - + /* Pack the weights into extra_options */ for(int64_t w = 0; w < extra.weights0.num_weights; w++){ extra.weights0.weights[w] = (char *) weights1 + w*ND1*element_size; @@ -1715,7 +1865,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje NPY_BEGIN_THREADS_DEF; NPY_BEGIN_THREADS; - + results_countpairs_wp results; options.float_type = element_size; double c_api_time = 0.0; @@ -1738,7 +1888,7 @@ static PyObject *countpairs_countpairs_wp(PyObject *self, PyObject *args, PyObje if(status != EXIT_SUCCESS) { Py_RETURN_NONE; } - + #if 0 for(int i=1;i setting to NULL so any attempts to dereference will result in a crash. + PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. 
#else //In python3, self is simply the module object that was returned earlier by init PyObject *module = self; -#endif +#endif PyArrayObject *x1_obj, *y1_obj, *z1_obj, *weights1_obj = NULL; double boxsize; @@ -1818,7 +1968,7 @@ static PyObject *countpairs_countpairs_xi(PyObject *self, PyObject *args, PyObje NULL }; - + if( ! PyArg_ParseTupleAndKeywords(args, kwargs, "disO!O!O!|O!sbbbbbhbi", kwlist, &boxsize,&nthreads,&binfile, &PyArray_Type,&x1_obj, @@ -1836,7 +1986,7 @@ static PyObject *countpairs_countpairs_xi(PyObject *self, PyObject *args, PyObje PyObject_Print(kwargs, stdout, 0); fprintf(stdout, "\n"); - + char msg[1024]; int len=snprintf(msg, 1024,"ArgumentError: In xi> Could not parse the arguments. Input parameters are: \n"); @@ -1846,7 +1996,7 @@ static PyObject *countpairs_countpairs_xi(PyObject *self, PyObject *args, PyObje if(status != EXIT_SUCCESS) { fprintf(stderr,"Error message does not contain all of the keywords\n"); } - + countpairs_error_out(module,msg); Py_RETURN_NONE; } @@ -1869,10 +2019,10 @@ static PyObject *countpairs_countpairs_xi(PyObject *self, PyObject *args, PyObje size_t element_size; const int64_t ND1 = check_dims_and_datatype(module, x1_obj, y1_obj, z1_obj, weights1_obj, &element_size); if(ND1 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } - + /* Ensure the weights are of the right shape (n_weights, n_particles) */ if(weights1_obj != NULL){ // A numpy dimension of length -1 will be expanded to n_weights @@ -1880,7 +2030,7 @@ static PyObject *countpairs_countpairs_xi(PyObject *self, PyObject *args, PyObje PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; weights1_obj = (PyArrayObject *) PyArray_Newshape(weights1_obj, &pdims, NPY_CORDER); } - + /* Validate the user's choice of weighting method */ weight_method_t weighting_method; int wstatus = get_weight_method_by_name(weighting_method_str, &weighting_method); @@ -1899,7 +2049,7 @@ static PyObject 
*countpairs_countpairs_xi(PyObject *self, PyObject *args, PyObje countpairs_error_out(module, msg); Py_RETURN_NONE; } - + if(extra.weights0.num_weights > 0 && found_weights > MAX_NUM_WEIGHTS){ char msg[1024]; snprintf(msg, 1024, "ValueError: In %s: Provided %d weights-per-particle, but the code was compiled with MAX_NUM_WEIGHTS=%d.\n", @@ -1907,7 +2057,7 @@ static PyObject *countpairs_countpairs_xi(PyObject *self, PyObject *args, PyObje countpairs_error_out(module, msg); Py_RETURN_NONE; } - + /* Interpret the input objects as numpy arrays. */ const int requirements = NPY_ARRAY_IN_ARRAY; PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; @@ -1938,12 +2088,12 @@ static PyObject *countpairs_countpairs_xi(PyObject *self, PyObject *args, PyObje if(weights1_array != NULL){ weights1 = PyArray_DATA((PyArrayObject *) weights1_array); } - + /* Pack the weights into extra_options */ for(int64_t w = 0; w < extra.weights0.num_weights; w++){ extra.weights0.weights[w] = (char *) weights1 + w*ND1*element_size; } - + NPY_BEGIN_THREADS_DEF; NPY_BEGIN_THREADS; @@ -1994,15 +2144,320 @@ static PyObject *countpairs_countpairs_xi(PyObject *self, PyObject *args, PyObje return Py_BuildValue("(Od)", ret, c_api_time); } + +static PyObject *countpairs_countpairs_s_mu(PyObject *self, PyObject *args, PyObject *kwargs) +{ +#if PY_MAJOR_VERSION < 3 + (void) self; + PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. 
+#else + //In python3, self is simply the module object that was returned earlier by init + PyObject *module = self; +#endif + PyArrayObject *x1_obj=NULL, *y1_obj=NULL, *z1_obj=NULL, *weights1_obj=NULL; + PyArrayObject *x2_obj=NULL, *y2_obj=NULL, *z2_obj=NULL, *weights2_obj=NULL; + int autocorr=0; + int nthreads=4; + + double mu_max; + int nmu_bins; + char *binfile, *weighting_method_str = NULL; + struct config_options options = get_config_options(); + options.verbose = 0; + options.instruction_set = -1; + options.periodic = 1; + options.c_api_timer = 0; + int8_t xbin_ref=options.bin_refine_factors[0], + ybin_ref=options.bin_refine_factors[1], + zbin_ref=options.bin_refine_factors[2]; + + static char *kwlist[] = { + "autocorr", + "nthreads", + "binfile", + "mu_max", + "nmu_bins", + "X1", + "Y1", + "Z1", + "weights1", + "X2", + "Y2", + "Z2", + "weights2", + "periodic", + "verbose", /* keyword verbose -> print extra info at runtime + progressbar */ + "boxsize", + "output_savg", + "xbin_refine_factor", + "ybin_refine_factor", + "zbin_refine_factor", + "max_cells_per_dim", + "c_api_timer", + "isa",/* instruction set to use of type enum isa; valid values are AVX, SSE, FALLBACK */ + "weight_type", + NULL + }; + + if ( ! 
PyArg_ParseTupleAndKeywords(args, kwargs, "iisdiO!O!O!|O!O!O!O!O!bbdbbbbhbis", kwlist, + &autocorr,&nthreads,&binfile, &mu_max, &nmu_bins, + &PyArray_Type,&x1_obj, + &PyArray_Type,&y1_obj, + &PyArray_Type,&z1_obj, + &PyArray_Type,&weights1_obj, + &PyArray_Type,&x2_obj, + &PyArray_Type,&y2_obj, + &PyArray_Type,&z2_obj, + &PyArray_Type,&weights2_obj, + &(options.periodic), + &(options.verbose), + &(options.boxsize), + &(options.need_avg_sep), + &xbin_ref, &ybin_ref, &zbin_ref, + &(options.max_cells_per_dim), + &(options.c_api_timer), + &(options.instruction_set), + &weighting_method_str) + + ) { + PyObject_Print(kwargs, stdout, 0); + fprintf(stdout, "\n"); + + char msg[1024]; + int len=snprintf(msg, 1024,"ArgumentError: In DDsmu> Could not parse the arguments. Input parameters are: \n"); + + /* How many keywords do we have? Subtract 1 because of the last NULL */ + const size_t nitems = sizeof(kwlist)/sizeof(*kwlist) - 1; + int status = print_kwlist_into_msg(msg, 1024, len, kwlist, nitems); + if(status != EXIT_SUCCESS) { + fprintf(stderr,"Error message does not contain all of the keywords\n"); + } + + countpairs_error_out(module,msg); + Py_RETURN_NONE; + } + options.autocorr=autocorr; + /*This is for the fastest isa */ + if(options.instruction_set == -1) { + options.instruction_set = highest_isa; + } + + if(xbin_ref != options.bin_refine_factors[0] || + ybin_ref != options.bin_refine_factors[1] || + zbin_ref != options.bin_refine_factors[2]) { + options.bin_refine_factors[0] = xbin_ref; + options.bin_refine_factors[1] = ybin_ref; + options.bin_refine_factors[2] = zbin_ref; + set_bin_refine_scheme(&options, BINNING_CUST);//custom binning -> code will honor requested binning scheme + } + + size_t element_size; + /* How many data points are there? 
And are they all of floating point type */ + const int64_t ND1 = check_dims_and_datatype(module, x1_obj, y1_obj, z1_obj, weights1_obj, &element_size); + if(ND1 == -1) { + //Error has already been set -> simply return + Py_RETURN_NONE; + } + + /* Ensure the weights are of the right shape (n_weights, n_particles) */ + if(weights1_obj != NULL){ + // A numpy dimension of length -1 will be expanded to n_weights + npy_intp dims[2] = {-1, ND1}; + PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; + weights1_obj = (PyArrayObject *) PyArray_Newshape(weights1_obj, &pdims, NPY_CORDER); + } + + /* Validate the user's choice of weighting method */ + weight_method_t weighting_method; + int wstatus = get_weight_method_by_name(weighting_method_str, &weighting_method); + if(wstatus != EXIT_SUCCESS){ + char msg[1024]; + snprintf(msg, 1024, "ValueError: In %s: unknown weight_type %s!", __FUNCTION__, weighting_method_str); + countpairs_error_out(module, msg); + Py_RETURN_NONE; + } + int found_weights = weights1_obj == NULL ? 
0 : PyArray_SHAPE(weights1_obj)[0]; + struct extra_options extra = get_extra_options(weighting_method); + if(extra.weights0.num_weights > 0 && extra.weights0.num_weights != found_weights){ + char msg[1024]; + snprintf(msg, 1024, "ValueError: In %s: specified weighting method %s which requires %"PRId64" weight(s)-per-particle, but found %d weight(s) instead!\n", + __FUNCTION__, weighting_method_str, extra.weights0.num_weights, found_weights); + countpairs_error_out(module, msg); + Py_RETURN_NONE; + } + + if(extra.weights0.num_weights > 0 && found_weights > MAX_NUM_WEIGHTS){ + char msg[1024]; + snprintf(msg, 1024, "ValueError: In %s: Provided %d weights-per-particle, but the code was compiled with MAX_NUM_WEIGHTS=%d.\n", + __FUNCTION__, found_weights, MAX_NUM_WEIGHTS); + countpairs_error_out(module, msg); + Py_RETURN_NONE; + } + + int64_t ND2=ND1; + if(autocorr == 0) { + char msg[1024]; + if(x2_obj == NULL || y2_obj == NULL || z2_obj == NULL) { + snprintf(msg, 1024, "ValueError: In %s: If autocorr is 0, need to pass the second set of positions (X2=numpy array, Y2=numpy array, Z2=numpy array).\n", + __FUNCTION__); + countpairs_error_out(module, msg); + Py_RETURN_NONE; + } + if((weights1_obj == NULL) != (weights2_obj == NULL)){ + snprintf(msg, 1024, "ValueError: In %s: If autocorr is 0, must pass either zero or two sets of weights.\n", + __FUNCTION__); + countpairs_error_out(module, msg); + Py_RETURN_NONE; + } + + size_t element_size2; + ND2 = check_dims_and_datatype(module, x2_obj, y2_obj, z2_obj, weights2_obj, &element_size2); + if(ND2 == -1) { + //Error has already been set -> simply return + Py_RETURN_NONE; + } + /* Ensure the weights are of the right shape (n_weights, n_particles) */ + if(weights2_obj != NULL){ + npy_intp dims[2] = {-1, ND2}; + PyArray_Dims pdims = {.ptr = &(dims[0]), .len = 2}; + weights2_obj = (PyArrayObject *) PyArray_Newshape(weights2_obj, &pdims, NPY_CORDER); + } + + if(element_size != element_size2) { + snprintf(msg, 1024, "TypeError: In %s: 
The two arrays must have the same data-type. First array is of type %s while second array is of type %s\n", + __FUNCTION__, element_size == 4 ? "floats":"doubles", element_size2 == 4 ? "floats":"doubles"); + countpairs_error_out(module, msg); + Py_RETURN_NONE; + } + } + + /* Interpret the input objects as numpy arrays. */ + const int requirements = NPY_ARRAY_IN_ARRAY; + PyObject *x1_array = NULL, *y1_array = NULL, *z1_array = NULL, *weights1_array = NULL; + PyObject *x2_array = NULL, *y2_array = NULL, *z2_array = NULL, *weights2_array = NULL; + x1_array = PyArray_FromArray(x1_obj, NOTYPE_DESCR, requirements); + y1_array = PyArray_FromArray(y1_obj, NOTYPE_DESCR, requirements); + z1_array = PyArray_FromArray(z1_obj, NOTYPE_DESCR, requirements); + if(weights1_obj != NULL){ + weights1_array = PyArray_FromArray(weights1_obj, NOTYPE_DESCR, requirements); + } + + if(autocorr == 0) { + x2_array = PyArray_FromArray(x2_obj, NOTYPE_DESCR, requirements); + y2_array = PyArray_FromArray(y2_obj, NOTYPE_DESCR, requirements); + z2_array = PyArray_FromArray(z2_obj, NOTYPE_DESCR, requirements); + if(weights2_obj != NULL){ + weights2_array = PyArray_FromArray(weights2_obj, NOTYPE_DESCR, requirements); + } + } + + if (x1_array == NULL || y1_array == NULL || z1_array == NULL || + (autocorr == 0 && (x2_array == NULL || y2_array == NULL || z2_array == NULL))) { + Py_XDECREF(x1_array); + Py_XDECREF(y1_array); + Py_XDECREF(z1_array); + Py_XDECREF(weights1_array); + + Py_XDECREF(x2_array); + Py_XDECREF(y2_array); + Py_XDECREF(z2_array); + Py_XDECREF(weights2_array); + char msg[1024]; + snprintf(msg, 1024, "TypeError: In %s: Could not convert input to arrays of allowed floating point types (doubles or floats). Are you passing numpy arrays?", + __FUNCTION__); + countpairs_error_out(module, msg); + Py_RETURN_NONE; + } + + + /* Get pointers to the data as C-types. 
*/ + void *X1 = NULL, *Y1 = NULL, *Z1 = NULL, *weights1=NULL; + void *X2 = NULL, *Y2 = NULL, *Z2 = NULL, *weights2=NULL; + X1 = PyArray_DATA((PyArrayObject *) x1_array); + Y1 = PyArray_DATA((PyArrayObject *) y1_array); + Z1 = PyArray_DATA((PyArrayObject *) z1_array); + if(weights1_array != NULL){ + weights1 = PyArray_DATA((PyArrayObject *) weights1_array); + } + + if(autocorr == 0) { + X2 = PyArray_DATA((PyArrayObject *) x2_array); + Y2 = PyArray_DATA((PyArrayObject *) y2_array); + Z2 = PyArray_DATA((PyArrayObject *) z2_array); + if(weights2_array != NULL){ + weights2 = PyArray_DATA((PyArrayObject *) weights2_array); + } + } + + /* Pack the weights into extra_options */ + for(int64_t w = 0; w < extra.weights0.num_weights; w++){ + extra.weights0.weights[w] = (char *) weights1 + w*ND1*element_size; + if(autocorr == 0){ + extra.weights1.weights[w] = (char *) weights2 + w*ND2*element_size; + } + } + + NPY_BEGIN_THREADS_DEF; + NPY_BEGIN_THREADS; + + options.float_type = element_size; + results_countpairs_s_mu results; + double c_api_time = 0.0; + int status = countpairs_s_mu(ND1,X1,Y1,Z1, + ND2,X2,Y2,Z2, + nthreads, + autocorr, + binfile, + mu_max, + nmu_bins, + &results, + &options, + &extra); + if(options.c_api_timer) { + c_api_time = options.c_api_time; + } + NPY_END_THREADS; + + /* Clean up. */ + Py_DECREF(x1_array);Py_DECREF(y1_array);Py_DECREF(z1_array);Py_XDECREF(weights1_array);//x1 should absolutely not be NULL + Py_XDECREF(x2_array);Py_XDECREF(y2_array);Py_XDECREF(z2_array);Py_XDECREF(weights2_array);//x2 might be NULL depending on value of autocorr + if(status != EXIT_SUCCESS) { + Py_RETURN_NONE; + } + + + /* Build the output list */ + PyObject *ret = PyList_New(0);//create an empty list + double smin=results.supp[0]; + const double dmu = mu_max/(double)nmu_bins;//mu_min is assumed to be 0.0 + for(int i=1;i setting to NULL so any attempts to dereference will result in a crash. 
+ PyObject *module = NULL;//should not be used -> setting to NULL so any attempts to dereference will result in a crash. #else //In python3, self is simply the module object that was returned earlier by init PyObject *module = self; -#endif +#endif PyArrayObject *x1_obj=NULL, *y1_obj=NULL, *z1_obj=NULL; double rmax; @@ -2018,11 +2473,11 @@ static PyObject *countpairs_countspheres_vpf(PyObject *self, PyObject *args, PyO /* Reset the bin refine factors default (since the VPF is symmetric in XYZ, conceptually the binning should be identical in all three directions)*/ int bin_ref[] = {1,1,1}; set_bin_refine_factors(&options, bin_ref); - + int8_t xbin_ref=options.bin_refine_factors[0], ybin_ref=options.bin_refine_factors[1], zbin_ref=options.bin_refine_factors[2]; - + static char *kwlist[] = { "rmax", "nbins", @@ -2062,17 +2517,17 @@ static PyObject *countpairs_countspheres_vpf(PyObject *self, PyObject *args, PyO PyObject_Print(kwargs, stdout, 0); fprintf(stdout, "\n"); - + char msg[1024]; int len=snprintf(msg, 1024,"ArgumentError: In vpf> Could not parse the arguments. Input parameters are: \n"); - + /* How many keywords do we have? Subtract 1 because of the last NULL */ const size_t nitems = sizeof(kwlist)/sizeof(*kwlist) - 1; int status = print_kwlist_into_msg(msg, 1024, len, kwlist, nitems); if(status != EXIT_SUCCESS) { fprintf(stderr,"Error message does not contain all of the keywords\n"); } - + countpairs_error_out(module,msg); Py_RETURN_NONE; } @@ -2088,12 +2543,12 @@ static PyObject *countpairs_countspheres_vpf(PyObject *self, PyObject *args, PyO options.bin_refine_factors[2] = zbin_ref; set_bin_refine_scheme(&options, BINNING_CUST);//custom binning -> code will honor requested binning scheme } - + /* How many data points are there? 
And are they all of floating point type */ size_t element_size; const int64_t ND1 = check_dims_and_datatype(module, x1_obj, y1_obj, z1_obj, NULL, &element_size); if(ND1 == -1) { - //Error has already been set -> simply return + //Error has already been set -> simply return Py_RETURN_NONE; } diff --git a/theory/python_bindings/call_correlation_functions.py b/theory/python_bindings/call_correlation_functions.py index f9e78fb6..1ceb5f46 100644 --- a/theory/python_bindings/call_correlation_functions.py +++ b/theory/python_bindings/call_correlation_functions.py @@ -26,7 +26,8 @@ countpairs_rp_pi as DDrppi,\ countpairs_wp as wp,\ countpairs_xi as xi,\ - countspheres_vpf as vpf + countspheres_vpf as vpf,\ + countpairs_s_mu as DDsmu def read_text_file(filename, encoding="utf-8"): @@ -273,6 +274,35 @@ def main(): .format(items[0], items[1], items[2], items[3], items[4], items[5])) print("-------------------------------------------------------------------------") + mu_max = 0.5 + nmu_bins = 10 + + print("\nRunning 2-D correlation function DD(s,mu)") + results_DDsmu, _ = DDsmu(autocorr=autocorr, + nthreads=nthreads, + binfile=binfile, + mu_max=mu_max, + nmu_bins=nmu_bins, + X1=x, + Y1=y, + Z1=z, + weights1=np.ones_like(x), + weight_type='pair_product', + verbose=True, + periodic=periodic, + boxsize=boxsize, + output_savg=True) + print("\n# ****** DD(s,mu): first {0} bins ******* " + .format(numbins_to_print)) + print("# smin smax savg mu_max npairs weightavg") + print("########################################################################") + for ibin in range(numbins_to_print): + items = results_DDsmu[ibin] + print("{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d} {5:10.4f}" + .format(items[0], items[1], items[2], items[3], items[4], items[5])) + print("------------------------------------------------------------------------") + + print("\nRunning 2-D projected correlation function wp(rp)") results_wp, _, _ = wp(boxsize=boxsize, pimax=pimax, nthreads=nthreads, diff --git 
a/theory/tests/Makefile b/theory/tests/Makefile index a4a24546..41e7d2d1 100644 --- a/theory/tests/Makefile +++ b/theory/tests/Makefile @@ -8,12 +8,14 @@ IO_DIR := $(ROOT_DIR)/io THEORY_DIR := $(ROOT_DIR)/theory DD_DIR := $(THEORY_DIR)/DD DDrppi_DIR := $(THEORY_DIR)/DDrppi +DDsmu_DIR := $(THEORY_DIR)/DDsmu WP_DIR := $(THEORY_DIR)/wp XI_DIR := $(THEORY_DIR)/xi VPF_DIR := $(THEORY_DIR)/vpf DD_LIB := countpairs DDrppi_LIB := countpairs_rp_pi +DDsmu_LIB := countpairs_s_mu WP_LIB := countpairs_wp XI_LIB := countpairs_xi VPF_LIB := countspheres @@ -29,10 +31,10 @@ endif SRC1 := test_periodic.c $(IO_DIR)/io.c $(IO_DIR)/ftread.c $(UTILS_DIR)/utils.c OBJS1 := $(SRC1:.c=.o) -C_LIBRARIES := $(DD_DIR)/lib$(DD_LIB).a $(DDrppi_DIR)/lib$(DDrppi_LIB).a $(WP_DIR)/lib$(WP_LIB).a \ +C_LIBRARIES := $(DD_DIR)/lib$(DD_LIB).a $(DDrppi_DIR)/lib$(DDrppi_LIB).a $(DDsmu_DIR)/lib$(DDsmu_LIB).a $(WP_DIR)/lib$(WP_LIB).a \ $(XI_DIR)/lib$(XI_LIB).a $(VPF_DIR)/lib$(VPF_LIB).a -INCL := $(IO_DIR)/io.h $(IO_DIR)/ftread.h $(UTILS_DIR)/utils.h \ - $(DD_DIR)/$(DD_LIB).h $(DDrppi_DIR)/$(DDrppi_LIB).h $(WP_DIR)/$(WP_LIB).h \ +INCL := $(IO_DIR)/io.h $(IO_DIR)/ftread.h $(UTILS_DIR)/utils.h $(UTILS_DIR)/tests_common.h \ + $(DD_DIR)/$(DD_LIB).h $(DDrppi_DIR)/$(DDrppi_LIB).h $(DDsmu_DIR)/$(DDsmu_LIB).h $(WP_DIR)/$(WP_LIB).h \ $(XI_DIR)/$(XI_LIB).h $(VPF_DIR)/$(VPF_LIB).h SRC2 := test_nonperiodic.c $(UTILS_DIR)/utils.c $(IO_DIR)/io.c $(IO_DIR)/ftread.c @@ -40,10 +42,10 @@ OBJS2 := $(SRC2:.c=.o) all: tests $(TARGETS) $(INCL) uncompress $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk Makefile -test_periodic: $(OBJS1) $(C_LIBRARIES) $(INCL) $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk Makefile +test_periodic: $(OBJS1) $(C_LIBRARIES) $(INCL) $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk Makefile $(CC) $(OBJS1) $(C_LIBRARIES) $(GSL_LINK) $(CLINK) -o $@ -test_nonperiodic: $(OBJS2) $(C_LIBRARIES) $(INCL) $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk Makefile +test_nonperiodic: $(OBJS2) $(C_LIBRARIES) 
$(INCL) $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk Makefile $(CC) $(OBJS2) $(C_LIBRARIES) $(CLINK) -o $@ %.o: %.c $(INCL) $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk Makefile @@ -55,6 +57,9 @@ $(DD_DIR)/lib$(DD_LIB).a: $(DD_DIR)/*.c $(DD_DIR)/*.c.src $(DD_DIR)/*.h.src $(RO $(DDrppi_DIR)/lib$(DDrppi_LIB).a: $(DDrppi_DIR)/*.c $(DDrppi_DIR)/*.c.src $(DDrppi_DIR)/*.h.src $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk $(MAKE) -C $(DDrppi_DIR) libs +$(DDsmu_DIR)/lib$(DDsmu_LIB).a: $(DDsmu_DIR)/*.c $(DDsmu_DIR)/*.c.src $(DDsmu_DIR)/*.h.src $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk + $(MAKE) -C $(DDsmu_DIR) libs + $(WP_DIR)/lib$(WP_LIB).a: $(WP_DIR)/*.c $(WP_DIR)/*.c.src $(WP_DIR)/*.h.src $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk $(MAKE) -C $(WP_DIR) libs @@ -65,9 +70,9 @@ $(VPF_DIR)/lib$(VPF_LIB).a: $(VPF_DIR)/*.c $(VPF_DIR)/*.c.src $(VPF_DIR)/*.h.src $(MAKE) -C $(VPF_DIR) libs python_lib: tests $(OBJS1) $(INCL) $(ROOT_DIR)/theory.options $(ROOT_DIR)/common.mk Makefile | $(ROOT_DIR)/lib - @echo + @echo @echo "All THEORY tests are done. Now checking that the C extensions work." 
- @echo + @echo $(MAKE) -C ../python_bindings tests tests: test_periodic test_nonperiodic @@ -86,8 +91,12 @@ wp: test_periodic ./test_periodic 2 DDrppi: test_periodic test_nonperiodic - ./test_periodic 0 5 6 7 - ./test_nonperiodic 1 2 + ./test_periodic 0 6 7 8 + ./test_nonperiodic 1 3 + +DDsmu: test_periodic test_nonperiodic + ./test_periodic 5 + ./test_nonperiodic 2 DD: test_periodic test_nonperiodic ./test_periodic 1 @@ -99,8 +108,9 @@ vpf: test_periodic xi: test_periodic ./test_periodic 4 +.PHONY: celna clena celan clean + +celna clena celan: clean clean: $(RM) $(targets) $(OBJS1) $(OBJS2) $(RM) -R *.dSYM - - diff --git a/theory/tests/Mr19_DDsmu_nonperiodic b/theory/tests/Mr19_DDsmu_nonperiodic new file mode 100644 index 00000000..39f5c453 --- /dev/null +++ b/theory/tests/Mr19_DDsmu_nonperiodic @@ -0,0 +1,140 @@ + 16692 0.20339722 -0.62204752 0.05000000 0.24966241 + 16470 0.20331567 -0.62204752 0.10000000 0.24710486 + 16016 0.20372512 -0.62204752 0.15000000 0.24565266 + 16050 0.20363876 -0.62204752 0.20000000 0.24788391 + 15916 0.20350369 -0.62204752 0.25000000 0.25101277 + 16272 0.20313386 -0.62204752 0.30000000 0.24895674 + 16230 0.20348917 -0.62204752 0.35000000 0.25338644 + 16546 0.20385236 -0.62204752 0.40000000 0.24980049 + 16526 0.20347987 -0.62204752 0.45000000 0.24824338 + 16324 0.20365368 -0.62204752 0.50000000 0.24607593 + 25118 0.29047101 -0.46820059 0.05000000 0.24741942 + 24792 0.28963305 -0.46820059 0.10000000 0.25267582 + 25086 0.29024286 -0.46820059 0.15000000 0.24901836 + 24722 0.29013252 -0.46820059 0.20000000 0.25010222 + 25022 0.29068373 -0.46820059 0.25000000 0.25151603 + 25014 0.28978432 -0.46820059 0.30000000 0.24788683 + 25092 0.29024169 -0.46820059 0.35000000 0.24838181 + 24996 0.29044902 -0.46820059 0.40000000 0.25580759 + 25044 0.28959921 -0.46820059 0.45000000 0.24973243 + 24610 0.29003703 -0.46820059 0.50000000 0.24841258 + 39030 0.41310991 -0.31435498 0.05000000 0.24873583 + 38388 0.41324114 -0.31435498 0.10000000 0.24837949 + 
37854 0.41296066 -0.31435498 0.15000000 0.24745815 + 38026 0.41399658 -0.31435498 0.20000000 0.25035322 + 38274 0.41374743 -0.31435498 0.25000000 0.24580413 + 38604 0.41366503 -0.31435498 0.30000000 0.25080159 + 37936 0.41347934 -0.31435498 0.35000000 0.25182989 + 38476 0.41330508 -0.31435498 0.40000000 0.25113755 + 38446 0.41365482 -0.31435498 0.45000000 0.24936805 + 38322 0.41335469 -0.31435498 0.50000000 0.25166295 + 58284 0.58847036 -0.16050875 0.05000000 0.25020375 + 57664 0.58868424 -0.16050875 0.10000000 0.24883266 + 58050 0.58847073 -0.16050875 0.15000000 0.24703402 + 58574 0.58908260 -0.16050875 0.20000000 0.24857010 + 58054 0.58811167 -0.16050875 0.25000000 0.24999327 + 57676 0.58852707 -0.16050875 0.30000000 0.25020662 + 57812 0.58820372 -0.16050875 0.35000000 0.25040234 + 58400 0.58818573 -0.16050875 0.40000000 0.25057091 + 57954 0.58872986 -0.16050875 0.45000000 0.24965003 + 57778 0.58854156 -0.16050875 0.50000000 0.24997801 + 83592 0.83726945 -0.00666210 0.05000000 0.24998233 + 84400 0.83844570 -0.00666210 0.10000000 0.24862569 + 85084 0.83793227 -0.00666210 0.15000000 0.24851677 + 84796 0.83807338 -0.00666210 0.20000000 0.25001269 + 84248 0.83848332 -0.00666210 0.25000000 0.24972551 + 84860 0.83836965 -0.00666210 0.30000000 0.24923526 + 85374 0.83781963 -0.00666210 0.35000000 0.24773429 + 84728 0.83813240 -0.00666210 0.40000000 0.25042875 + 84652 0.83791795 -0.00666210 0.45000000 0.25026937 + 85016 0.83819879 -0.00666210 0.50000000 0.24971834 + 118788 1.19295365 0.14718457 0.05000000 0.25037935 + 118906 1.19265243 0.14718457 0.10000000 0.24915018 + 117776 1.19427135 0.14718457 0.15000000 0.24950589 + 120088 1.19373114 0.14718457 0.20000000 0.25066734 + 119402 1.19333891 0.14718457 0.25000000 0.25095498 + 121212 1.19294985 0.14718457 0.30000000 0.24910440 + 119042 1.19384043 0.14718457 0.35000000 0.24951204 + 120494 1.19303713 0.14718457 0.40000000 0.25028328 + 119208 1.19336002 0.14718457 0.45000000 0.24925978 + 120362 1.19308712 0.14718457 
0.50000000 0.25168495 + 171592 1.70474276 0.30103000 0.05000000 0.24881097 + 170524 1.70434256 0.30103000 0.10000000 0.25096788 + 172864 1.70482526 0.30103000 0.15000000 0.25124184 + 173080 1.70509778 0.30103000 0.20000000 0.24959425 + 173230 1.70451935 0.30103000 0.25000000 0.25037203 + 174068 1.70575581 0.30103000 0.30000000 0.25001682 + 173370 1.70491087 0.30103000 0.35000000 0.24961422 + 173504 1.70421163 0.30103000 0.40000000 0.25015015 + 173384 1.70495092 0.30103000 0.45000000 0.25004891 + 175106 1.70524692 0.30103000 0.50000000 0.24864867 + 281176 2.44058087 0.45487534 0.05000000 0.24921168 + 282048 2.43990693 0.45487534 0.10000000 0.24960329 + 283034 2.43795647 0.45487534 0.15000000 0.24988415 + 283986 2.43917192 0.45487534 0.20000000 0.25103485 + 282324 2.43828188 0.45487534 0.25000000 0.24987784 + 283626 2.43841569 0.45487534 0.30000000 0.24946471 + 282438 2.43973167 0.45487534 0.35000000 0.24984319 + 282940 2.43923414 0.45487534 0.40000000 0.24882798 + 286006 2.43798930 0.45487534 0.45000000 0.24928019 + 285594 2.43981163 0.45487534 0.50000000 0.24905817 + 532804 3.49212451 0.60872281 0.05000000 0.24996206 + 530772 3.48966468 0.60872281 0.10000000 0.24898679 + 530256 3.49046850 0.60872281 0.15000000 0.25031266 + 531860 3.49468260 0.60872281 0.20000000 0.24968356 + 534886 3.49251076 0.60872281 0.25000000 0.25001477 + 534710 3.49149216 0.60872281 0.30000000 0.24922175 + 536232 3.49198070 0.60872281 0.35000000 0.24901246 + 535324 3.48953193 0.60872281 0.40000000 0.24939518 + 536586 3.49189361 0.60872281 0.45000000 0.24975535 + 538130 3.49257671 0.60872281 0.50000000 0.24955356 + 1138098 4.98779581 0.76256829 0.05000000 0.25000554 + 1134530 4.98682546 0.76256829 0.10000000 0.24994273 + 1131410 4.98576422 0.76256829 0.15000000 0.25053053 + 1129014 4.98384170 0.76256829 0.20000000 0.25053172 + 1128418 4.98402162 0.76256829 0.25000000 0.24980761 + 1134178 4.98555156 0.76256829 0.30000000 0.24973701 + 1137424 4.98529618 0.76256829 0.35000000 0.24908474 + 1136920 
4.98521532 0.76256829 0.40000000 0.24988729 + 1130638 4.98483436 0.76256829 0.45000000 0.24977635 + 1130654 4.98518466 0.76256829 0.50000000 0.25027365 + 2565210 7.11954114 0.91641447 0.05000000 0.24984163 + 2550026 7.11785952 0.91641447 0.10000000 0.24953513 + 2550216 7.11824618 0.91641447 0.15000000 0.24930153 + 2551448 7.11813405 0.91641447 0.20000000 0.24937393 + 2551182 7.11654315 0.91641447 0.25000000 0.24950969 + 2551058 7.11708846 0.91641447 0.30000000 0.24947281 + 2555568 7.11670860 0.91641447 0.35000000 0.24988287 + 2541210 7.11573632 0.91641447 0.40000000 0.24989037 + 2534490 7.11495121 0.91641447 0.45000000 0.24978067 + 2531530 7.11853657 0.91641447 0.50000000 0.25041968 + 6140136 10.15832627 1.07025958 0.05000000 0.24935784 + 6134522 10.15851017 1.07025958 0.10000000 0.24941126 + 6114376 10.15981186 1.07025958 0.15000000 0.24898925 + 6095996 10.16081907 1.07025958 0.20000000 0.24984614 + 6070144 10.16135587 1.07025958 0.25000000 0.24990914 + 6056906 10.16012949 1.07025958 0.30000000 0.24959204 + 6056284 10.15961453 1.07025958 0.35000000 0.24946016 + 6041606 10.15759601 1.07025958 0.40000000 0.24930955 + 6056874 10.15689495 1.07025958 0.45000000 0.24932114 + 6047742 10.15663323 1.07025958 0.50000000 0.24934432 + 15182384 14.48878258 1.22410814 0.05000000 0.24909997 + 15147790 14.48819977 1.22410814 0.10000000 0.24957603 + 15131684 14.48850811 1.22410814 0.15000000 0.24946850 + 15123448 14.48342154 1.22410814 0.20000000 0.24950576 + 15093890 14.49069292 1.22410814 0.25000000 0.24969058 + 15064712 14.49152684 1.22410814 0.30000000 0.24984536 + 15035580 14.49233646 1.22410814 0.35000000 0.24979204 + 15020350 14.48772643 1.22410814 0.40000000 0.24961736 + 15006472 14.48914620 1.22410814 0.45000000 0.24972672 + 15009476 14.49032842 1.22410814 0.50000000 0.24964864 + 39140742 20.67496688 1.37795248 0.05000000 0.24958415 + 39105020 20.66745098 1.37795248 0.10000000 0.24958584 + 39004936 20.66815741 1.37795248 0.15000000 0.24949485 + 38945200 20.66820055 
1.37795248 0.20000000 0.24967691 + 38888220 20.66681410 1.37795248 0.25000000 0.24965315 + 38721022 20.66850293 1.37795248 0.30000000 0.24958649 + 38618624 20.66668659 1.37795248 0.35000000 0.24966218 + 38651274 20.66767160 1.37795248 0.40000000 0.24973893 + 38683556 20.67058559 1.37795248 0.45000000 0.24975961 + 38778746 20.66625446 1.37795248 0.50000000 0.24983466 diff --git a/theory/tests/Mr19_DDsmu_periodic b/theory/tests/Mr19_DDsmu_periodic new file mode 100644 index 00000000..e1606f75 --- /dev/null +++ b/theory/tests/Mr19_DDsmu_periodic @@ -0,0 +1,140 @@ + 16696 0.20340501 -0.62204752 0.05000000 0.24968146 + 16488 0.20332144 -0.62204752 0.10000000 0.24706347 + 16020 0.20373232 -0.62204752 0.15000000 0.24562233 + 16060 0.20364371 -0.62204752 0.20000000 0.24784059 + 15934 0.20350385 -0.62204752 0.25000000 0.25108589 + 16284 0.20314149 -0.62204752 0.30000000 0.24887897 + 16234 0.20349115 -0.62204752 0.35000000 0.25340692 + 16550 0.20385818 -0.62204752 0.40000000 0.24976416 + 16538 0.20347811 -0.62204752 0.45000000 0.24813122 + 16334 0.20365597 -0.62204752 0.50000000 0.24612709 + 25138 0.29047320 -0.46820059 0.05000000 0.24740801 + 24816 0.28963528 -0.46820059 0.10000000 0.25263034 + 25100 0.29024627 -0.46820059 0.15000000 0.24906452 + 24734 0.29013424 -0.46820059 0.20000000 0.25003660 + 25046 0.29069771 -0.46820059 0.25000000 0.25139261 + 25030 0.28979884 -0.46820059 0.30000000 0.24791067 + 25124 0.29025353 -0.46820059 0.35000000 0.24846020 + 25010 0.29045907 -0.46820059 0.40000000 0.25582840 + 25088 0.28960990 -0.46820059 0.45000000 0.24973555 + 24652 0.29005464 -0.46820059 0.50000000 0.24851525 + 39076 0.41310723 -0.31435498 0.05000000 0.24874608 + 38432 0.41324844 -0.31435498 0.10000000 0.24834401 + 37920 0.41298222 -0.31435498 0.15000000 0.24751310 + 38088 0.41400263 -0.31435498 0.20000000 0.25035322 + 38340 0.41375542 -0.31435498 0.25000000 0.24579433 + 38664 0.41367900 -0.31435498 0.30000000 0.25075408 + 38002 0.41348494 -0.31435498 0.35000000 0.25190770 + 
38538 0.41331965 -0.31435498 0.40000000 0.25107039 + 38528 0.41366017 -0.31435498 0.45000000 0.24932288 + 38382 0.41336992 -0.31435498 0.50000000 0.25165435 + 58392 0.58845657 -0.16050875 0.05000000 0.25012367 + 57786 0.58870852 -0.16050875 0.10000000 0.24888890 + 58200 0.58847174 -0.16050875 0.15000000 0.24709230 + 58712 0.58909389 -0.16050875 0.20000000 0.24857269 + 58186 0.58813286 -0.16050875 0.25000000 0.25012674 + 57800 0.58853899 -0.16050875 0.30000000 0.25024506 + 57968 0.58821597 -0.16050875 0.35000000 0.25039608 + 58532 0.58819346 -0.16050875 0.40000000 0.25062984 + 58088 0.58872466 -0.16050875 0.45000000 0.24969933 + 57948 0.58854816 -0.16050875 0.50000000 0.24996402 + 83794 0.83729359 -0.00666210 0.05000000 0.25001428 + 84672 0.83845940 -0.00666210 0.10000000 0.24871135 + 85358 0.83797630 -0.00666210 0.15000000 0.24843082 + 85076 0.83811044 -0.00666210 0.20000000 0.25001990 + 84548 0.83850795 -0.00666210 0.25000000 0.24976886 + 85196 0.83841976 -0.00666210 0.30000000 0.24923974 + 85676 0.83787346 -0.00666210 0.35000000 0.24775034 + 85004 0.83817048 -0.00666210 0.40000000 0.25048931 + 84954 0.83795899 -0.00666210 0.45000000 0.25029859 + 85288 0.83825321 -0.00666210 0.50000000 0.24974315 + 119352 1.19295578 0.14718457 0.05000000 0.25031211 + 119520 1.19272930 0.14718457 0.10000000 0.24918593 + 118354 1.19427653 0.14718457 0.15000000 0.24953022 + 120690 1.19377006 0.14718457 0.20000000 0.25062360 + 119956 1.19338542 0.14718457 0.25000000 0.25086077 + 121794 1.19294302 0.14718457 0.30000000 0.24909890 + 119662 1.19387752 0.14718457 0.35000000 0.24958285 + 121042 1.19307377 0.14718457 0.40000000 0.25038331 + 119736 1.19342878 0.14718457 0.45000000 0.24926798 + 120924 1.19318640 0.14718457 0.50000000 0.25182151 + 172634 1.70484178 0.30103000 0.05000000 0.24901150 + 171622 1.70447783 0.30103000 0.10000000 0.25106794 + 173936 1.70492875 0.30103000 0.15000000 0.25128342 + 174218 1.70516371 0.30103000 0.20000000 0.24955715 + 174358 1.70468983 0.30103000 
0.25000000 0.25049471 + 175166 1.70582292 0.30103000 0.30000000 0.25006190 + 174508 1.70498750 0.30103000 0.35000000 0.24963414 + 174688 1.70438109 0.30103000 0.40000000 0.25008698 + 174588 1.70507521 0.30103000 0.45000000 0.25011769 + 176394 1.70539512 0.30103000 0.50000000 0.24870294 + 283488 2.44073106 0.45487534 0.05000000 0.24923506 + 284490 2.44015027 0.45487534 0.10000000 0.24965515 + 285480 2.43804390 0.45487534 0.15000000 0.24986291 + 286390 2.43923239 0.45487534 0.20000000 0.25104629 + 284834 2.43849273 0.45487534 0.25000000 0.24991494 + 286494 2.43853899 0.45487534 0.30000000 0.24953764 + 285406 2.43996221 0.45487534 0.35000000 0.25001257 + 285932 2.43958068 0.45487534 0.40000000 0.24889016 + 289136 2.43852888 0.45487534 0.45000000 0.24934841 + 288778 2.44042085 0.45487534 0.50000000 0.24910449 + 538774 3.49265466 0.60872281 0.05000000 0.25012685 + 536890 3.49012486 0.60872281 0.10000000 0.24912778 + 536766 3.49103694 0.60872281 0.15000000 0.25047942 + 538644 3.49523525 0.60872281 0.20000000 0.24974164 + 541876 3.49306442 0.60872281 0.25000000 0.25001247 + 542040 3.49198551 0.60872281 0.30000000 0.24933670 + 544062 3.49246414 0.60872281 0.35000000 0.24904686 + 543728 3.48998211 0.60872281 0.40000000 0.24936274 + 545484 3.49250135 0.60872281 0.45000000 0.24991215 + 546724 3.49294220 0.60872281 0.50000000 0.24958501 + 1156554 4.98855112 0.76256829 0.05000000 0.25013543 + 1154302 4.98751036 0.76256829 0.10000000 0.25012477 + 1151936 4.98682179 0.76256829 0.15000000 0.25064812 + 1150726 4.98495569 0.76256829 0.20000000 0.25063066 + 1151192 4.98529231 0.76256829 0.25000000 0.24988710 + 1157706 4.98676745 0.76256829 0.30000000 0.24977810 + 1161690 4.98614358 0.76256829 0.35000000 0.24910945 + 1161492 4.98583037 0.76256829 0.40000000 0.24997636 + 1155500 4.98580328 0.76256829 0.45000000 0.24973296 + 1156280 4.98603707 0.76256829 0.50000000 0.25020194 + 2630150 7.12157827 0.91641447 0.05000000 0.24980989 + 2616906 7.12028119 0.91641447 0.10000000 0.24948964 + 
2617738 7.12041075 0.91641447 0.15000000 0.24922114 + 2621348 7.11986117 0.91641447 0.20000000 0.24937453 + 2623228 7.11807429 0.91641447 0.25000000 0.24950429 + 2624028 7.11885765 0.91641447 0.30000000 0.24953469 + 2629056 7.11856852 0.91641447 0.35000000 0.24991871 + 2616664 7.11762632 0.91641447 0.40000000 0.25000758 + 2607754 7.11656104 0.91641447 0.45000000 0.24987818 + 2605020 7.12005098 0.91641447 0.50000000 0.25040764 + 6363294 10.16119903 1.07025958 0.05000000 0.24945822 + 6365066 10.16177346 1.07025958 0.10000000 0.24950650 + 6350118 10.16341397 1.07025958 0.15000000 0.24906329 + 6338594 10.16494264 1.07025958 0.20000000 0.24988379 + 6318838 10.16566686 1.07025958 0.25000000 0.24998068 + 6311932 10.16404943 1.07025958 0.30000000 0.24965104 + 6309244 10.16369761 1.07025958 0.35000000 0.24947432 + 6299678 10.16204343 1.07025958 0.40000000 0.24929928 + 6316354 10.16138045 1.07025958 0.45000000 0.24933026 + 6304980 10.16126849 1.07025958 0.50000000 0.24930246 + 15979970 14.49650011 1.22410814 0.05000000 0.24919628 + 15971062 14.49621019 1.22410814 0.10000000 0.24968706 + 15977284 14.49627589 1.22410814 0.15000000 0.24954267 + 15998070 14.49184107 1.22410814 0.20000000 0.24954113 + 15986760 14.49923658 1.22410814 0.25000000 0.24976108 + 15975338 14.49975675 1.22410814 0.30000000 0.24983244 + 15962496 14.49935401 1.22410814 0.35000000 0.24983263 + 15944790 14.49520336 1.22410814 0.40000000 0.24960287 + 15927326 14.49714924 1.22410814 0.45000000 0.24974935 + 15924152 14.49931252 1.22410814 0.50000000 0.24963448 + 42194174 20.69085123 1.37795248 0.05000000 0.24954990 + 42226860 20.68331626 1.37795248 0.10000000 0.24947298 + 42206962 20.68446245 1.37795248 0.15000000 0.24942812 + 42206278 20.68412907 1.37795248 0.20000000 0.24965358 + 42193166 20.68325619 1.37795248 0.25000000 0.24964480 + 42074518 20.68514150 1.37795248 0.30000000 0.24956370 + 42014558 20.68325092 1.37795248 0.35000000 0.24967507 + 42075126 20.68469578 1.37795248 0.40000000 0.24977415 + 42129724 
20.68714916 1.37795248 0.45000000 0.24974958 + 42257974 20.68303823 1.37795248 0.50000000 0.24983016 diff --git a/theory/tests/test_nonperiodic.c b/theory/tests/test_nonperiodic.c index 16a9e0d3..1a6300b4 100644 --- a/theory/tests/test_nonperiodic.c +++ b/theory/tests/test_nonperiodic.c @@ -6,29 +6,18 @@ directory at https://github.com/manodeep/Corrfunc/ */ -#include -#include -#include -#include -#include -#include - -#ifndef MAXLEN -#define MAXLEN 500 -#endif - -#include "defs.h" -#include "utils.h" +#include "tests_common.h" #include "io.h" -#include "ftread.h" #include "../DD/countpairs.h" #include "../DDrppi/countpairs_rp_pi.h" +#include "../DDsmu/countpairs_s_mu.h" char tmpoutputfile[]="./test_nonperiodic_output.txt"; int test_nonperiodic_DD(const char *correct_outputfile); int test_nonperiodic_DDrppi(const char *correct_outputfile); +int test_nonperiodic_DDsmu(const char *correct_outputfile); void read_data_and_set_globals(const char *firstfilename, const char *firstformat,const char *secondfilename,const char *secondformat); //Global variables @@ -38,25 +27,15 @@ double *X1=NULL,*Y1=NULL,*Z1=NULL,*weights1=NULL; int ND2; double *X2=NULL,*Y2=NULL,*Z2=NULL,*weights2=NULL; -char binfile[]="bins"; -double pimax=40.0; -double boxsize=420.0; -#if defined(_OPENMP) -const int nthreads=4; -#else -const int nthreads=1; -#endif - char current_file1[MAXLEN],current_file2[MAXLEN]; struct config_options options; - -const double maxdiff = 1e-9; -const double maxreldiff = 1e-6; //end of global variables int test_nonperiodic_DD(const char *correct_outputfile) { int autocorr = (X1==X2) ? 
1:0; + results_countpairs results; + int ret = EXIT_FAILURE; // Set up the weights pointers weight_method_t weight_method = PAIR_PRODUCT; @@ -64,53 +43,54 @@ int test_nonperiodic_DD(const char *correct_outputfile) extra.weights0.weights[0] = weights1; extra.weights1.weights[0] = weights2; - //Do the straight-up DD counts - results_countpairs results; - int status = countpairs(ND1,X1,Y1,Z1, - ND2,X2,Y2,Z2, - nthreads, - autocorr, - binfile, - &results, - &options, - &extra); - if(status != EXIT_SUCCESS) { - return status; - } - - int ret = EXIT_FAILURE; - FILE *fp=my_fopen(correct_outputfile,"r"); - if(fp == NULL) { - free_results(&results); - return EXIT_FAILURE; - } - for(int i=1;iDD, 1->DDrppi - - const char correct_outputfiles[][MAXLEN] = {"Mr19_DD_nonperiodic","Mr19_DDrppi_nonperiodic","cmass_DR_nonperiodic"}; - const char firstfilename[][MAXLEN] = {"../tests/data/gals_Mr19.ff","../tests/data/gals_Mr19.ff","../tests/data/cmassmock_Zspace.ff"}; - const char firstfiletype[][MAXLEN] = {"f","f","f"}; - const char secondfilename[][MAXLEN] = {"../tests/data/gals_Mr19.ff","../tests/data/gals_Mr19.ff","../tests/data/random_Zspace.ff"}; - const char secondfiletype[][MAXLEN] = {"f","f","f"}; - - const double allpimax[] = {40.0,40.0,80.0}; - - int (*allfunctions[]) (const char *) = {test_nonperiodic_DD,test_nonperiodic_DDrppi}; - const int numfunctions=2;//2 functions total + const int function_pointer_index[] = {0,1,2,1};//0->DD, 1->DDrppi, 2->DDsmu + + const char correct_outputfiles[][MAXLEN] = {"Mr19_DD_nonperiodic", + "Mr19_DDrppi_nonperiodic", + "Mr19_DDsmu_nonperiodic", + "cmass_DR_nonperiodic"}; + const char firstfilename[][MAXLEN] = {"../tests/data/gals_Mr19.ff", + "../tests/data/gals_Mr19.ff", + "../tests/data/gals_Mr19.ff", + "../tests/data/cmassmock_Zspace.ff"}; + const char firstfiletype[][MAXLEN] = {"f","f","f","f"}; + const char secondfilename[][MAXLEN] = {"../tests/data/gals_Mr19.ff", + "../tests/data/gals_Mr19.ff", + "../tests/data/gals_Mr19.ff", + 
"../tests/data/random_Zspace.ff"}; + const char secondfiletype[][MAXLEN] = {"f","f","f","f"}; + + const double allpimax[] = {40.0,40.0,40.0,80.0}; + + int (*allfunctions[]) (const char *) = {test_nonperiodic_DD,test_nonperiodic_DDrppi,test_nonperiodic_DDsmu}; + const int numfunctions=3;//3 functions total int total_tests=0,skipped=0; diff --git a/theory/tests/test_periodic.c b/theory/tests/test_periodic.c index e59da2b3..9a0e6615 100644 --- a/theory/tests/test_periodic.c +++ b/theory/tests/test_periodic.c @@ -6,23 +6,12 @@ directory at https://github.com/manodeep/Corrfunc/ */ -#include -#include -#include -#include -#include -#include - -#ifndef MAXLEN -#define MAXLEN 500 -#endif - -#include "defs.h" +#include "tests_common.h" #include "io.h" -#include "utils.h" #include "../DD/countpairs.h" #include "../DDrppi/countpairs_rp_pi.h" +#include "../DDsmu/countpairs_s_mu.h" #include "../wp/countpairs_wp.h" #include "../xi/countpairs_xi.h" #include "../vpf/countspheres.h" @@ -30,8 +19,8 @@ char tmpoutputfile[]="./test_periodic_output.txt"; int test_periodic_DD(const char *correct_outputfile); -/* int test_periodic_DD_weighted(const char *correct_outputfile); */ int test_periodic_DDrppi(const char *correct_outputfile); +int test_periodic_DDsmu(const char *correct_outputfile); int test_wp(const char *correct_outputfile); int test_vpf(const char *correct_outputfile); int test_xi(const char *correct_outputfile); @@ -39,6 +28,7 @@ int test_xi(const char *correct_outputfile); void read_data_and_set_globals(const char *firstfilename, const char *firstformat, const char *secondfilename, const char *secondformat); + //Global variables int ND1; double *X1=NULL,*Y1=NULL,*Z1=NULL,*weights1=NULL; @@ -46,26 +36,16 @@ double *X1=NULL,*Y1=NULL,*Z1=NULL,*weights1=NULL; int ND2; double *X2=NULL,*Y2=NULL,*Z2=NULL,*weights2=NULL; -char binfile[]="bins"; -double pimax=40.0; -double boxsize=420.0; -#ifdef _OPENMP -const int nthreads=4; -#else -const int nthreads=1; -#endif - char 
current_file1[MAXLEN],current_file2[MAXLEN]; struct config_options options; -const double maxdiff = 1e-9; -const double maxreldiff = 1e-6; - //end global variables int test_periodic_DD(const char *correct_outputfile) { int autocorr = (X1==X2) ? 1:0; + results_countpairs results; + int ret = EXIT_FAILURE; // Set up the weights pointers weight_method_t weight_method = PAIR_PRODUCT; @@ -73,55 +53,57 @@ int test_periodic_DD(const char *correct_outputfile) extra.weights0.weights[0] = weights1; extra.weights1.weights[0] = weights2; - //Do the straight-up DD counts - results_countpairs results; - int status = countpairs(ND1,X1,Y1,Z1, - ND2,X2,Y2,Z2, - nthreads, - autocorr, - binfile, - &results, - &options, - &extra); - if(status != EXIT_SUCCESS) { - return status; - } - - int ret = EXIT_FAILURE; - double rlow=results.rupp[0]; - FILE *fp = my_fopen(correct_outputfile,"r"); - for(int i=1;iDD, 1->DDrppi,2->wp, 3->vpf, 4->xi + const int function_pointer_index[] = {1,0,2,3,4,5,1,1,1};//0->DD, 1->DDrppi,2->wp, 3->vpf, 4->xi, 5->DDsmu const char correct_outputfiles[][MAXLEN] = {"Mr19_DDrppi_periodic", "Mr19_DD_periodic", "Mr19_wp", "Mr19_vpf_periodic", "Mr19_xi", + "Mr19_DDsmu_periodic", "cmass_DD_periodic", "cmass_DR_periodic", "cmass_RR_periodic"}; @@ -544,27 +626,30 @@ int main(int argc, char **argv) "../tests/data/gals_Mr19.ff", "../tests/data/gals_Mr19.ff", "../tests/data/gals_Mr19.ff", + "../tests/data/gals_Mr19.ff", "../tests/data/cmassmock_Zspace.ff", "../tests/data/cmassmock_Zspace.ff", "../tests/data/random_Zspace.ff"}; - const char firstfiletype[][MAXLEN] = {"f","f","f","f","f","f","f","f"}; + const char firstfiletype[][MAXLEN] = {"f","f","f","f","f","f","f","f","f"}; const char secondfilename[][MAXLEN] = {"../tests/data/gals_Mr19.ff", "../tests/data/gals_Mr19.ff", "../tests/data/gals_Mr19.ff", "../tests/data/gals_Mr19.ff", "../tests/data/gals_Mr19.ff", + "../tests/data/gals_Mr19.ff", "../tests/data/cmassmock_Zspace.ff", "../tests/data/random_Zspace.ff", 
"../tests/data/random_Zspace.ff"}; - const char secondfiletype[][MAXLEN] = {"f","f","f","f","f","f","f","f"}; - const double allpimax[] = {40.0,40.0,40.0,40.0,40.0,80.0,80.0,80.0}; + const char secondfiletype[][MAXLEN] = {"f","f","f","f","f","f","f","f","f"}; + const double allpimax[] = {40.0,40.0,40.0,40.0,40.0,40.0,80.0,80.0,80.0}; int (*allfunctions[]) (const char *) = {test_periodic_DD, test_periodic_DDrppi, test_wp, test_vpf, - test_xi}; - const int numfunctions=5;//5 functions total + test_xi, + test_periodic_DDsmu}; + const int numfunctions=6;//6 functions total int total_tests=0,skipped=0; diff --git a/utils/defs.h b/utils/defs.h index 020f2542..ffe26016 100644 --- a/utils/defs.h +++ b/utils/defs.h @@ -19,7 +19,7 @@ extern "C" { #endif -#define API_VERSION STR("2.0.1") +#define API_VERSION STR("2.1.0") typedef enum { DEFAULT=-42,/* present simply to make the enum a signed int*/ @@ -187,6 +187,12 @@ static inline void set_bin_refine_factors(struct config_options *options, const reset_bin_refine_scheme(options); } +static inline void set_custom_bin_refine_factors(struct config_options *options, const int bin_refine_factors[3]) +{ + set_bin_refine_factors(options, bin_refine_factors); + set_bin_refine_scheme(options, BINNING_CUST); +} + static inline void reset_bin_refine_factors(struct config_options *options) { /* refine factors of 2,2,1 in the xyz dims diff --git a/utils/macros.h b/utils/macros.h index 7e6daa93..77793ae7 100644 --- a/utils/macros.h +++ b/utils/macros.h @@ -8,7 +8,7 @@ #define ADD_DIFF_TIME(t0,t1) ((t1.tv_sec - t0.tv_sec) + 1e-6*(t1.tv_usec - t0.tv_usec)) #define REALTIME_ELAPSED_NS(t0, t1) ((t1.tv_sec - t0.tv_sec)*1000000000.0 + (t1.tv_nsec - t0.tv_nsec)) - + #define ALIGNMENT 32 #define STRINGIFY(x) #x @@ -25,16 +25,16 @@ thread_timings->second_cellindex = second_cellid; \ } -/* Taken from http://stackoverflow.com/questions/19403233/compile-time-struct-size-check-error-out-if-odd +/* Taken from 
http://stackoverflow.com/questions/19403233/compile-time-struct-size-check-error-out-if-odd which is in turn taken from the linux kernel */ /* #define BUILD_BUG_OR_ZERO(e) (sizeof(struct{ int:-!!(e);})) */ /* #define ENSURE_STRUCT_SIZE(e, size) BUILD_BUG_OR_ZERO(sizeof(e) != size) */ -/* However, the previous one gives me an unused-value warning and I do not want - to turn that compiler warning off. Therefore, this version, which results in - an unused local typedef warning is used. I turn off the corresponding warning +/* However, the previous one gives me an unused-value warning and I do not want + to turn that compiler warning off. Therefore, this version, which results in + an unused local typedef warning is used. I turn off the corresponding warning in common.mk (-Wno-unused-local-typedefs) via CFLAGS */ -#define BUILD_BUG_OR_ZERO(cond, msg) typedef volatile char assertion_on_##msg[( !!(cond) )*2-1 ] +#define BUILD_BUG_OR_ZERO(cond, msg) typedef volatile char assertion_on_##msg[( !!(cond) )*2-1 ] #define ENSURE_STRUCT_SIZE(e, size) BUILD_BUG_OR_ZERO(sizeof(e) == size, sizeof_struct_config_options) /* Macro Constants */ @@ -56,6 +56,7 @@ #define RP_UNICODE "rp" #define THETA_UNICODE "\u03B8" #define OMEGA_UNICODE "\u03C9" +#define MU_UNICODE "\u03BC" #define PI_SAFE "pi" #define XI_SAFE "xi" @@ -63,6 +64,7 @@ #define RP_SAFE "rp" #define THETA_SAFE "theta" #define OMEGA "omega" +#define MU_SAFE "mu" #ifdef USE_UNICODE @@ -70,6 +72,7 @@ #define XI_CHAR XI_UNICODE #define PIMAX_CHAR PIMAX_UNICODE #define RP_CHAR RP_UNICODE +#define MU_CHAR MU_UNICODE #define THETA_CHAR THETA_UNICODE #define OMEGA_CHAR OMEGA_UNICODE #define UNICODE_WARNING "\n\ @@ -83,6 +86,7 @@ the ROOT DIRECTORY of ``Corrfunc`` and re-install the entire packge.\n" #else #define PI_CHAR PI_SAFE #define XI_CHAR XI_SAFE +#define MU_CHAR MU_SAFE #define PIMAX_CHAR PIMAX_SAFE #define RP_CHAR RP_SAFE #define THETA_CHAR THETA_SAFE @@ -142,7 +146,7 @@ the ROOT DIRECTORY of ``Corrfunc`` and re-install 
the entire packge.\n" fprintf(stderr,"Can not handle signal = %d\n", signo); \ } \ previous_handlers[i] = prev; \ - } + } #define RESET_INTERRUPT_HANDLERS() \ for(size_t i=0;i +#include +#include +#include +#include +#include + +#ifndef MAXLEN +#define MAXLEN 500 +#endif + +#include "defs.h" +#include "utils.h" + +#ifdef INTEGRATION_TESTS + +#warning "Running (SLOW) integration tests" + +/* Define the instruction sets that are supported by the compiler */ +const isa valid_instruction_sets[] = {FALLBACK +#ifdef __SSE4_2__ + ,SSE42 +#endif +#ifdef __AVX__ + ,AVX +#endif +}; + +/* Strings corresponding to the instruction sets in the array `valid_instruction_sets` */ +const char isa_name[][20] = {"FALLBACK" +#ifdef __SSE4_2__ + ,"SSE42" +#endif +#ifdef __AVX__ + , "AVX" +#endif +}; + +/* This is a fun C tid-bit. The sizeof(valid_instruction_sets) refers to the total bytes + required to store the array. As in the typeof valid_instruction_sets is int[3] when + all 3 instructions sets are supported */ +const int num_instructions = sizeof(valid_instruction_sets)/sizeof(valid_instruction_sets[0]); + +/* The max. value of bin refine factor to probe. 
Each of bin refinements factors is set from [1, max_binref] + (inclusive) */ +const int min_bin_ref = 1, max_bin_ref = 3; + +/* Macro to setup the loop over instruction sets, various bin factors and then run + the tests */ +#define BEGIN_INTEGRATION_TEST_SECTION \ + do { \ + int dotest = 1; \ + const isa old_isa = options.instruction_set; \ + int fastest_bin_ref[] = {1, 1, 1}; \ + double fastest_time = 1e30; \ + struct timespec t0, t1; \ + for(int iset=0;iset no need for exhaustive testing */ +#define BEGIN_INTEGRATION_TEST_SECTION do { +#define END_INTEGRATION_TEST_SECTION } while(0) + +#endif + + +#ifdef _OPENMP +const int nthreads=4; +#else +const int nthreads=1; +#endif + +const double maxdiff = 1e-9; +const double maxreldiff = 1e-6; + +char binfile[]="../tests/bins"; +char angular_binfile[]="../tests/angular_bins"; +double pimax=40.0; +double theory_mu_max=0.5; +double mocks_mu_max=1.0; +int nmu_bins=10; +double boxsize=420.0; +