tests/iotools/test_sodapro.py

"""
test iotools for sodapro
"""

import pandas as pd
import numpy as np
import requests
import pytest

from pvlib.iotools import sodapro
from tests.conftest import TESTS_DATA_DIR, assert_frame_equal


testfile_mcclear_verbose = TESTS_DATA_DIR / 'cams_mcclear_1min_verbose.csv'
testfile_mcclear_monthly = TESTS_DATA_DIR / 'cams_mcclear_monthly.csv'
testfile_radiation_verbose = TESTS_DATA_DIR / 'cams_radiation_1min_verbose.csv'
testfile_radiation_monthly = TESTS_DATA_DIR / 'cams_radiation_monthly.csv'


index_verbose = pd.date_range('2020-06-01 12', periods=4, freq='1min',
                              tz='UTC')
index_monthly = pd.date_range('2020-01-01', periods=4, freq='1MS')


dtypes_mcclear_verbose = [
    'object', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
    'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
    'float64', 'float64', 'float64', 'float64', 'float64', 'int64', 'float64',
    'float64', 'float64', 'float64']

dtypes_mcclear = [
    'object', 'float64', 'float64', 'float64', 'float64', 'float64']

dtypes_radiation_verbose = [
    'object', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
    'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
    'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
    'float64', 'float64', 'float64', 'float64', 'int64', 'float64', 'float64',
    'float64', 'float64', 'float64', 'int64', 'int64', 'float64', 'float64',
    'float64', 'float64']

dtypes_radiation = [
    'object', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
    'float64', 'float64', 'float64', 'float64']


columns_mcclear_verbose = [
    'Observation period', 'ghi_extra', 'ghi_clear', 'bhi_clear',
    'dhi_clear', 'dni_clear', 'solar_zenith', 'summer/winter split', 'tco3',
    'tcwv', 'AOD BC', 'AOD DU', 'AOD SS', 'AOD OR', 'AOD SU', 'AOD NI',
    'AOD AM', 'alpha', 'Aerosol type', 'fiso', 'fvol', 'fgeo', 'albedo']

columns_mcclear = [
    'Observation period', 'ghi_extra', 'ghi_clear', 'bhi_clear', 'dhi_clear',
    'dni_clear']

columns_radiation_verbose = [
    'Observation period', 'ghi_extra', 'ghi_clear', 'bhi_clear', 'dhi_clear',
    'dni_clear', 'ghi', 'bhi', 'dhi', 'dni', 'Reliability', 'solar_zenith',
    'summer/winter split', 'tco3', 'tcwv', 'AOD BC', 'AOD DU', 'AOD SS',
    'AOD OR', 'AOD SU', 'AOD NI', 'AOD AM', 'alpha', 'Aerosol type', 'fiso',
    'fvol', 'fgeo', 'albedo', 'Cloud optical depth', 'Cloud coverage',
    'Cloud type', 'GHI no corr', 'BHI no corr', 'DHI no corr', 'BNI no corr']

columns_radiation_verbose_unmapped = [
    'Observation period', 'TOA', 'Clear sky GHI', 'Clear sky BHI',
    'Clear sky DHI', 'Clear sky BNI', 'GHI', 'BHI', 'DHI', 'BNI',
    'Reliability', 'sza', 'summer/winter split', 'tco3', 'tcwv', 'AOD BC',
    'AOD DU', 'AOD SS', 'AOD OR', 'AOD SU', 'AOD NI', 'AOD AM', 'alpha',
    'Aerosol type', 'fiso', 'fvol', 'fgeo', 'albedo', 'Cloud optical depth',
    'Cloud coverage', 'Cloud type', 'GHI no corr', 'BHI no corr',
    'DHI no corr', 'BNI no corr']

columns_radiation = [
    'Observation period', 'ghi_extra', 'ghi_clear', 'bhi_clear', 'dhi_clear',
    'dni_clear', 'ghi', 'bhi', 'dhi', 'dni', 'Reliability']


values_mcclear_verbose = np.array([
    ['2020-06-01T12:00:00.0/2020-06-01T12:01:00.0', 1084.194, 848.5020,
     753.564, 94.938, 920.28, 35.0308, 0.9723, 341.0221, 17.7962, 0.0065,
     0.0067, 0.0008, 0.0215, 0.0252, 0.0087, 0.0022, np.nan, -1, 0.1668,
     0.0912, 0.0267, 0.1359],
    ['2020-06-01T12:01:00.0/2020-06-01T12:02:00.0', 1083.504, 847.866, 752.904,
     94.962, 920.058, 35.0828, 0.9723, 341.0223, 17.802, 0.0065, 0.0067,
     0.0008, 0.0215, 0.0253, 0.0087, 0.0022, np.nan, -1, 0.1668, 0.0912,
     0.0267, 0.1359],
    ['2020-06-01T12:02:00.0/2020-06-01T12:03:00.0', 1082.802, 847.224, 752.232,
     94.986, 919.836, 35.1357, 0.9723, 341.0224, 17.8079, 0.0065, 0.0067,
     0.0008, 0.0216, 0.0253, 0.0087, 0.0022, np.nan, -1, 0.1668, 0.0912,
     0.0267, 0.1359],
    ['2020-06-01T12:03:00.0/2020-06-01T12:04:00.0', 1082.088, 846.564, 751.554,
     95.01, 919.614, 35.1896, 0.9723, 341.0226, 17.8137, 0.0065, 0.0067,
     0.0008, 0.0217, 0.0253, 0.0087, 0.0022, np.nan, -1, 0.1668, 0.0912,
     0.0267, 0.1359]])

values_mcclear_monthly = np.array([
    ['2020-01-01T00:00:00.0/2020-02-01T00:00:00.0', 67.4314, 39.5494,
     26.1998, 13.3496, 142.1562],
    ['2020-02-01T00:00:00.0/2020-03-01T00:00:00.0', 131.2335, 84.7849,
     58.3855, 26.3994, 202.4865],
    ['2020-03-01T00:00:00.0/2020-04-01T00:00:00.0', 232.3323, 163.176,
     125.1675, 38.0085, 307.5254],
    ['2020-04-01T00:00:00.0/2020-05-01T00:00:00.0', 344.7431, 250.7585,
     197.8757, 52.8829, 387.6707]])

values_radiation_verbose = np.array([
    ['2020-06-01T12:00:00.0/2020-06-01T12:01:00.0', 1084.194, 848.502, 753.564,
     94.938, 920.28, 815.358, 702.342, 113.022, 857.724, 1.0, 35.0308, 0.9723,
     341.0221, 17.7962, 0.0065, 0.0067, 0.0008, 0.0215, 0.0252, 0.0087, 0.0022,
     np.nan, -1, 0.1668, 0.0912, 0.0267, 0.1359, 0.0, 0, 5, 848.502, 753.564,
     94.938, 920.28],
    ['2020-06-01T12:01:00.0/2020-06-01T12:02:00.0', 1083.504, 847.866, 752.904,
     94.962, 920.058, 814.806, 701.73, 113.076, 857.52, 1.0, 35.0828, 0.9723,
     341.0223, 17.802, 0.0065, 0.0067, 0.0008, 0.0215, 0.0253, 0.0087, 0.0022,
     np.nan, -1, 0.1668, 0.0912, 0.0267, 0.1359, 0.0, 0, 5, 847.866, 752.904,
     94.962, 920.058],
    ['2020-06-01T12:02:00.0/2020-06-01T12:03:00.0', 1082.802, 847.224, 752.232,
     94.986, 919.836, 814.182, 701.094, 113.088, 857.298, 1.0, 35.1357, 0.9723,
     341.0224, 17.8079, 0.0065, 0.0067, 0.0008, 0.0216, 0.0253, 0.0087, 0.0022,
     np.nan, -1, 0.1668, 0.0912, 0.0267, 0.1359, 0.0, 0, 5, 847.224, 752.232,
     94.986, 919.836],
    ['2020-06-01T12:03:00.0/2020-06-01T12:04:00.0', 1082.088, 846.564, 751.554,
     95.01, 919.614, 813.612, 700.464, 113.148, 857.094, 1.0, 35.1896, 0.9723,
     341.0226, 17.8137, 0.0065, 0.0067, 0.0008, 0.0217, 0.0253, 0.0087, 0.0022,
     np.nan, -1, 0.1668, 0.0912, 0.0267, 0.1359, 0.0, 0, 5, 846.564, 751.554,
     95.01, 919.614]])

values_radiation_verbose_integrated = np.copy(values_radiation_verbose)
values_radiation_verbose_integrated[:, 1:10] = \
    values_radiation_verbose_integrated[:, 1:10].astype(float)/60
values_radiation_verbose_integrated[:, 31:35] = \
    values_radiation_verbose_integrated[:, 31:35].astype(float)/60

values_radiation_monthly = np.array([
    ['2020-01-01T00:00:00.0/2020-02-01T00:00:00.0', 67.4317, 39.5496,
     26.2, 13.3496, 142.1567, 20.8763, 3.4526, 17.4357, 16.7595, 0.997],
    ['2020-02-01T00:00:00.0/2020-03-01T00:00:00.0', 131.2338, 84.7852,
     58.3858, 26.3994, 202.4871, 47.5197, 13.984, 33.5512, 47.8541, 0.9956],
    ['2020-03-01T00:00:00.0/2020-04-01T00:00:00.0', 232.3325, 163.1762,
     125.1677, 38.0085, 307.5256, 120.1659, 69.6217, 50.5653, 159.576, 0.9949],
    ['2020-04-01T00:00:00.0/2020-05-01T00:00:00.0', 344.7433, 250.7587,
     197.8758, 52.8829, 387.6709, 196.7015, 123.2593, 73.5152, 233.9675,
     0.9897]])


# @pytest.fixture
def generate_expected_dataframe(values, columns, index, dtypes):
    """Create dataframe from arrays of values, columns and index, in order to
    use this dataframe to compare to.
    """
    expected = pd.DataFrame(values, columns=columns, index=index)
    expected.index.freq = None
    for (col, _dtype) in zip(expected.columns, dtypes):
        expected[col] = expected[col].astype(_dtype)
    return expected


@pytest.mark.parametrize('testfile,index,columns,values,dtypes', [
    (testfile_mcclear_verbose, index_verbose, columns_mcclear_verbose,
     values_mcclear_verbose, dtypes_mcclear_verbose),
    (testfile_mcclear_monthly, index_monthly, columns_mcclear,
     values_mcclear_monthly, dtypes_mcclear),
    (testfile_radiation_verbose, index_verbose, columns_radiation_verbose,
     values_radiation_verbose, dtypes_radiation_verbose),
    (testfile_radiation_monthly, index_monthly, columns_radiation,
     values_radiation_monthly, dtypes_radiation)])
def test_read_cams(testfile, index, columns, values, dtypes):
    expected = generate_expected_dataframe(values, columns, index, dtypes)
    out, metadata = sodapro.read_cams(testfile, integrated=False,
                                      map_variables=True)
    assert_frame_equal(out, expected, check_less_precise=True)


def test_read_cams_metadata():
    _, metadata = sodapro.read_cams(testfile_mcclear_monthly, integrated=False)
    assert metadata['Time reference'] == 'Universal time (UT)'
    assert metadata['noValue'] == 'nan'
    assert metadata['latitude'] == 55.7906
    assert metadata['longitude'] == 12.5251
    assert metadata['altitude'] == 39.0
    assert metadata['radiation_unit'] == 'W/m^2'
    assert metadata['time_step'] == '1MS'


@pytest.mark.parametrize('testfile,index,columns,values,dtypes,identifier', [
    (testfile_mcclear_monthly, index_monthly, columns_mcclear,
     values_mcclear_monthly, dtypes_mcclear, 'mcclear'),
    (testfile_radiation_monthly, index_monthly, columns_radiation,
     values_radiation_monthly, dtypes_radiation, 'cams_radiation')])
def test_get_cams(requests_mock, testfile, index, columns, values, dtypes,
                  identifier):
    """Test that get_cams generates the correct URI request and that parse_cams
    is being called correctly"""
    # Open local test file containing McClear mothly data
    with open(testfile, 'r') as test_file:
        mock_response = test_file.read()
    # Specify the full URI of a specific example, this ensures that all of the
    # inputs are passing on correctly
    url_test_cams = f'https://api.soda-solardata.com/service/wps?DataInputs=latitude=55.7906;longitude=12.5251;altitude=80;date_begin=2020-01-01;date_end=2020-05-04;time_ref=UT;summarization=P01M;username=pvlib-admin%2540googlegroups.com;verbose=false&Service=WPS&Request=Execute&Identifier=get_{identifier}&version=1.0.0&RawDataOutput=irradiation'  # noqa: E501

    requests_mock.get(url_test_cams, text=mock_response,
                      headers={'Content-Type': 'application/csv'})

    # Make API call - an error is raised if requested URI does not match
    out, metadata = sodapro.get_cams(
        start=pd.Timestamp('2020-01-01'),
        end=pd.Timestamp('2020-05-04'),
        latitude=55.7906,
        longitude=12.5251,
        email='pvlib-admin@googlegroups.com',
        identifier=identifier,
        altitude=80,
        time_step='1MS',
        verbose=False,
        integrated=False)
    expected = generate_expected_dataframe(values, columns, index, dtypes)
    assert_frame_equal(out, expected, check_less_precise=True)

    # Test if Warning is raised if verbose mode is True and time_step != '1min'
    with pytest.warns(UserWarning, match='Verbose mode only supports'):
        _ = sodapro.get_cams(
            start=pd.Timestamp('2020-01-01'),
            end=pd.Timestamp('2020-05-04'),
            latitude=55.7906,
            longitude=12.5251,
            email='pvlib-admin@googlegroups.com',
            identifier=identifier,
            altitude=80,
            time_step='1MS',
            verbose=True)


def test_get_cams_bad_request(requests_mock):
    """Test that a the correct errors/warnings ares raised for invalid
    requests inputs. Also tests if the specified server url gets used"""

    # Subset of an xml file returned for errornous requests
    mock_response_bad_text = """<?xml version="1.0" encoding="utf-8"?>
    <ows:Exception exceptionCode="NoApplicableCode" locator="None">
    <ows:ExceptionText>Failed to execute WPS process [get_mcclear]:
        Please, register yourself at www.soda-pro.com
    </ows:ExceptionText>"""

    url_cams_bad_request = 'https://pro.soda-is.com/service/wps?DataInputs=latitude=55.7906;longitude=12.5251;altitude=-999;date_begin=2020-01-01;date_end=2020-05-04;time_ref=TST;summarization=PT01H;username=test%2540test.com;verbose=false&Service=WPS&Request=Execute&Identifier=get_mcclear&version=1.0.0&RawDataOutput=irradiation'  # noqa: E501

    requests_mock.get(url_cams_bad_request, status_code=400,
                      text=mock_response_bad_text)

    # Test if HTTPError is raised if incorrect input is specified
    # In the below example a non-registrered email is specified
    with pytest.raises(requests.exceptions.HTTPError,
                       match='Failed to execute WPS process'):
        _ = sodapro.get_cams(
            start=pd.Timestamp('2020-01-01'),
            end=pd.Timestamp('2020-05-04'),
            latitude=55.7906,
            longitude=12.5251,
            email='test@test.com',  # a non-registrered email
            identifier='mcclear',
            time_ref='TST',
            verbose=False,
            time_step='1h',
            server='pro.soda-is.com')
    # Test if value error is raised if incorrect identifier is specified
    with pytest.raises(ValueError, match='Identifier must be either'):
        _ = sodapro.get_cams(
            start=pd.Timestamp('2020-01-01'),
            end=pd.Timestamp('2020-05-04'),
            latitude=55.7906,
            longitude=12.5251,
            email='test@test.com',
            identifier='test',  # incorrect identifier
            server='pro.soda-is.com')
    # Test if value error is raised if incorrect time step is specified
    with pytest.raises(ValueError, match='Time step not recognized'):
        _ = sodapro.get_cams(
            start=pd.Timestamp('2020-01-01'),
            end=pd.Timestamp('2020-05-04'),
            latitude=55.7906,
            longitude=12.5251,
            email='test@test.com',
            identifier='mcclear',
            time_step='test',  # incorrect time step
            server='pro.soda-is.com')