Add function to retrieve example dataset paths #1763

Closed · wants to merge 27 commits

Changes from 17 commits

Commits (27)
e46a60f
Add function, tests and case example
echedey-ls Jun 7, 2023
29c7554
Fix type annotations
echedey-ls Jun 7, 2023
725e299
Yeet this stupid error
echedey-ls Jun 7, 2023
05dc263
Remove unneeded feature
echedey-ls Jun 8, 2023
b0f38d5
Renamed to locate_example_dataset
echedey-ls Jun 8, 2023
b236fce
Use pathlib & check existence of test files
echedey-ls Jun 8, 2023
884f2de
Add public documentation, hope it works
echedey-ls Jun 8, 2023
dad6835
I forgot this assert :v
echedey-ls Jun 8, 2023
5490561
Update plot_greensboro_kimber_soiling.py
echedey-ls Jun 8, 2023
eacedae
Don't show examples backreference
echedey-ls Jun 9, 2023
28fe7de
Update v0.10.0.rst (without user name since I'm already mentioned at …
echedey-ls Jun 9, 2023
aa91f28
Apply Kevin's implementation suggestions
echedey-ls Jun 10, 2023
5954a79
Update plot_greensboro_kimber_soiling.py
echedey-ls Jun 10, 2023
0e55f18
Update iotools.rst
echedey-ls Jun 10, 2023
311f765
Will this fix the table?
echedey-ls Jun 10, 2023
3a91cd7
This should be fine now
echedey-ls Jun 10, 2023
4a38b0e
Substitute occurrences of pvlib.__file__ or similar where appropiate
echedey-ls Jun 10, 2023
5e1a33c
Merge branch 'main' into dataset-retrieve-function
echedey-ls Jun 10, 2023
64e8700
Merge branch 'main' into dataset-retrieve-function
echedey-ls Jun 14, 2023
f5bfcf6
Revert "Substitute occurrences of pvlib.__file__ or similar where app…
echedey-ls Jun 18, 2023
8c78cb4
Revert "Will this fix the table?"
echedey-ls Jun 18, 2023
6215608
Delete custom path behaviour, rename to get_example_dataset_path
echedey-ls Jun 18, 2023
aa08e09
Use function in tests only
echedey-ls Jun 18, 2023
61571be
Forgot to update tests
echedey-ls Jun 18, 2023
caec4f4
solve stupid errors
echedey-ls Jun 18, 2023
586381a
Merge branch 'main' into dataset-retrieve-function
echedey-ls Aug 3, 2023
e8c615e
Update whatsnew entries
echedey-ls Aug 3, 2023
8 changes: 3 additions & 5 deletions docs/examples/adr-pvarray/plot_simulate_system.py
@@ -12,12 +12,11 @@
Author: Anton Driesse
"""

import os
import pandas as pd
import matplotlib.pyplot as plt

import pvlib
from pvlib import iotools, location
from pvlib import iotools, location, tools
from pvlib.irradiance import get_total_irradiance
from pvlib.pvarray import pvefficiency_adr

@@ -26,10 +25,9 @@
# Read a TMY3 file containing weather data and select needed columns
#

PVLIB_DIR = pvlib.__path__[0]
DATA_FILE = os.path.join(PVLIB_DIR, 'data', '723170TYA.CSV')
tmy3_filepath = tools.get_test_dataset_path('723170TYA.CSV')

tmy, metadata = iotools.read_tmy3(DATA_FILE, coerce_year=1990,
tmy, metadata = iotools.read_tmy3(tmy3_filepath, coerce_year=1990,
map_variables=True)

df = pd.DataFrame({'ghi': tmy['ghi'], 'dhi': tmy['dhi'], 'dni': tmy['dni'],
@@ -14,20 +14,18 @@
# GHI into the diffuse and direct components. The separate components are
# needed to estimate the total irradiance on a tilted surface.

import pathlib
from matplotlib import pyplot as plt
import pandas as pd
from pvlib.iotools import read_tmy3
from pvlib.solarposition import get_solarposition
from pvlib import irradiance
import pvlib
from pvlib import irradiance, tools

# For this example we use the Greensboro, North Carolina, TMY3 file which is
# in the pvlib data directory. TMY3 are made from the median months from years
# of data measured from 1990 to 2010. Therefore we change the timestamps to a
# common year, 1990.
DATA_DIR = pathlib.Path(pvlib.__file__).parent / 'data'
greensboro, metadata = read_tmy3(DATA_DIR / '723170TYA.CSV', coerce_year=1990,
tmy3_filepath = tools.get_test_dataset_path('723170TYA.CSV')
greensboro, metadata = read_tmy3(tmy3_filepath, coerce_year=1990,
map_variables=True)

# Many of the diffuse fraction estimation methods require the "true" zenith, so
8 changes: 3 additions & 5 deletions docs/examples/irradiance-transposition/plot_seasonal_tilt.py
@@ -12,11 +12,9 @@
# to use a custom Mount class to use the Seasonal Tilt strategy
# with :py:class:`~pvlib.modelchain.ModelChain`.

import pvlib
from pvlib import pvsystem, location, modelchain, iotools
from pvlib import pvsystem, location, modelchain, iotools, tools
from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
import pandas as pd
import pathlib
import matplotlib.pyplot as plt
from dataclasses import dataclass

@@ -43,8 +41,8 @@ def get_orientation(self, solar_zenith, solar_azimuth):
# First let's grab some weather data and make sure our mount produces tilts
# like we expect:

DATA_DIR = pathlib.Path(pvlib.__file__).parent / 'data'
tmy, metadata = iotools.read_tmy3(DATA_DIR / '723170TYA.CSV', coerce_year=1990,
tmy3_filepath = tools.get_test_dataset_path('723170TYA.CSV')
tmy, metadata = iotools.read_tmy3(tmy3_filepath, coerce_year=1990,
map_variables=True)
# shift from TMY3 right-labeled index to left-labeled index:
tmy.index = tmy.index - pd.Timedelta(hours=1)
@@ -19,21 +19,19 @@
# insolation is calculated for each strategy to show how orientation affects
# seasonal irradiance collection.

import pvlib
from pvlib import location
from pvlib import irradiance
from pvlib import tracking
from pvlib import tools
from pvlib.iotools import read_tmy3
import pandas as pd
from matplotlib import pyplot as plt
import pathlib

# get full path to the data directory
DATA_DIR = pathlib.Path(pvlib.__file__).parent / 'data'
# get full path to the example file
tmy3_filepath = tools.get_test_dataset_path('723170TYA.CSV')

# get TMY3 dataset
tmy, metadata = read_tmy3(DATA_DIR / '723170TYA.CSV', coerce_year=1990,
map_variables=True)
tmy, metadata = read_tmy3(tmy3_filepath, coerce_year=1990, map_variables=True)
# TMY3 datasets are right-labeled (AKA "end of interval") which means the last
# interval of Dec 31, 23:00 to Jan 1 00:00 is labeled Jan 1 00:00. When rolling
# up hourly irradiance to monthly insolation, a spurious January value is
10 changes: 5 additions & 5 deletions docs/examples/soiling/plot_fig3A_hsu_soiling_example.py
@@ -21,18 +21,18 @@
# PM2.5 and PM10 data come from the EPA. First, let's read in the
# weather data and run the HSU soiling model:

import pathlib
from matplotlib import pyplot as plt
from pvlib import soiling
import pvlib
import pandas as pd

# get full path to the data directory
DATA_DIR = pathlib.Path(pvlib.__file__).parent / 'data'
# get full path to the example file
soiling_hsu_filepath = \
pvlib.tools.get_test_dataset_path('soiling_hsu_example_inputs.csv')

# read rainfall, PM2.5, and PM10 data from file
imperial_county = pd.read_csv(DATA_DIR / 'soiling_hsu_example_inputs.csv',
index_col=0, parse_dates=True)
imperial_county = pd.read_csv(soiling_hsu_filepath, index_col=0,
parse_dates=True)
rainfall = imperial_county['rain']
depo_veloc = {'2_5': 0.0009, '10': 0.004} # default values from [1] (m/s)
rain_accum_period = pd.Timedelta('1h') # default
11 changes: 6 additions & 5 deletions docs/examples/soiling/plot_greensboro_kimber_soiling.py
@@ -30,17 +30,16 @@
# step.

from datetime import datetime
import pathlib
from matplotlib import pyplot as plt
from pvlib.iotools import read_tmy3
from pvlib.soiling import kimber
import pvlib
from pvlib.tools import get_test_dataset_path

# get full path to the data directory
DATA_DIR = pathlib.Path(pvlib.__file__).parent / 'data'
# get full path to the dataset file
tmy_filepath = get_test_dataset_path('723170TYA.CSV')

# get TMY3 data with rain
greensboro, _ = read_tmy3(DATA_DIR / '723170TYA.CSV', coerce_year=1990,
greensboro, _ = read_tmy3(tmy_filepath, coerce_year=1990,
map_variables=True)
# get the rain data
greensboro_rain = greensboro['Lprecip depth (mm)']
@@ -65,3 +64,5 @@
plt.tight_layout()

plt.show()

# %%
5 changes: 5 additions & 0 deletions docs/sphinx/source/conf.py
@@ -365,6 +365,11 @@ def setup(app):
# Modules for which function/class level galleries are created. In
# this case only pvlib, could include others though. must be tuple of str
'doc_module': ('pvlib',),

# objects to exclude from implicit backreferences
# https://sphinx-gallery.github.io/stable/configuration.html
# Section #add-mini-galleries-for-api-documentation
'exclude_implicit_doc': {r'pvlib\.tools\.get_test_dataset_path'},
}
# supress warnings in gallery output
# https://sphinx-gallery.github.io/stable/configuration.html
7 changes: 7 additions & 0 deletions docs/sphinx/source/reference/iotools.rst
@@ -47,3 +47,10 @@ in some files.

location.Location.from_tmy
location.Location.from_epw

Functions for locating the example data files included in pvlib.

.. autosummary::
:toctree: generated/

tools.get_test_dataset_path
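
As a quick orientation for readers, the gallery and user-guide diffs in this PR call the newly documented helper roughly like this (a usage sketch assembled from the hunks above, not extra documentation):

    from pvlib import tools
    from pvlib.iotools import read_tmy3

    # resolve a file bundled under pvlib/data by name, instead of joining
    # paths around pvlib.__file__ by hand
    tmy3_filepath = tools.get_test_dataset_path('723170TYA.CSV')
    tmy, metadata = read_tmy3(tmy3_filepath, coerce_year=1990,
                              map_variables=True)
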
16 changes: 4 additions & 12 deletions docs/sphinx/source/user_guide/clearsky.rst
@@ -30,8 +30,6 @@ We'll need these imports for the examples below.

.. ipython::

In [1]: import os

In [1]: import itertools

In [1]: import matplotlib.pyplot as plt
Expand All @@ -40,7 +38,7 @@ We'll need these imports for the examples below.

In [1]: import pvlib

In [1]: from pvlib import clearsky, atmosphere, solarposition
In [1]: from pvlib import clearsky, atmosphere, solarposition, tools

In [1]: from pvlib.location import Location

@@ -130,13 +128,9 @@ the year. You could run it in a loop to create plots for all months.

In [1]: import calendar

In [1]: import os

In [1]: import h5py

In [1]: pvlib_path = os.path.dirname(os.path.abspath(pvlib.clearsky.__file__))

In [1]: filepath = os.path.join(pvlib_path, 'data', 'LinkeTurbidities.h5')
In [1]: filepath = tools.get_test_dataset_path('LinkeTurbidities.h5')

In [1]: def plot_turbidity_map(month, vmin=1, vmax=100):
...: plt.figure();
@@ -210,13 +204,11 @@ wavelengths [Bir80]_, and is implemented in

.. ipython::

In [1]: pvlib_data = os.path.join(os.path.dirname(pvlib.__file__), 'data')

In [1]: mbars = 100 # conversion factor from mbars to Pa

In [1]: tmy_file = os.path.join(pvlib_data, '703165TY.csv') # TMY file
In [1]: tmy_file_path = tools.get_test_dataset_path('703165TY.csv') # TMY file

In [1]: tmy_data, tmy_header = read_tmy3(tmy_file, coerce_year=1999, map_variables=True)
In [1]: tmy_data, tmy_header = read_tmy3(tmy_file_path, coerce_year=1999, map_variables=True)

In [1]: tl_historic = clearsky.lookup_linke_turbidity(time=tmy_data.index,
...: latitude=tmy_header['latitude'], longitude=tmy_header['longitude'])
3 changes: 3 additions & 0 deletions docs/sphinx/source/whatsnew/v0.10.0.rst
@@ -21,6 +21,9 @@ Deprecations

Enhancements
~~~~~~~~~~~~
* Add :py:func:`pvlib.tools.locate_example_dataset` to get example and test
files under `pvlib/data` path.
(:issue:`924`, :pull:`1763`)


Bug fixes
6 changes: 2 additions & 4 deletions pvlib/clearsky.py
@@ -3,7 +3,6 @@
to calculate clear sky GHI, DNI, and DHI.
"""

import os
from collections import OrderedDict
import calendar

@@ -14,7 +13,7 @@
import h5py

from pvlib import atmosphere, tools
from pvlib.tools import _degrees_to_index
from pvlib.tools import _degrees_to_index, get_test_dataset_path


def ineichen(apparent_zenith, airmass_absolute, linke_turbidity,
@@ -189,8 +188,7 @@ def lookup_linke_turbidity(time, latitude, longitude, filepath=None,
# 1st column: 179.9583 W, 2nd column: 179.875 W

if filepath is None:
pvlib_path = os.path.dirname(os.path.abspath(__file__))
filepath = os.path.join(pvlib_path, 'data', 'LinkeTurbidities.h5')
filepath = get_test_dataset_path('LinkeTurbidities.h5')
Review comment (Member): this is definitely not test data


latitude_index = _degrees_to_index(latitude, coordinate='latitude')
longitude_index = _degrees_to_index(longitude, coordinate='longitude')
7 changes: 3 additions & 4 deletions pvlib/location.py
@@ -4,15 +4,15 @@

# Will Holmgren, University of Arizona, 2014-2016.

import os
import datetime

import pandas as pd
import pytz
import h5py

from pvlib import solarposition, clearsky, atmosphere, irradiance
from pvlib.tools import _degrees_to_index
from pvlib.tools import _degrees_to_index, get_test_dataset_path


class Location:
"""
@@ -426,8 +426,7 @@ def lookup_altitude(latitude, longitude):

"""

pvlib_path = os.path.dirname(os.path.abspath(__file__))
filepath = os.path.join(pvlib_path, 'data', 'Altitude.h5')
filepath = get_test_dataset_path('Altitude.h5')

latitude_index = _degrees_to_index(latitude, coordinate='latitude')
longitude_index = _degrees_to_index(longitude, coordinate='longitude')
18 changes: 6 additions & 12 deletions pvlib/pvsystem.py
@@ -7,7 +7,6 @@
import functools
import io
import itertools
import os
from urllib.request import urlopen
import numpy as np
from scipy import constants
@@ -20,7 +19,7 @@

from pvlib import (atmosphere, iam, inverter, irradiance,
singlediode as _singlediode, temperature)
from pvlib.tools import _build_kwargs, _build_args
from pvlib.tools import _build_kwargs, _build_args, get_test_dataset_path


# a dict of required parameter names for each DC power model
@@ -2371,24 +2370,19 @@ def retrieve_sam(name=None, path=None):

if name is not None:
name = name.lower()
data_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), 'data')
if name == 'cecmod':
csvdata = os.path.join(
data_path, 'sam-library-cec-modules-2019-03-05.csv')
csvdata = 'sam-library-cec-modules-2019-03-05.csv'
elif name == 'sandiamod':
csvdata = os.path.join(
data_path, 'sam-library-sandia-modules-2015-6-30.csv')
csvdata = 'sam-library-sandia-modules-2015-6-30.csv'
elif name == 'adrinverter':
csvdata = os.path.join(
data_path, 'adr-library-cec-inverters-2019-03-05.csv')
csvdata = 'adr-library-cec-inverters-2019-03-05.csv'
elif name in ['cecinverter', 'sandiainverter']:
# Allowing either, to provide for old code,
# while aligning with current expectations
csvdata = os.path.join(
data_path, 'sam-library-cec-inverters-2019-03-05.csv')
csvdata = 'sam-library-cec-inverters-2019-03-05.csv'
else:
raise ValueError(f'invalid name {name}')
csvdata = get_test_dataset_path(csvdata)
elif path is not None:
if path.startswith('http'):
response = urlopen(path)
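
The retrieve_sam() hunk above only changes how the bundled CSV path is resolved internally; the public call is unchanged. A minimal usage reminder, assuming the database keys already present in the code:

    from pvlib import pvsystem

    # same public API as before the refactor; the CSV files are now
    # located via get_test_dataset_path instead of os.path.join
    cec_modules = pvsystem.retrieve_sam('cecmod')
    cec_inverters = pvsystem.retrieve_sam('cecinverter')
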
6 changes: 2 additions & 4 deletions pvlib/spectrum/mismatch.py
@@ -6,7 +6,6 @@
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
import os


def get_example_spectral_response(wavelength=None):
@@ -117,10 +116,9 @@ def get_am15g(wavelength=None):
'''
# Contributed by Anton Driesse (@adriesse), PV Performance Labs. Aug. 2022

pvlib_path = pvlib.__path__[0]
filepath = os.path.join(pvlib_path, 'data', 'astm_g173_am15g.csv')
am15g_file = pvlib.tools.get_test_dataset_path('astm_g173_am15g.csv')

am15g = pd.read_csv(filepath, index_col=0).squeeze()
am15g = pd.read_csv(am15g_file, index_col=0).squeeze()

if wavelength is not None:
interpolator = interp1d(am15g.index, am15g,
24 changes: 24 additions & 0 deletions pvlib/tests/test_tools.py
@@ -1,7 +1,9 @@
import pytest

import pvlib
from pvlib import tools
import numpy as np
import pathlib


@pytest.mark.parametrize('keys, input_dict, expected', [
@@ -95,3 +97,25 @@ def test_degrees_to_index_1():
'latitude' or 'longitude' is passed."""
with pytest.raises(IndexError): # invalid value for coordinate argument
tools._degrees_to_index(degrees=22.0, coordinate='width')


@pytest.mark.parametrize('location', [tuple(), ('data',)])
def test_get_test_dataset_path_passes(location):
expected_dataset = '723170TYA.CSV'
assert pathlib.Path(pvlib.__path__[0], 'data',
expected_dataset).exists()
assert tools.get_test_dataset_path(expected_dataset, *location) \
.name == expected_dataset
assert tools.get_test_dataset_path(pathlib.Path(expected_dataset)) \
.name == expected_dataset
assert tools.get_test_dataset_path(expected_dataset).exists()


def test_get_test_dataset_path_fails_on_not_found():
error_prompt = "Dataset has not been found in pvlib at .*. " \
"Please check dataset name."
nonexistent_file = "_Texto_cualquiera.-formato-"
assert not pathlib.Path(pvlib.__path__[0], 'data',
nonexistent_file).exists()
with pytest.raises(ValueError, match=error_prompt):
tools.get_test_dataset_path(nonexistent_file)
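
The pvlib/tools.py hunk that actually defines get_test_dataset_path is not part of this 17-commit view. Judging from the tests above (a str or pathlib.Path dataset name, optional extra path segments, and a ValueError matching "Dataset has not been found in pvlib at .*. Please check dataset name."), a minimal sketch could look like the following; this is an assumption reconstructed for reference, not the merged implementation:

    from pathlib import Path

    import pvlib


    def get_test_dataset_path(dataset, *folder):
        # Sketch only: reconstructed from the tests in this PR, not the
        # actual pvlib/tools.py diff.
        # Default to pvlib/data; extra positional segments (e.g. 'data')
        # are accepted, mirroring test_get_test_dataset_path_passes.
        base = Path(pvlib.__path__[0], *(folder or ('data',)))
        path = base / dataset
        if not path.exists():
            raise ValueError(
                f"Dataset has not been found in pvlib at {path}. "
                "Please check dataset name."
            )
        return path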