From 8790e7ed38ea5419ba711216e06f661ab9c17955 Mon Sep 17 00:00:00 2001 From: "Yngve S. Kristiansen" Date: Wed, 15 Nov 2023 09:47:01 +0100 Subject: [PATCH] eclsum -> summary --- res2df/csv2res.py | 2 +- res2df/resdatafiles.py | 12 +++---- res2df/summary.py | 82 +++++++++++++++++++++--------------------- tests/test_eclfiles.py | 2 +- tests/test_summary.py | 56 ++++++++++++++--------------- 5 files changed, 77 insertions(+), 77 deletions(-) diff --git a/res2df/csv2res.py b/res2df/csv2res.py index dcd040c7a..cdeec3199 100644 --- a/res2df/csv2res.py +++ b/res2df/csv2res.py @@ -51,7 +51,7 @@ def get_parser() -> argparse.ArgumentParser: summary_parser = subparsers.add_parser( "summary", - help="Write EclSum UNSMRY files", + help="Write summary UNSMRY files", description=("Write Eclipse UNSMRY files from CSV files."), ) summary.fill_reverse_parser(summary_parser) diff --git a/res2df/resdatafiles.py b/res2df/resdatafiles.py index 6207ded37..ab65da265 100644 --- a/res2df/resdatafiles.py +++ b/res2df/resdatafiles.py @@ -70,7 +70,7 @@ def __init__(self, eclbase): # Set class variables to None self._egridfile = None # Should be ResdataFile self._initfile = None # Should be ResdataFile - self._eclsum = None # Should be Summary + self._summary = None # Should be Summary self._egrid = None # Should be Grid @@ -138,7 +138,7 @@ def get_egridfile(self) -> ResdataFile: return self._egridfile - def get_eclsum(self, include_restart: bool = True) -> Summary: + def get_summary(self, include_restart: bool = True) -> Summary: """Find and return the summary file and return as Summary object @@ -146,15 +146,15 @@ def get_eclsum(self, include_restart: bool = True) -> Summary: include_restart: Sent to libecl for whether restart files should be traversed. 
""" - if not self._eclsum: + if not self._summary: smryfilename = self._eclbase + ".UNSMRY" if not Path(smryfilename).is_file(): raise FileNotFoundError( errno.ENOENT, os.strerror(errno.ENOENT), smryfilename ) logger.info("Opening UNSMRY file: %s", smryfilename) - self._eclsum = Summary(smryfilename, include_restart=include_restart) - return self._eclsum + self._summary = Summary(smryfilename, include_restart=include_restart) + return self._summary def get_initfile(self) -> ResdataFile: """Find and return the INIT file as an ResdataFile object""" @@ -207,7 +207,7 @@ def close(self) -> None: self._egridfile = None self._initfile = None # This is necessary for garbage collection to close the Summary file: - self._eclsum = None + self._summary = None self._rstfile = None self._rftfile = None diff --git a/res2df/summary.py b/res2df/summary.py index bcf228a46..cc988050d 100644 --- a/res2df/summary.py +++ b/res2df/summary.py @@ -84,7 +84,7 @@ def _ensure_date_or_none(some_date: Optional[Union[str, dt.date]]) -> Optional[d def _crop_datelist( - eclsumsdates: List[dt.datetime], + summarysdates: List[dt.datetime], freq: Union[dt.date, dt.datetime, str], start_date: Optional[dt.date] = None, end_date: Optional[dt.date] = None, @@ -94,7 +94,7 @@ def _crop_datelist( only cropped or returned as is. Args: - eclsumsdates: list of datetimes, typically coming from Summary.dates + summarysdates: list of datetimes, typically coming from Summary.dates freq: Either a date or datetime, or a frequency string "raw", "first" or "last". start_date: Dates prior to this date will be cropped. 
@@ -105,7 +105,7 @@ def _crop_datelist( """ datetimes: Union[List[dt.date], List[dt.datetime]] = [] # type: ignore if freq == FREQ_RAW: - datetimes = eclsumsdates + datetimes = summarysdates datetimes.sort() if start_date: # Convert to datetime (at 00:00:00) @@ -117,9 +117,9 @@ def _crop_datelist( datetimes = [x for x in datetimes if x < end_date] datetimes = datetimes + [end_date] elif freq == FREQ_FIRST: - datetimes = [min(eclsumsdates).date()] + datetimes = [min(summarysdates).date()] elif freq == FREQ_LAST: - datetimes = [max(eclsumsdates).date()] + datetimes = [max(summarysdates).date()] elif isinstance(freq, (dt.date, dt.datetime)): datetimes = [freq] return datetimes @@ -193,7 +193,7 @@ def _fallback_date_range(start: dt.date, end: dt.date, freq: str) -> List[dt.dat def resample_smry_dates( - eclsumsdates: List[dt.datetime], + summarysdates: List[dt.datetime], freq: str = FREQ_RAW, normalize: bool = True, start_date: Optional[Union[str, dt.date]] = None, @@ -206,7 +206,7 @@ def resample_smry_dates( can be returned, on the same date range. Incoming dates can also be cropped. Args: - eclsumsdates: list of datetimes, typically coming from Summary.dates + summarysdates: list of datetimes, typically coming from Summary.dates freq: string denoting requested frequency for the returned list of datetime. 'raw' will return the input datetimes (no resampling). 
@@ -233,7 +233,7 @@ def resample_smry_dates( if freq in [FREQ_RAW, FREQ_FIRST, FREQ_LAST] or isinstance( freq, (dt.date, dt.datetime) ): - return _crop_datelist(eclsumsdates, freq, start_date, end_date) + return _crop_datelist(summarysdates, freq, start_date, end_date) # In case freq is an ISO-date(time)-string, interpret as such: try: @@ -244,8 +244,8 @@ def resample_smry_dates( pass # These are datetime.datetime, not datetime.date - start_smry = min(eclsumsdates) - end_smry = max(eclsumsdates) + start_smry = min(summarysdates) + end_smry = max(summarysdates) # Normalize start and end date according to frequency by extending the time range. # [1997-11-05, 2020-03-02] and monthly frequecy @@ -355,10 +355,10 @@ def df( column_keys = [column_keys] if isinstance(resdatafiles, Summary): - eclsum = resdatafiles + summary = resdatafiles else: try: - eclsum = resdatafiles.get_eclsum(include_restart=include_restart) + summary = resdatafiles.get_summary(include_restart=include_restart) except OSError: logger.warning("Error reading summary instance, returning empty dataframe") return pd.DataFrame() @@ -366,7 +366,7 @@ def df( time_index_arg: Optional[Union[List[dt.date], List[dt.datetime]]] if isinstance(time_index, str) and time_index == "raw": time_index_arg = resample_smry_dates( - eclsum.dates, + summary.dates, "raw", False, start_date, @@ -374,7 +374,7 @@ def df( ) elif isinstance(time_index, str): time_index_arg = resample_smry_dates( - eclsum.dates, + summary.dates, time_index, True, start_date, @@ -402,8 +402,8 @@ def df( time_index_str or "raw", ) - # dframe = eclsum.pandas_frame(time_index_arg, column_keys) - dframe = _libecl_eclsum_pandas_frame(eclsum, time_index_arg, column_keys) + # dframe = summary.pandas_frame(time_index_arg, column_keys) + dframe = _libecl_summary_pandas_frame(summary, time_index_arg, column_keys) logger.info( "Dataframe with smry data ready, %d columns and %d rows", @@ -415,7 +415,7 @@ def df( dframe = _merge_params(dframe, paramfile, 
resdatafiles) # Add metadata as an attribute the dataframe, using experimental Pandas features: - meta = smry_meta(eclsum) + meta = smry_meta(summary) # Slice meta to dataframe columns: dframe.attrs["meta"] = { column_key: meta[column_key] for column_key in dframe if column_key in meta @@ -592,20 +592,20 @@ def smry_meta(resdatafiles: ResdataFiles) -> Dict[str, Dict[str, Any]]: * wgname (str or None) """ if isinstance(resdatafiles, Summary): - eclsum = resdatafiles + summary = resdatafiles else: - eclsum = resdatafiles.get_eclsum() + summary = resdatafiles.get_summary() meta: Dict[str, Dict[str, Any]] = {} - for col in eclsum.keys(): + for col in summary.keys(): meta[col] = {} - meta[col]["unit"] = eclsum.unit(col) - meta[col]["is_total"] = eclsum.is_total(col) - meta[col]["is_rate"] = eclsum.is_rate(col) - meta[col]["is_historical"] = eclsum.smspec_node(col).is_historical() - meta[col]["keyword"] = eclsum.smspec_node(col).keyword - meta[col]["wgname"] = eclsum.smspec_node(col).wgname - num = eclsum.smspec_node(col).get_num() + meta[col]["unit"] = summary.unit(col) + meta[col]["is_total"] = summary.is_total(col) + meta[col]["is_rate"] = summary.is_rate(col) + meta[col]["is_historical"] = summary.smspec_node(col).is_historical() + meta[col]["keyword"] = summary.smspec_node(col).keyword + meta[col]["wgname"] = summary.smspec_node(col).wgname + num = summary.smspec_node(col).get_num() if num is not None: meta[col]["get_num"] = num return meta @@ -679,7 +679,7 @@ def _fix_dframe_for_libecl(dframe: pd.DataFrame) -> pd.DataFrame: return dframe -def df2ressum( +def df2summary( dframe: pd.DataFrame, casename: str = "SYNTHETIC", ) -> Summary: @@ -701,12 +701,12 @@ def df2ressum( raise ValueError(f"Do not use dots in casename {casename}") dframe = _fix_dframe_for_libecl(dframe) - return _libecl_eclsum_from_pandas(casename, dframe) + return _libecl_summary_from_pandas(casename, dframe) # return Summary.from_pandas(casename, dframe) -def _libecl_eclsum_pandas_frame( - eclsum: 
Summary, +def _libecl_summary_pandas_frame( + summary: Summary, time_index: Optional[Union[List[dt.date], List[dt.datetime]]] = None, column_keys: Optional[List[str]] = None, ) -> pd.DataFrame: @@ -717,24 +717,24 @@ def _libecl_eclsum_pandas_frame( https://github.com/equinor/ecl/issues/802 """ if column_keys is None: - keywords = SummaryKeyWordVector(eclsum, add_keywords=True) + keywords = SummaryKeyWordVector(summary, add_keywords=True) else: - keywords = SummaryKeyWordVector(eclsum) + keywords = SummaryKeyWordVector(summary) for key in column_keys: keywords.add_keywords(key) # pylint: disable=protected-access if time_index is None: - time_index = eclsum.dates # Changed from libecl + time_index = summary.dates # Changed from libecl data = np.zeros([len(time_index), len(keywords)]) Summary._init_pandas_frame( - eclsum, keywords, data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) + summary, keywords, data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) ) else: - time_points = eclsum._make_time_vector(time_index) + time_points = summary._make_time_vector(time_index) data = np.zeros([len(time_points), len(keywords)]) Summary._init_pandas_frame_interp( - eclsum, + summary, keywords, time_points, data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), @@ -754,7 +754,7 @@ def _libecl_eclsum_pandas_frame( return frame -def _libecl_eclsum_from_pandas( +def _libecl_summary_from_pandas( case: str, frame: pd.DataFrame, dims: Optional[List[int]] = None, @@ -882,7 +882,7 @@ def fill_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: def fill_reverse_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: - """Fill a parser for the operation: dataframe -> eclsum files""" + """Fill a parser for the operation: dataframe -> summary files""" parser.add_argument( "-o", @@ -938,10 +938,10 @@ def summary_reverse_main(args) -> None: # Summary.fwrite() can only write to current directory: cwd = os.getcwd() - eclsum = df2ressum(summary_df, eclbase) + summary = 
df2summary(summary_df, eclbase) try: os.chdir(outputdir) - Summary.fwrite(eclsum) + Summary.fwrite(summary) finally: os.chdir(cwd) diff --git a/tests/test_eclfiles.py b/tests/test_eclfiles.py index d9f219e69..bcbe9b147 100644 --- a/tests/test_eclfiles.py +++ b/tests/test_eclfiles.py @@ -56,7 +56,7 @@ def test_filedescriptors(): assert len(list(fd_dir.glob("*"))) == pre_fd_count assert resdatafiles._rstfile is None - resdatafiles.get_eclsum() + resdatafiles.get_summary() assert len(list(fd_dir.glob("*"))) == pre_fd_count + 1 resdatafiles.close() assert len(list(fd_dir.glob("*"))) == pre_fd_count diff --git a/tests/test_summary.py b/tests/test_summary.py index 80a0269f9..e22f3a182 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -18,7 +18,7 @@ _fix_dframe_for_libecl, date_range, df, - df2ressum, + df2summary, resample_smry_dates, smry_meta, ) @@ -411,9 +411,9 @@ def test_foreseeable_future(tmp_path): {"DATE": "2500-01-01", "FPR": 180}, ] ) - eclsum = df2ressum(src_dframe, casename="PLUGABANDON") + res_summary = df2summary(src_dframe, casename="PLUGABANDON") - dframe = summary.df(eclsum) + dframe = summary.df(res_summary) assert ( dframe.index == [ @@ -426,7 +426,7 @@ def test_foreseeable_future(tmp_path): ).all() # Try with time interpolation involved: - dframe = summary.df(eclsum, time_index="yearly") + dframe = summary.df(res_summary, time_index="yearly") assert len(dframe) == 501 assert dframe.index.max() == datetime.date(year=2500, month=1, day=1) @@ -437,8 +437,8 @@ def test_foreseeable_future(tmp_path): "FPR": range(70), } ) - eclsum = df2ressum(src_dframe, casename="PLUGABANDON") - dframe = summary.df(eclsum) + res_summary = df2summary(src_dframe, casename="PLUGABANDON") + dframe = summary.df(res_summary) # Still buggy: assert dframe.index[-1] == dt(2068, 12, 31, 23, 57, 52) @@ -449,8 +449,8 @@ def test_foreseeable_future(tmp_path): "FPR": range(69), } ) - eclsum = df2ressum(src_dframe, casename="PLUGABANDON") - dframe = summary.df(eclsum) + res_summary = 
df2summary(src_dframe, casename="PLUGABANDON") + dframe = summary.df(res_summary) # Works fine when stepping only 68 years: assert dframe.index[-1] == dt(2468, 1, 1, 0, 0, 0) @@ -635,7 +635,7 @@ def test_resample_smry_dates(): resdatafiles = ResdataFiles(REEK) - ecldates = resdatafiles.get_eclsum().dates + ecldates = resdatafiles.get_summary().dates assert isinstance(resample_smry_dates(ecldates), list) assert isinstance(resample_smry_dates(ecldates, freq="last"), list) @@ -812,7 +812,7 @@ def test_unique_datetime_retain_index_name(filepath): def test_smry_meta(): - """Test obtaining metadata dictionary for summary vectors from an EclSum object""" + """Test obtaining metadata dictionary for summary vectors from a Summary object""" meta = smry_meta(ResdataFiles(REEK)) assert isinstance(meta, dict) @@ -850,9 +850,9 @@ def test_smry_meta_synthetic(): {"DATE": np.datetime64("2016-01-01"), "FOPT": 1000, "FOPR": 100}, ] ).set_index("DATE") - synt_meta = smry_meta(df2ressum(dframe)) + synt_meta = smry_meta(df2summary(dframe)) - # Dummy unit provided by EclSum: + # Dummy unit provided by summary: assert synt_meta["FOPT"]["unit"] == "UNIT" @@ -944,7 +944,7 @@ def test_smry_meta_synthetic(): ], ) def test_fix_dframe_for_libecl(dframe, expected_dframe): - """Test the dataframe preprocessor/validator for df2ressum works""" + """Test the dataframe preprocessor/validator for df2summary works""" pd.testing.assert_frame_equal( _fix_dframe_for_libecl(dframe), expected_dframe, check_index_type=False ) @@ -1019,19 +1019,19 @@ def test_fix_dframe_for_libecl(dframe, expected_dframe): ), ], ) -def test_df2ressum(dframe): - """Test that a dataframe can be converted to an EclSum object, and then read +def test_df2summary(dframe): + """Test that a dataframe can be converted to a Summary object, and then read back again""" # Massage the dframe first so we can assert on equivalence after. 
dframe = _fix_dframe_for_libecl(dframe) - eclsum = df2ressum(dframe) + summary = df2summary(dframe) if dframe.empty: - assert eclsum is None + assert summary is None return - dframe_roundtrip = df(eclsum) + dframe_roundtrip = df(summary) pd.testing.assert_frame_equal( dframe.sort_index(axis=1), dframe_roundtrip.sort_index(axis=1), @@ -1039,7 +1039,7 @@ def test_df2ressum(dframe): ) -def test_df2ressum_datetimeindex(): +def test_df2summary_datetimeindex(): """Test that providing a dataframe with a datetimeindex also works""" dframe = pd.DataFrame( [ @@ -1049,21 +1049,21 @@ def test_df2ressum_datetimeindex(): dframe["DATE"] = pd.to_datetime(dframe["DATE"]) dframe.set_index("DATE") - roundtrip = df(df2ressum(dframe)) + roundtrip = df(df2summary(dframe)) assert isinstance(roundtrip.index, pd.DatetimeIndex) assert roundtrip["FOPR"].values == [100] assert roundtrip["FOPT"].values == [1000] def test_duplicated_summary_vectors(caplog): - """EclSum files on disk may contain repeated vectors + """Summary files on disk may contain repeated vectors if the user has inserted a vector name twice in the SUMMARY section res2df.summary.df() should deduplicate this, and give a warning. """ - # res2df.df2ressum() is not able to mock such a UNSMRY file. + # res2df.df2summary() is not able to mock such a UNSMRY file. 
dupe_datafile = ( TESTDIR / "data" @@ -1176,13 +1176,13 @@ def test_res2df_errors(tmp_path): Path("FOO.DATA").write_text("RUNSPEC", encoding="utf8") assert str(ResdataFiles("FOO").get_ecldeck()).strip() == "RUNSPEC" with pytest.raises(OSError): - ResdataFiles("FOO").get_eclsum() + ResdataFiles("FOO").get_summary() # Getting a dataframe from bogus data should give empty data: assert df(ResdataFiles("FOO")).empty -def test_df2ressum_errors(): +def test_df2summary_errors(): """Test various error conditions, checking that the correct error message is emitted""" dframe = pd.DataFrame( @@ -1191,18 +1191,18 @@ def test_df2ressum_errors(): ] ) with pytest.raises(ValueError, match="casename foobar must be UPPER CASE"): - df2ressum(dframe, casename="foobar") + df2summary(dframe, casename="foobar") with pytest.raises(ValueError, match="Do not use dots in casename"): - df2ressum(dframe, casename="FOOBAR.UNSMRY") # .UNSMRY should not be included + df2summary(dframe, casename="FOOBAR.UNSMRY") # .UNSMRY should not be included # No date included: with pytest.raises(ValueError, match="dataframe must have a datetime index"): - df2ressum(pd.DataFrame([{"FOPT": 1000}])) + df2summary(pd.DataFrame([{"FOPT": 1000}])) @pytest.mark.integration def test_csv2res_summary(tmp_path, mocker): - """Check that we can call df2ressum through the csv2res command line + """Check that we can call df2summary through the csv2res command line utility""" dframe = pd.DataFrame( [