diff --git a/emmet-builders/emmet/builders/utils.py b/emmet-builders/emmet/builders/utils.py index 7a417fe983..05f4915b7a 100644 --- a/emmet-builders/emmet/builders/utils.py +++ b/emmet-builders/emmet/builders/utils.py @@ -1,6 +1,8 @@ -from typing import Set, Union, Any +from __future__ import annotations +from typing import Set, Union, Any, Literal, Optional import sys import os +from pathlib import Path from gzip import GzipFile import orjson import json @@ -216,11 +218,45 @@ def __exit__(self, exc_type, exc_val, exc_tb): sys.stdout = self._original_stdout -def get_potcar_stats(): +def get_potcar_stats( + method: Literal["potcar", "pymatgen", "stored"] = "potcar", + path_to_stored_stats: Optional[Union[str, os.PathLike, Path]] = None, +) -> dict[str, Any]: + """ + Get the POTCAR stats used in MP calculations to validate POTCARs. + + Args: + method : Literal[str : "potcar","pymatgen","stored"] = "potcar" + Method to generate the POTCAR stats: + - "potcar": regenerate stats from a user's POTCAR library. + - "pymatgen": regenerate stats from the stored pymatgen + summary stats dict. This has the downside of the possibility + of finding multiple matching POTCAR stats for older POTCAR + releases. As of 25 March, 2024, it does not appear that the + MP POTCARs have duplicates. + - "stored": load a stored dict of POTCAR stats. + path_to_stored_stats : str, os.PathLike, Path, or None + If a str or path-like object, the path to the stored summary stats file. + If None, defaults to + `importlib.resources.files("emmet.builders.vasp") / "mp_potcar_stats.json.gz"` + Returns: + dict of POTCAR summary stats. 
+ """ default_settings = EmmetBuildSettings() stats: dict[str, dict] = {} # type: ignore + if method == "stored": + from monty.serialization import loadfn + + if path_to_stored_stats is None: + from importlib.resources import files + + path_to_stored_stats = str( + files("emmet.builders.vasp") / "mp_potcar_stats.json.gz" + ) + return loadfn(path_to_stored_stats) + for ( calc_type, input_set, @@ -231,14 +267,29 @@ def get_potcar_stats(): functional = _input._config_dict["POTCAR_FUNCTIONAL"] for potcar_symbol in _input.CONFIG["POTCAR"].values(): - potcar = PotcarSingle.from_symbol_and_functional( - symbol=potcar_symbol, functional=functional - ) - summary_stats = potcar._summary_stats.copy() - # fallback method for validation - use header hash and symbol - # note that the potcar_spec assigns PotcarSingle.symbol to "titel" - summary_stats["titel"] = potcar.TITEL - summary_stats["hash"] = potcar.md5_header_hash + if method == "potcar": + potcar = PotcarSingle.from_symbol_and_functional( + symbol=potcar_symbol, functional=functional + ) + summary_stats = potcar._summary_stats.copy() + # fallback method for validation - use header hash and symbol + # note that the potcar_spec assigns PotcarSingle.symbol to "titel" + # whereas the ***correct*** field is `header` + summary_stats["titel"] = potcar.header + summary_stats["hash"] = potcar.md5_header_hash + summary_stats = [summary_stats] + + elif method == "pymatgen": + summary_stats = [] + for _, entries in PotcarSingle._potcar_summary_stats[ + functional + ].items(): + summary_stats += [ + {**entry, "titel": None, "hash": None} + for entry in entries + if entry["symbol"] == potcar_symbol + ] + stats[calc_type].update({potcar_symbol: summary_stats}) return stats diff --git a/emmet-builders/emmet/builders/vasp/mp_potcar_stats.json.gz b/emmet-builders/emmet/builders/vasp/mp_potcar_stats.json.gz new file mode 100644 index 0000000000..9fd490246b Binary files /dev/null and 
b/emmet-builders/emmet/builders/vasp/mp_potcar_stats.json.gz differ diff --git a/emmet-builders/setup.py b/emmet-builders/setup.py index 58b119cdf5..b764156794 100644 --- a/emmet-builders/setup.py +++ b/emmet-builders/setup.py @@ -11,6 +11,7 @@ long_description_content_type="text/markdown", url="https://github.com/materialsproject/emmet", packages=find_namespace_packages(include=["emmet.*"]), + package_data={"emmet.builders": ["*.json", "*.json.gz"]}, install_requires=[ "emmet-core[all]", "emmet-core[ml]", diff --git a/emmet-builders/tests/test_utils.py b/emmet-builders/tests/test_utils.py index 6fd1e12a19..82d509e100 100644 --- a/emmet-builders/tests/test_utils.py +++ b/emmet-builders/tests/test_utils.py @@ -6,9 +6,11 @@ ) from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph from numpy.testing import assert_almost_equal -from monty.serialization import loadfn +from monty.serialization import loadfn, dumpfn from emmet.core.settings import EmmetSettings +import pytest + def test_maximal_spanning_non_intersecting_subsets(): assert maximal_spanning_non_intersecting_subsets([{"A"}, {"A", "B"}]) == { @@ -59,11 +61,12 @@ def test_get_hop_cutoff(test_dir): assert len(check_mg.unique_hops) == 6 -def test_get_potcar_stats(): +@pytest.mark.parametrize("method", ("potcar", "pymatgen", "stored")) +def test_get_potcar_stats(method: str, tmp_path): calc_type = EmmetSettings().VASP_DEFAULT_INPUT_SETS try: - potcar_stats = get_potcar_stats() + potcar_stats = get_potcar_stats(method=method) except Exception as exc: if "No POTCAR for" in str(exc): # No Potcar library available, skip test @@ -78,7 +81,21 @@ def test_get_potcar_stats(): # ensure that each entry has needed fields for both # legacy and modern potcar validation assert all( - set(potcar_stats[calc_type][symb]) - == set(["hash", "keywords", "titel", "stats"]) - for symb in potcar_stats[calc_type] + [ + set(entry) == set(["hash", "keywords", "titel", "stats"]) + for entry in entries + ] + for entries 
in potcar_stats[calc_type].values() + ) + + if method == "stored": + new_stats_path = tmp_path / "_temp_potcar_stats.json" + dumpfn(potcar_stats, new_stats_path) + + new_potcar_stats = get_potcar_stats( + method="stored", path_to_stored_stats=new_stats_path + ) + assert all( + potcar_stats[calc_type] == new_potcar_stats[calc_type] + for calc_type in potcar_stats ) diff --git a/emmet-cli/emmet/cli/calc.py b/emmet-cli/emmet/cli/calc.py index f0d51398e1..43d008596f 100644 --- a/emmet-cli/emmet/cli/calc.py +++ b/emmet-cli/emmet/cli/calc.py @@ -26,7 +26,7 @@ _UNPACK_INT = struct.Struct("