Skip to content

Commit

Permalink
Fix potcar checking (#972)
Browse files Browse the repository at this point in the history
* Fix potcar_spec to use PotcarSingle.header rather than PotcarSingle.symbol; add options to emmet.builders.utils.get_potcar_stats to generate reference stats by different methods; fix tests

* fix emmet.builders.utils test to use pytest.tmp_path fixture

* linting

* fix mypy error

* mypy linting
  • Loading branch information
esoteric-ephemera committed Mar 25, 2024
1 parent 560e8fa commit 93353a9
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 34 deletions.
71 changes: 61 additions & 10 deletions emmet-builders/emmet/builders/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from typing import Set, Union, Any
from __future__ import annotations
from typing import Set, Union, Any, Literal, Optional
import sys
import os
from pathlib import Path
from gzip import GzipFile
import orjson
import json
Expand Down Expand Up @@ -216,11 +218,45 @@ def __exit__(self, exc_type, exc_val, exc_tb):
sys.stdout = self._original_stdout


def get_potcar_stats():
def get_potcar_stats(
method: Literal["potcar", "pymatgen", "stored"] = "potcar",
path_to_stored_stats: Optional[Union[str, os.PathLike, Path]] = None,
) -> dict[str, Any]:
"""
Get the POTCAR stats used in MP calculations to validate POTCARs.
Args:
method : Literal["potcar", "pymatgen", "stored"] = "potcar"
Method to generate the POTCAR stats:
- "potcar": regenerate stats from a user's POTCAR library.
- "pymatgen": regenerate stats from the stored pymatgen
summary stats dict. The downside is that multiple matching
POTCAR stats may be found for older POTCAR releases. As of
25 March 2024, the MP POTCARs do not appear to have
duplicates.
- "stored": load a stored dict of POTCAR stats.
path_to_stored_stats : str, os.PathLike, Path, or None
If not None, the path to the stored summary stats file.
If None, defaults to
`importlib.resources.files("emmet.builders.vasp") / "mp_potcar_stats.json.gz"`
Returns:
dict, of POTCAR summary stats.
"""
default_settings = EmmetBuildSettings()

stats: dict[str, dict] = {} # type: ignore

if method == "stored":
from monty.serialization import loadfn

if path_to_stored_stats is None:
from importlib.resources import files

path_to_stored_stats = str(
files("emmet.builders.vasp") / "mp_potcar_stats.json.gz"
)
return loadfn(path_to_stored_stats)

for (
calc_type,
input_set,
Expand All @@ -231,14 +267,29 @@ def get_potcar_stats():
functional = _input._config_dict["POTCAR_FUNCTIONAL"]

for potcar_symbol in _input.CONFIG["POTCAR"].values():
potcar = PotcarSingle.from_symbol_and_functional(
symbol=potcar_symbol, functional=functional
)
summary_stats = potcar._summary_stats.copy()
# fallback method for validation - use header hash and symbol
# note that the potcar_spec assigns PotcarSingle.symbol to "titel"
summary_stats["titel"] = potcar.TITEL
summary_stats["hash"] = potcar.md5_header_hash
if method == "potcar":
potcar = PotcarSingle.from_symbol_and_functional(
symbol=potcar_symbol, functional=functional
)
summary_stats = potcar._summary_stats.copy()
# fallback method for validation - use header hash and symbol
# note that the potcar_spec assigns PotcarSingle.symbol to "titel"
# whereas the ***correct*** field is `header`
summary_stats["titel"] = potcar.header
summary_stats["hash"] = potcar.md5_header_hash
summary_stats = [summary_stats]

elif method == "pymatgen":
summary_stats = []
for _, entries in PotcarSingle._potcar_summary_stats[
functional
].items():
summary_stats += [
{**entry, "titel": None, "hash": None}
for entry in entries
if entry["symbol"] == potcar_symbol
]

stats[calc_type].update({potcar_symbol: summary_stats})

return stats
Binary file not shown.
1 change: 1 addition & 0 deletions emmet-builders/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
long_description_content_type="text/markdown",
url="https://github.com/materialsproject/emmet",
packages=find_namespace_packages(include=["emmet.*"]),
package_data={"emmet.builders": ["*.json", "*.json.gz"]},
install_requires=[
"emmet-core[all]",
"emmet-core[ml]",
Expand Down
29 changes: 23 additions & 6 deletions emmet-builders/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
)
from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
from numpy.testing import assert_almost_equal
from monty.serialization import loadfn
from monty.serialization import loadfn, dumpfn
from emmet.core.settings import EmmetSettings

import pytest


def test_maximal_spanning_non_intersecting_subsets():
assert maximal_spanning_non_intersecting_subsets([{"A"}, {"A", "B"}]) == {
Expand Down Expand Up @@ -59,11 +61,12 @@ def test_get_hop_cutoff(test_dir):
assert len(check_mg.unique_hops) == 6


def test_get_potcar_stats():
@pytest.mark.parametrize("method", ("potcar", "pymatgen", "stored"))
def test_get_potcar_stats(method: str, tmp_path):
calc_type = EmmetSettings().VASP_DEFAULT_INPUT_SETS

try:
potcar_stats = get_potcar_stats()
potcar_stats = get_potcar_stats(method=method)
except Exception as exc:
if "No POTCAR for" in str(exc):
# No Potcar library available, skip test
Expand All @@ -78,7 +81,21 @@ def test_get_potcar_stats():
# ensure that each entry has needed fields for both
# legacy and modern potcar validation
assert all(
set(potcar_stats[calc_type][symb])
== set(["hash", "keywords", "titel", "stats"])
for symb in potcar_stats[calc_type]
[
set(entry) == set(["hash", "keywords", "titel", "stats"])
for entry in entries
]
for entries in potcar_stats[calc_type].values()
)

if method == "stored":
new_stats_path = tmp_path / "_temp_potcar_stats.json"
dumpfn(potcar_stats, new_stats_path)

new_potcar_stats = get_potcar_stats(
method="stored", path_to_stored_stats=new_stats_path
)
assert all(
potcar_stats[calc_type] == new_potcar_stats[calc_type]
for calc_type in potcar_stats
)
2 changes: 1 addition & 1 deletion emmet-cli/emmet/cli/calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

_UNPACK_INT = struct.Struct("<i").unpack
logger = logging.getLogger("emmet")
canonical_structures = defaultdict(dict)
canonical_structures = defaultdict(dict) # type: ignore[var-annotated]


def get_format(fname):
Expand Down
49 changes: 32 additions & 17 deletions emmet-core/emmet/core/vasp/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,28 +356,43 @@ def _potcar_stats_check(task_doc, potcar_stats: dict):
break

if use_legacy_hash_check:
all_match = all(
entry[key] == ref_summ_stats[key]
for key in (
"hash",
"titel",
all_match = any(
all(
entry[key] == ref_stat[key]
for key in (
"hash",
"titel",
)
)
for ref_stat in ref_summ_stats
)

else:
all_match = all(
set(ref_summ_stats["keywords"][key])
== set(entry["summary_stats"]["keywords"][key])
for key in ["header", "data"]
) and all(
abs(
ref_summ_stats["stats"][key][stat]
- entry["summary_stats"]["stats"][key][stat]
all_match = False
for ref_stat in ref_summ_stats:
key_match = all(
set(ref_stat["keywords"][key])
== set(entry["summary_stats"]["keywords"][key])
for key in ["header", "data"]
)
< data_tol
for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"]
for key in ["header", "data"]
)

data_match = False
if key_match:
data_match = all(
abs(
ref_stat["stats"][key][stat]
- entry["summary_stats"]["stats"][key][stat]
)
< data_tol
for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"]
for key in ["header", "data"]
)
all_match = key_match and data_match

if all_match:
# Found at least one match to reference POTCAR summary stats,
# that suffices for the check
break

if not all_match:
break
Expand Down

0 comments on commit 93353a9

Please sign in to comment.