Skip to content

Commit

Permalink
Changes for tackling the LevelOfTheory errors (#970)
Browse files Browse the repository at this point in the history
* Adding qc_tasks and calculation.py

* big_commit for implementing the drone functionality of atomate(qchem) in emmet

* ran pre-commit locally and some minor changes

* writing_unit_tests_for_sp_and_opt

* ran pre-commit on test files

* corrected Union type error

* added numpy custom validators

* datetime import problem

* allowing arbitrary types

* further tests

* further tests

* change in io file convention

* checking qcinput

* checking qcinput

* Incorporating all the pydantic 2 changes

* changes in lot, task_type, calc_type

* removing circular dependency

* calc_doc issue

* Make more fields optional in accordance with pydantic 2

* Corrected the Input Doc problems

* CalcInput smx attribute issue

* correcting input to qcinput and qcoutput

* changes in the Optimization test doc for inputs

* molecule -> initial_molecule

* changes to the sp valid task_schema

* test_output breakdowns

* test_output breakdowns OutputDoc

* test_output breakdowns OutputDoc

* test_output breakdowns OutputDoc

* test_output breakdowns OutputDoc

* test_output breakdowns OutputDoc

* test_output breakdowns OutputDoc

* Changes to the TaskDoc

* Changes to the TaskDoc np.array

* Changes to the conftest

* Changes to the conftest arrays

* Changes to the conftest arrays

* Changes to test code

* Changes to test code

* Changes to test code

* Changes to test code

* Changes to test code

* Changes to test code

* Changes to test code

* Changes to test code

* Changes to test code

* Changes to test code

* fixing bug where solvent field was being accessed as a dict

* forgot pre-commit

* Changed the default args for initial_molecule and optimized_molecule to be Molecule not dict

* deleted the superfluous FW files

* making the TaskDoc.from_directory functionality more general, so it can handle qchem calculation directories not generated through atomate

* resolved the bugs with enthalpy, entropy and parsing frequencies

* added the validate_lot flag to allow users flexibility in TaskDoc creation

* corrected the str errors in level_of_theory

* corrected the downstream errors introduced due to the validate_lot flag
  • Loading branch information
rdguha1995 committed Mar 20, 2024
1 parent d666127 commit 98b75bf
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 36 deletions.
23 changes: 20 additions & 3 deletions emmet-core/emmet/core/qc_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,17 +186,26 @@ def from_qchem_calc_doc(cls, calc_doc: Calculation) -> "InputDoc":
InputDoc
A summary of the input molecule and corresponding calculation parameters
"""
try:
lot_val = calc_doc.level_of_theory.value
except AttributeError:
lot_val = calc_doc.level_of_theory

try:
ct_val = calc_doc.calc_type.value
except AttributeError:
ct_val = calc_doc.calc_type
# TODO : modify this to get the different variables from the task doc.
return cls(
initial_molecule=calc_doc.input.initial_molecule,
rem=calc_doc.input.rem,
level_of_theory=calc_doc.level_of_theory.value,
level_of_theory=lot_val,
task_type=calc_doc.task_type.value,
tags=calc_doc.input.tags,
solvation_lot_info=calc_doc.solvation_lot_info,
# special_run_type = calc_doc.input.special_run_type,
# smiles = calc_doc.input.smiles,
calc_type=calc_doc.calc_type.value,
calc_type=ct_val,
)


Expand Down Expand Up @@ -281,6 +290,7 @@ class TaskDoc(MoleculeMetadata):
def from_directory(
cls: Type[_T],
dir_name: Union[Path, str],
validate_lot: bool = True,
store_additional_json: bool = True,
additional_fields: Dict[str, Any] = None,
**qchem_calculation_kwargs,
Expand All @@ -292,6 +302,9 @@ def from_directory(
----------
dir_name
The path to the folder containing the calculation outputs.
validate_lot
Flag for matching the basis and functional with the list of functionals consistent with MPCules.
Defaults to True. Change to False if you want to create a TaskDoc with other basis sets and functionals.
store_additional_json
Whether to store additional json files in the calculation directory.
additional_fields
Expand Down Expand Up @@ -322,7 +335,11 @@ def from_directory(
continue
else:
calc_doc = Calculation.from_qchem_files(
dir_name, task_name, **files, **qchem_calculation_kwargs
dir_name,
task_name,
**files,
**qchem_calculation_kwargs,
validate_lot=validate_lot,
)
calcs_reversed.append(calc_doc)
# all_qchem_objects.append(qchem_objects)
Expand Down
99 changes: 68 additions & 31 deletions emmet-core/emmet/core/qchem/calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Any, Dict, List, Optional, Union

import numpy as np
import warnings
from pydantic import field_validator, BaseModel, Field, ConfigDict
from datetime import datetime
from pymatgen.io.qchem.inputs import QCInput
Expand Down Expand Up @@ -316,7 +317,7 @@ class Calculation(BaseModel):
None,
description="Paths (relative to dir_name) of the QChem output files associated with this calculation",
)
level_of_theory: LevelOfTheory = Field(
level_of_theory: Union[LevelOfTheory, str] = Field(
None,
description="Levels of theory used for the QChem calculation: For instance, B97-D/6-31g*",
)
Expand All @@ -328,7 +329,7 @@ class Calculation(BaseModel):
None,
description="Calculation task type like Single Point, Geometry Optimization. Frequency...",
)
calc_type: CalcType = Field(
calc_type: Union[CalcType, str] = Field(
None,
description="Combination dict of LOT + TaskType: B97-D/6-31g*/VACUUM Geometry Optimization",
)
Expand All @@ -340,6 +341,7 @@ def from_qchem_files(
task_name: str,
qcinput_file: Union[Path, str],
qcoutput_file: Union[Path, str],
validate_lot: bool = True,
store_energy_trajectory: bool = False,
qcinput_kwargs: Optional[Dict] = None,
qcoutput_kwargs: Optional[Dict] = None,
Expand Down Expand Up @@ -410,10 +412,10 @@ def from_qchem_files(
else {k2: Path(v2) for k2, v2 in v.items()}
for k, v in output_file_paths.items()
},
level_of_theory=level_of_theory(input_doc),
solvation_lot_info=lot_solvent_string(input_doc),
level_of_theory=level_of_theory(input_doc, validate_lot=validate_lot),
solvation_lot_info=lot_solvent_string(input_doc, validate_lot=validate_lot),
task_type=task_type(input_doc),
calc_type=calc_type(input_doc),
calc_type=calc_type(input_doc, validate_lot=validate_lot),
)


Expand Down Expand Up @@ -501,7 +503,9 @@ def _find_qchem_files(
return task_files


def level_of_theory(parameters: CalculationInput) -> LevelOfTheory:
def level_of_theory(
parameters: CalculationInput, validate_lot: bool = True
) -> LevelOfTheory:
"""
Returns the level of theory for a calculation,
Expand Down Expand Up @@ -532,19 +536,8 @@ def level_of_theory(parameters: CalculationInput) -> LevelOfTheory:

basis_lower = basis_raw.lower()

functional = [f for f in FUNCTIONALS if f.lower() == funct_lower]
if not functional:
raise ValueError(f"Unexpected functional {funct_lower}!")

functional = functional[0]

basis = [b for b in BASIS_SETS if b.lower() == basis_lower]
if not basis:
raise ValueError(f"Unexpected basis set {basis_lower}!")

basis = basis[0]

solvent_method = parameters.rem.get("solvent_method", "").lower()

if solvent_method == "":
solvation = "VACUUM"
elif solvent_method in ["pcm", "cosmo"]:
Expand All @@ -560,12 +553,44 @@ def level_of_theory(parameters: CalculationInput) -> LevelOfTheory:
else:
raise ValueError(f"Unexpected implicit solvent method {solvent_method}!")

lot = f"{functional}/{basis}/{solvation}"
if validate_lot:
functional = [f for f in FUNCTIONALS if f.lower() == funct_lower]
if not functional:
raise ValueError(f"Unexpected functional {funct_lower}!")

return LevelOfTheory(lot)
functional = functional[0]

basis = [b for b in BASIS_SETS if b.lower() == basis_lower]
if not basis:
raise ValueError(f"Unexpected basis set {basis_lower}!")

basis = basis[0]

lot = f"{functional}/{basis}/{solvation}"

return LevelOfTheory(lot)
else:
warnings.warn(
"User has turned the validate flag off."
"This can have downstream effects if the chosen functional and basis "
"is not in the available sets of MP employed functionals and the user"
"wants to include the TaskDoc in the MP infrastructure."
"Users should ignore this warning if their objective is just to create TaskDocs",
UserWarning,
stacklevel=2,
)
functional = funct_lower
basis = basis_lower
lot = f"{functional}/{basis}/{solvation}"

def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> str:
return lot


def solvent(
parameters: CalculationInput,
validate_lot: bool = True,
custom_smd: Optional[str] = None,
) -> str:
"""
Returns the solvent used for this calculation.
Expand All @@ -574,9 +599,11 @@ def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> s
custom_smd: (Optional) string representing SMD parameters for a
non-standard solvent
"""

lot = level_of_theory(parameters)
solvation = lot.value.split("/")[-1]
lot = level_of_theory(parameters, validate_lot=validate_lot)
if validate_lot:
solvation = lot.value.split("/")[-1]
else:
solvation = lot.split("/")[-1]

if solvation == "PCM":
# dielectric = float(parameters.get("solvent", {}).get("dielectric", 78.39))
Expand Down Expand Up @@ -631,7 +658,9 @@ def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> s


def lot_solvent_string(
parameters: CalculationInput, custom_smd: Optional[str] = None
parameters: CalculationInput,
validate_lot: bool = True,
custom_smd: Optional[str] = None,
) -> str:
"""
Returns a string representation of the level of theory and solvent used for this calculation.
Expand All @@ -641,9 +670,11 @@ def lot_solvent_string(
custom_smd: (Optional) string representing SMD parameters for a
non-standard solvent
"""

lot = level_of_theory(parameters).value
solv = solvent(parameters, custom_smd=custom_smd)
if validate_lot:
lot = level_of_theory(parameters, validate_lot=validate_lot).value
else:
lot = level_of_theory(parameters, validate_lot=validate_lot)
solv = solvent(parameters, custom_smd=custom_smd, validate_lot=validate_lot)
return f"{lot}({solv})"


Expand All @@ -670,14 +701,20 @@ def task_type(


def calc_type(
    parameters: CalculationInput,
    validate_lot: bool = True,
    special_run_type: Optional[str] = None,
) -> Union[CalcType, str]:
    """
    Determines the calc type

    Args:
        parameters: CalculationInput parameters
        validate_lot: Forwarded to ``level_of_theory``. When True the level
            of theory is a ``LevelOfTheory`` member and the result is wrapped
            in ``CalcType``; when False the level of theory is a plain string
            and so is the result.
        special_run_type: Forwarded unchanged to ``task_type``.

    Returns:
        ``CalcType`` built from "<level of theory> <task type>" when
        ``validate_lot`` is True, otherwise that same text as a ``str``.
    """
    # Task type is resolved first, matching the original evaluation order.
    tt = task_type(parameters, special_run_type=special_run_type).value

    # A single call covers both modes; only the unwrapping of the result differs.
    lot = level_of_theory(parameters, validate_lot=validate_lot)

    if validate_lot:
        # Validated mode yields an enum member, so use its underlying string.
        return CalcType(f"{lot.value} {tt}")

    # Unvalidated mode: `lot` is already a free-form string that may not be a
    # known CalcType member, so the combined label stays a plain string.
    return f"{lot} {tt}"
4 changes: 2 additions & 2 deletions emmet-core/tests/conftest_qchem.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class SinglePointTest(SchemaTestData):
"level_of_theory": "wB97M-V/def2-QZVPPD/SMD",
"task_type": "Single Point",
"calc_type": "wB97M-V/def2-QZVPPD/SMD Single Point",
"solvation_lot_nfo": "wB97M-V/def2-QZVPPD/SMD(SOLVENT=WATER)",
"solvation_lot_info": "wB97M-V/def2-QZVPPD/SMD(SOLVENT=WATER)",
},
"output": {
"mulliken": [np.array([-0.713178, 0.357278, 0.3559])],
Expand Down Expand Up @@ -301,7 +301,7 @@ class OptimizationTest(SchemaTestData):
"level_of_theory": "wB97M-V/def2-SVPD/SMD",
"task_type": "Geometry Optimization",
"calc_type": "wB97M-V/def2-SVPD/SMD Geometry Optimization",
"solvation_lot_nfo": "wB97M-V/def2-SVPD/SMD(SOLVENT=WATER)",
"solvation_lot_info": "wB97M-V/def2-SVPD/SMD(SOLVENT=WATER)",
},
"output": {
"initial_molecule": {
Expand Down
30 changes: 30 additions & 0 deletions emmet-core/tests/test_qc_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,33 @@ def test_task_doc(test_dir, object_name):
# Test that additional_fields works
test_doc = TaskDoc.from_directory(dir_name, additional_fields={"foo": "bar"})
assert test_doc.model_dump()["additional_fields"] == {"foo": "bar"}


@pytest.mark.parametrize(
    "object_name",
    [
        pytest.param(case_name, id=case_name)
        for case_name in ("SinglePointTest", "OptimizationTest")
    ],
)
def test_task_doc_val_flag(test_dir, object_name):
    """Build a TaskDoc with ``validate_lot=False`` and check it against the reference data.

    Also checks that the document can be sanitized and decoded, and that
    ``additional_fields`` is carried through when validation is turned off.
    """
    from monty.json import MontyDecoder, jsanitize
    from emmet.core.qc_tasks import TaskDoc

    reference = get_test_object(object_name)
    calc_dir = test_dir / "qchem" / reference.folder
    print(f"The test object is {reference.task_doc}")

    # Parse the directory without level-of-theory validation and compare
    # against the expected schema.
    parsed_doc = TaskDoc.from_directory(calc_dir, validate_lot=False)
    assert_schemas_equal(parsed_doc, reference.task_doc)

    # The document must be sanitizable ...
    sanitized = jsanitize(parsed_doc, strict=True, enum_values=True, allow_bson=True)

    # ... and decodable again.
    MontyDecoder().process_decoded(sanitized)

    # additional_fields must be carried into the dumped model.
    extra_fields = {"foo": "bar"}
    doc_with_extras = TaskDoc.from_directory(
        calc_dir, validate_lot=False, additional_fields={"foo": "bar"}
    )
    assert doc_with_extras.model_dump()["additional_fields"] == extra_fields

0 comments on commit 98b75bf

Please sign in to comment.