Changes for tackling the LevelOfTheory errors (#970)

* Adding qc_tasks and calculation.py * big_commit for implementing the drone functionality of atomate(qchem) in emmet * ran pre-commit locally and some minor changes * writing_unit_tests_for_sp_and_opt * ran pre-commit on test files * corrected Union type error * added numpy custom validators * datetime import problem * allowing arbitrary types * further tests * further tests * change in io file convention * checking qcinput * checking qcinput * Incorporating all the pydantic 2 changes * changes in lot, task_type, calc_type * removing circular dependency * calc_doc issue * Make more fields optional in accordance with pydantic 2 * Corrected the Input Doc problems * CalcInput smx attribute issue * correcting input to qcinput and qcoutput * changes in the Optimization test doc for inputs * molecule -> initial_molecule * changes to the sp valid task_schema * test_output breakdowns * test_output breakdowns OutputDoc * test_output breakdowns OutputDoc * test_output breakdowns OutputDoc * test_output breakdowns OutputDoc * test_output breakdowns OutputDoc * test_output breakdowns OutputDoc * Changes to the TaskDoc * Changes to the TaskDoc np.array * Changes to the conftest * Changes to the conftest arrays * Changes to the conftest arrays * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * fixing bug where solvent field was being accessed as a dict * forgot pre-commit * Changed the default args for initial_molecule and optimized_molecule to be Molecule not dict * deleted the superfluous FW files * generalizing the TaskDoc.from_directory functionality to handle qchem calculation directories not generated through atomate * resolved the bugs with enthalpy, entropy and parsing frequencies * added the validate_lot flag to allow users flexibility in TaskDoc creation * corrected the str errors 
in level_of_theory * corrected the downstream errors introduced due to the validate_lot flag
materialsproject · Mar 20, 2024 · 98b75bf · 98b75bf
1 parent d666127
commit 98b75bf
Show file tree

Hide file tree

Showing 4 changed files with 120 additions and 36 deletions.
diff --git a/emmet-core/emmet/core/qc_tasks.py b/emmet-core/emmet/core/qc_tasks.py
@@ -186,17 +186,26 @@ def from_qchem_calc_doc(cls, calc_doc: Calculation) -> "InputDoc":
         InputDoc
             A summary of the input molecule and corresponding calculation parameters
         """
+        try:
+            lot_val = calc_doc.level_of_theory.value
+        except AttributeError:
+            lot_val = calc_doc.level_of_theory
+
+        try:
+            ct_val = calc_doc.calc_type.value
+        except AttributeError:
+            ct_val = calc_doc.calc_type
         # TODO : modify this to get the different variables from the task doc.
         return cls(
             initial_molecule=calc_doc.input.initial_molecule,
             rem=calc_doc.input.rem,
-            level_of_theory=calc_doc.level_of_theory.value,
+            level_of_theory=lot_val,
             task_type=calc_doc.task_type.value,
             tags=calc_doc.input.tags,
             solvation_lot_info=calc_doc.solvation_lot_info,
             # special_run_type = calc_doc.input.special_run_type,
             # smiles = calc_doc.input.smiles,
-            calc_type=calc_doc.calc_type.value,
+            calc_type=ct_val,
         )
 
 
@@ -281,6 +290,7 @@ class TaskDoc(MoleculeMetadata):
     def from_directory(
         cls: Type[_T],
         dir_name: Union[Path, str],
+        validate_lot: bool = True,
         store_additional_json: bool = True,
         additional_fields: Dict[str, Any] = None,
         **qchem_calculation_kwargs,
@@ -292,6 +302,9 @@ def from_directory(
         ----------
         dir_name
             The path to the folder containing the calculation outputs.
+        validate_lot
+            Flag for matching the basis and functional with the list of functionals consistent with MPCules.
+            Defaults to True. Change to False if you want to create a TaskDoc with other basis sets and functionals.
         store_additional_json
             Whether to store additional json files in the calculation directory.
         additional_fields
@@ -322,7 +335,11 @@ def from_directory(
                 continue
             else:
                 calc_doc = Calculation.from_qchem_files(
-                    dir_name, task_name, **files, **qchem_calculation_kwargs
+                    dir_name,
+                    task_name,
+                    **files,
+                    **qchem_calculation_kwargs,
+                    validate_lot=validate_lot,
                 )
                 calcs_reversed.append(calc_doc)
                 # all_qchem_objects.append(qchem_objects)

diff --git a/emmet-core/emmet/core/qchem/calculation.py b/emmet-core/emmet/core/qchem/calculation.py
@@ -7,6 +7,7 @@
 from typing import Any, Dict, List, Optional, Union
 
 import numpy as np
+import warnings
 from pydantic import field_validator, BaseModel, Field, ConfigDict
 from datetime import datetime
 from pymatgen.io.qchem.inputs import QCInput
@@ -316,7 +317,7 @@ class Calculation(BaseModel):
         None,
         description="Paths (relative to dir_name) of the QChem output files associated with this calculation",
     )
-    level_of_theory: LevelOfTheory = Field(
+    level_of_theory: Union[LevelOfTheory, str] = Field(
         None,
         description="Levels of theory used for the QChem calculation: For instance, B97-D/6-31g*",
     )
@@ -328,7 +329,7 @@ class Calculation(BaseModel):
         None,
         description="Calculation task type like Single Point, Geometry Optimization. Frequency...",
     )
-    calc_type: CalcType = Field(
+    calc_type: Union[CalcType, str] = Field(
         None,
         description="Combination dict of LOT + TaskType: B97-D/6-31g*/VACUUM Geometry Optimization",
     )
@@ -340,6 +341,7 @@ def from_qchem_files(
         task_name: str,
         qcinput_file: Union[Path, str],
         qcoutput_file: Union[Path, str],
+        validate_lot: bool = True,
         store_energy_trajectory: bool = False,
         qcinput_kwargs: Optional[Dict] = None,
         qcoutput_kwargs: Optional[Dict] = None,
@@ -410,10 +412,10 @@ def from_qchem_files(
                 else {k2: Path(v2) for k2, v2 in v.items()}
                 for k, v in output_file_paths.items()
             },
-            level_of_theory=level_of_theory(input_doc),
-            solvation_lot_info=lot_solvent_string(input_doc),
+            level_of_theory=level_of_theory(input_doc, validate_lot=validate_lot),
+            solvation_lot_info=lot_solvent_string(input_doc, validate_lot=validate_lot),
             task_type=task_type(input_doc),
-            calc_type=calc_type(input_doc),
+            calc_type=calc_type(input_doc, validate_lot=validate_lot),
         )
 
 
@@ -501,7 +503,9 @@ def _find_qchem_files(
     return task_files
 
 
-def level_of_theory(parameters: CalculationInput) -> LevelOfTheory:
+def level_of_theory(
+    parameters: CalculationInput, validate_lot: bool = True
+) -> LevelOfTheory:
     """
 
     Returns the level of theory for a calculation,
@@ -532,19 +536,8 @@ def level_of_theory(parameters: CalculationInput) -> LevelOfTheory:
 
     basis_lower = basis_raw.lower()
 
-    functional = [f for f in FUNCTIONALS if f.lower() == funct_lower]
-    if not functional:
-        raise ValueError(f"Unexpected functional {funct_lower}!")
-
-    functional = functional[0]
-
-    basis = [b for b in BASIS_SETS if b.lower() == basis_lower]
-    if not basis:
-        raise ValueError(f"Unexpected basis set {basis_lower}!")
-
-    basis = basis[0]
-
     solvent_method = parameters.rem.get("solvent_method", "").lower()
+
     if solvent_method == "":
         solvation = "VACUUM"
     elif solvent_method in ["pcm", "cosmo"]:
@@ -560,12 +553,44 @@ def level_of_theory(parameters: CalculationInput) -> LevelOfTheory:
     else:
         raise ValueError(f"Unexpected implicit solvent method {solvent_method}!")
 
-    lot = f"{functional}/{basis}/{solvation}"
+    if validate_lot:
+        functional = [f for f in FUNCTIONALS if f.lower() == funct_lower]
+        if not functional:
+            raise ValueError(f"Unexpected functional {funct_lower}!")
 
-    return LevelOfTheory(lot)
+        functional = functional[0]
 
+        basis = [b for b in BASIS_SETS if b.lower() == basis_lower]
+        if not basis:
+            raise ValueError(f"Unexpected basis set {basis_lower}!")
+
+        basis = basis[0]
+
+        lot = f"{functional}/{basis}/{solvation}"
+
+        return LevelOfTheory(lot)
+    else:
+        warnings.warn(
+            "User has turned the validate flag off."
+            "This can have downstream effects if the chosen functional and basis "
+            "is not in the available sets of MP employed functionals and the user"
+            "wants to include the TaskDoc in the MP infrastructure."
+            "Users should ignore this warning if their objective is just to create TaskDocs",
+            UserWarning,
+            stacklevel=2,
+        )
+        functional = funct_lower
+        basis = basis_lower
+        lot = f"{functional}/{basis}/{solvation}"
 
-def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> str:
+        return lot
+
+
+def solvent(
+    parameters: CalculationInput,
+    validate_lot: bool = True,
+    custom_smd: Optional[str] = None,
+) -> str:
     """
     Returns the solvent used for this calculation.
 
@@ -574,9 +599,11 @@ def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> s
         custom_smd: (Optional) string representing SMD parameters for a
         non-standard solvent
     """
-
-    lot = level_of_theory(parameters)
-    solvation = lot.value.split("/")[-1]
+    lot = level_of_theory(parameters, validate_lot=validate_lot)
+    if validate_lot:
+        solvation = lot.value.split("/")[-1]
+    else:
+        solvation = lot.split("/")[-1]
 
     if solvation == "PCM":
         # dielectric = float(parameters.get("solvent", {}).get("dielectric", 78.39))
@@ -631,7 +658,9 @@ def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> s
 
 
 def lot_solvent_string(
-    parameters: CalculationInput, custom_smd: Optional[str] = None
+    parameters: CalculationInput,
+    validate_lot: bool = True,
+    custom_smd: Optional[str] = None,
 ) -> str:
     """
     Returns a string representation of the level of theory and solvent used for this calculation.
@@ -641,9 +670,11 @@ def lot_solvent_string(
         custom_smd: (Optional) string representing SMD parameters for a
         non-standard solvent
     """
-
-    lot = level_of_theory(parameters).value
-    solv = solvent(parameters, custom_smd=custom_smd)
+    if validate_lot:
+        lot = level_of_theory(parameters, validate_lot=validate_lot).value
+    else:
+        lot = level_of_theory(parameters, validate_lot=validate_lot)
+    solv = solvent(parameters, custom_smd=custom_smd, validate_lot=validate_lot)
     return f"{lot}({solv})"
 
 
@@ -670,14 +701,20 @@ def task_type(
 
 
 def calc_type(
-    parameters: CalculationInput, special_run_type: Optional[str] = None
+    parameters: CalculationInput,
+    validate_lot: bool = True,
+    special_run_type: Optional[str] = None,
 ) -> CalcType:
     """
     Determines the calc type
 
     Args:
         parameters: CalculationInput parameters
     """
-    rt = level_of_theory(parameters).value
     tt = task_type(parameters, special_run_type=special_run_type).value
-    return CalcType(f"{rt} {tt}")
+    if validate_lot:
+        rt = level_of_theory(parameters, validate_lot=validate_lot).value
+        return CalcType(f"{rt} {tt}")
+    else:
+        rt = level_of_theory(parameters, validate_lot=validate_lot)
+        return str(f"{rt} {tt}")
diff --git a/emmet-core/tests/conftest_qchem.py b/emmet-core/tests/conftest_qchem.py
@@ -145,7 +145,7 @@ class SinglePointTest(SchemaTestData):
             "level_of_theory": "wB97M-V/def2-QZVPPD/SMD",
             "task_type": "Single Point",
             "calc_type": "wB97M-V/def2-QZVPPD/SMD Single Point",
-            "solvation_lot_nfo": "wB97M-V/def2-QZVPPD/SMD(SOLVENT=WATER)",
+            "solvation_lot_info": "wB97M-V/def2-QZVPPD/SMD(SOLVENT=WATER)",
         },
         "output": {
             "mulliken": [np.array([-0.713178, 0.357278, 0.3559])],
@@ -301,7 +301,7 @@ class OptimizationTest(SchemaTestData):
             "level_of_theory": "wB97M-V/def2-SVPD/SMD",
             "task_type": "Geometry Optimization",
             "calc_type": "wB97M-V/def2-SVPD/SMD Geometry Optimization",
-            "solvation_lot_nfo": "wB97M-V/def2-SVPD/SMD(SOLVENT=WATER)",
+            "solvation_lot_info": "wB97M-V/def2-SVPD/SMD(SOLVENT=WATER)",
         },
         "output": {
             "initial_molecule": {

diff --git a/emmet-core/tests/test_qc_task.py b/emmet-core/tests/test_qc_task.py
@@ -88,3 +88,33 @@ def test_task_doc(test_dir, object_name):
     # Test that additional_fields works
     test_doc = TaskDoc.from_directory(dir_name, additional_fields={"foo": "bar"})
     assert test_doc.model_dump()["additional_fields"] == {"foo": "bar"}
+
+
+@pytest.mark.parametrize(
+    "object_name",
+    [
+        pytest.param("SinglePointTest", id="SinglePointTest"),
+        pytest.param("OptimizationTest", id="OptimizationTest"),
+    ],
+)
+def test_task_doc_val_flag(test_dir, object_name):
+    from monty.json import MontyDecoder, jsanitize
+    from emmet.core.qc_tasks import TaskDoc
+
+    test_object = get_test_object(object_name)
+    dir_name = test_dir / "qchem" / test_object.folder
+    print(f"The test object is {test_object.task_doc}")
+    test_doc = TaskDoc.from_directory(dir_name, validate_lot=False)
+    assert_schemas_equal(test_doc, test_object.task_doc)
+
+    # test document can be jsanitized
+    d = jsanitize(test_doc, strict=True, enum_values=True, allow_bson=True)
+
+    # and decoded
+    MontyDecoder().process_decoded(d)
+
+    # Test that additional_fields works
+    test_doc = TaskDoc.from_directory(
+        dir_name, validate_lot=False, additional_fields={"foo": "bar"}
+    )
+    assert test_doc.model_dump()["additional_fields"] == {"foo": "bar"}