diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..4b5a294
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+    "python-envs.defaultEnvManager": "ms-python.python:conda",
+    "python-envs.defaultPackageManager": "ms-python.python:conda"
+}
\ No newline at end of file
diff --git a/pysipfenn/misc/conveniences.py b/pysipfenn/misc/conveniences.py
index 7d26e22..d87277f 100644
--- a/pysipfenn/misc/conveniences.py
+++ b/pysipfenn/misc/conveniences.py
@@ -1,22 +1,193 @@
-from importlib.resources import files
+import ast
+import inspect
 import json
+from importlib.resources import files
+from importlib import import_module
+import pkgutil
+
+def _find_pymatgen_class(class_name: str):
+    """Locate a class anywhere in pymatgen, robust to module reorganization.
+
+    Every module under the ``pymatgen`` package is imported in turn, and the first
+    attribute named ``class_name`` that is a class defined inside pymatgen is returned.
+
+    Args:
+        class_name: Unqualified name of the class to look for.
+
+    Returns:
+        The class object, or ``None`` if no importable pymatgen module exposes it.
+    """
+    import pymatgen
+    # ``walk_packages`` imports every subpackage itself; without ``onerror`` any
+    # exception other than ImportError raised there would abort the whole search.
+    modules = pkgutil.walk_packages(
+        pymatgen.__path__, prefix="pymatgen.", onerror=lambda _name: None
+    )
+    for _, modname, _ in modules:
+        try:
+            mod = import_module(modname)
+        except Exception:
+            # Modules that cannot be imported (e.g. missing optional deps) are skipped.
+            continue
+        obj = getattr(mod, class_name, None)
+        if isinstance(obj, type) and obj.__module__.startswith("pymatgen"):
+            return obj
+    return None
+
+def _locate_radius_dict(tree):
+    """Return the dict literal assigned to ``radius`` inside class ``CovalentRadius``.
+
+    Args:
+        tree: Module ``ast`` tree to search.
+
+    Returns:
+        The ``ast.Dict`` node, or ``None`` if the first ``CovalentRadius`` class found
+        has no plain or annotated ``radius = {...}`` assignment in its body.
+    """
+    for cls in ast.walk(tree):
+        if not (isinstance(cls, ast.ClassDef) and cls.name == "CovalentRadius"):
+            continue
+        for stmt in cls.body:
+            if isinstance(stmt, ast.AnnAssign) and isinstance(stmt.target, ast.Name):
+                target, value = stmt.target.id, stmt.value
+            elif (isinstance(stmt, ast.Assign)
+                    and len(stmt.targets) == 1
+                    and isinstance(stmt.targets[0], ast.Name)):
+                target, value = stmt.targets[0].id, stmt.value
+            else:
+                continue
+            if target == "radius" and isinstance(value, ast.Dict):
+                return value
+        # Only the first class named ``CovalentRadius`` is inspected.
+        return None
+    return None
+
+def patchCovalentRadiiForExoticElements() -> None:
+    """Add covalent radii for Bk through Og to pymatgen's ``CovalentRadius.radius``.
+
+    The file defining ``CovalentRadius`` in the local pymatgen install is parsed with
+    ``ast``, the ``radius`` dict literal is located, and a merged literal is spliced
+    back into that file. Radii already present in pymatgen take precedence over the
+    patch values, and the file is left untouched when every patched element already
+    has a radius.
+
+    Raises:
+        RuntimeError: If ``CovalentRadius`` or its ``radius`` dict literal cannot be
+            located in the installed pymatgen.
+    """
+    patchRadii = {
+        "Bk": 1.68,
+        "Cf": 1.68,
+        "Es": 1.65,
+        "Fm": 1.67,
+        "Md": 1.73,
+        "No": 1.76,
+        "Lr": 1.61,
+        "Rf": 1.57,
+        "Db": 1.49,
+        "Sg": 1.43,
+        "Bh": 1.41,
+        "Hs": 1.34,
+        "Mt": 1.29,
+        "Ds": 1.28,
+        "Rg": 1.21,
+        "Cn": 1.22,
+        "Nh": 1.36,
+        "Fl": 1.43,
+        "Mc": 1.62,
+        "Lv": 1.75,
+        "Ts": 1.65,
+        "Og": 1.57,
+    }
+
+    CovalentRadius = _find_pymatgen_class("CovalentRadius")
+    if CovalentRadius is None:
+        raise RuntimeError(
+            "Could not locate `CovalentRadius` class in pymatgen; "
+            "pymatgen's layout may have changed and this patch needs updating."
+        )
+    source_file = inspect.getsourcefile(CovalentRadius)
+    # Python source defaults to UTF-8 (PEP 3120); do not rely on the locale encoding.
+    with open(source_file, "r", encoding="utf-8") as f:
+        src = f.read()
+
+    dict_node = _locate_radius_dict(ast.parse(src))
+    if dict_node is None:
+        raise RuntimeError(
+            f"Could not locate `CovalentRadius.radius` dict in {source_file}; "
+            "pymatgen's layout may have changed and this patch needs updating."
+        )
+
+    existing = ast.literal_eval(dict_node)
+    # Skip writing if the file is already up to date with our patch values.
+    if all(existing.get(el) is not None for el in patchRadii):
+        return
+    merged = {**patchRadii, **existing}
+
+    # Text-mode reading normalized all line endings to "\n", the same line model
+    # ``ast`` numbers by (``str.splitlines`` would also split on form feeds).
+    lines = src.split("\n")
+    line_starts = [0]
+    for line in lines:
+        line_starts.append(line_starts[-1] + len(line) + 1)
+
+    def char_offset(lineno, byte_col):
+        # ``ast`` columns are UTF-8 byte offsets, while ``src`` is indexed by character.
+        prefix = lines[lineno - 1].encode("utf-8")[:byte_col]
+        return line_starts[lineno - 1] + len(prefix.decode("utf-8"))
+
+    # Match pymatgen's existing indentation by reading it from the source
+    # rather than hardcoding spaces, so the patch survives style changes.
+    close_line = lines[dict_node.end_lineno - 1]
+    close_indent = close_line[:len(close_line) - len(close_line.lstrip())]
+    entry_indent = close_indent + "    "
+    first_key = dict_node.keys[0] if dict_node.keys else None
+    if first_key is not None:
+        key_line_start = line_starts[first_key.lineno - 1]
+        key_prefix = src[key_line_start:char_offset(first_key.lineno, first_key.col_offset)]
+        # Only reuse the prefix when the first key starts its own line.
+        if key_prefix.isspace():
+            entry_indent = key_prefix
+
+    new_literal = "{\n" + "".join(
+        f'{entry_indent}"{el}": {v},\n' for el, v in merged.items()
+    ) + close_indent + "}"
+
+    start = char_offset(dict_node.lineno, dict_node.col_offset)
+    end = char_offset(dict_node.end_lineno, dict_node.end_col_offset)
+    with open(source_file, "w", encoding="utf-8") as f:
+        f.write(src[:start] + new_literal + src[end:])
 
 def patchPymatgenForExoticElements(
         x: bool = True,
-        iupacOrder: bool = True
+        iupacOrder: bool = True,
+        radii: bool = True,
 ) -> None:
-    """Patches pymatgen's ``core/periodic_table.json`` with (selectable) electronegativities and IUPAC ordering values
-    needed to correctly handle some exotic chemical elements. The IUPAC rules are followed exactly per Table VI in the
-    same reference. The electronegativity values are `not` Pauling ones but based on Oganov 2021 and are meant to be
+    """
+    Patch pymatgen's installed element data for elements whose properties are
+    missing or incomplete in the default pymatgen data files.
+
+    This function directly edits files inside the installed pymatgen package:
+
+    1. Patches pymatgen's ``core/periodic_table.json`` with (selectable) electronegativities and IUPAC ordering values
+    needed to correctly handle some exotic chemical elements. The IUPAC rules are followed exactly per Table VI in the
+    same reference. The electronegativity values are `not` Pauling ones but based on Oganov 2021 and are meant to be
     used primarily for providing trend information for ML model deployment (has to be included in training).
 
+    2. CovalentRadius.radius
+    Adds missing covalent radii for elements Bk through Og using `ast` to locate the dictionary definition in
+    pymatgen's source code, merge in the missing values, and write the updated literal back to disk. Radii reference
+    values from Pekka Pyykkö, The Journal of Physical Chemistry A 2015 119 (11), 2326-2337,
+    DOI: 10.1021/jp5065819
+
     Args:
         x: Patch electronegativities.
         iupacOrder: Patch IUPAC ordering of elements in chemical formulas so that they can be handled at all.
+        radii: Patch ``CovalentRadius.radius`` with covalent radii for elements past Cm.
 
     Returns:
-        None. The ``core/periodic_table.json`` file in local install of ``pymatgen`` is patched. Reinstall or upgrade
-        of ``pymatgen`` reverses the changes.
+        None. The ``core/periodic_table.json`` file and the Python file containing ``CovalentRadius`` in the
+        local install of ``pymatgen`` are patched. Reinstall or upgrade ``pymatgen`` to reverse the changes.
     """
 
     patchIUPAC = {
@@ -66,7 +237,12 @@ def patchPymatgenForExoticElements(
         if x:
             for el in patchX:
                 pt[el]["X"] = patchX[el]
-        if iupacOrder:
+        if iupacOrder:
             for el in patchIUPAC:
                 pt[el]["IUPAC ordering"] = patchIUPAC[el]
-        json.dump(pt, f)
\ No newline at end of file
+        json.dump(pt, f)
+
+    # Patch covalent radii on disk.
+    # We locate the dict with `ast` and splice a merged literal back in.
+    if radii:
+        patchCovalentRadiiForExoticElements()
\ No newline at end of file
diff --git a/pysipfenn/tests/test_conveniences.py b/pysipfenn/tests/test_conveniences.py
new file mode 100644
index 0000000..fc3b434
--- /dev/null
+++ b/pysipfenn/tests/test_conveniences.py
@@ -0,0 +1,260 @@
+import inspect
+import json
+import warnings
+import subprocess
+import sys
+
+import pytest
+from importlib.resources import files
+
+from pysipfenn.misc.conveniences import (
+    _find_pymatgen_class,
+    patchCovalentRadiiForExoticElements,
+    patchPymatgenForExoticElements,
+)
+
+
+EXPECTED_COVALENT_RADII = {
+    'Bk': 1.68,
+    'Cf': 1.68,
+    'Es': 1.65,
+    'Fm': 1.67,
+    'Md': 1.73,
+    'No': 1.76,
+    'Lr': 1.61,
+    'Rf': 1.57,
+    'Db': 1.49,
+    'Sg': 1.43,
+    'Bh': 1.41,
+    'Hs': 1.34,
+    'Mt': 1.29,
+    'Ds': 1.28,
+    'Rg': 1.21,
+    'Cn': 1.22,
+    'Nh': 1.36,
+    'Fl': 1.43,
+    'Mc': 1.62,
+    'Lv': 1.75,
+    'Ts': 1.65,
+    'Og': 1.57,
+    'H': 0.31,
+    'He': 0.28,
+    'Li': 1.28,
+    'Be': 0.96,
+    'B': 0.84,
+    'C': 0.73,
+    'N': 0.71,
+    'O': 0.66,
+    'F': 0.57,
+    'Ne': 0.58,
+    'Na': 1.66,
+    'Mg': 1.41,
+    'Al': 1.21,
+    'Si': 1.11,
+    'P': 1.07,
+    'S': 1.05,
+    'Cl': 1.02,
+    'Ar': 1.06,
+    'K': 2.03,
+    'Ca': 1.76,
+    'Sc': 1.7,
+    'Ti': 1.6,
+    'V': 1.53,
+    'Cr': 1.39,
+    'Mn': 1.5,
+    'Fe': 1.42,
+    'Co': 1.38,
+    'Ni': 1.24,
+    'Cu': 1.32,
+    'Zn': 1.22,
+    'Ga': 1.22,
+    'Ge': 1.2,
+    'As': 1.19,
+    'Se': 1.2,
+    'Br': 1.2,
+    'Kr': 1.16,
+    'Rb': 2.2,
+    'Sr': 1.95,
+    'Y': 1.9,
+    'Zr': 1.75,
+    'Nb': 1.64,
+    'Mo': 1.54,
+    'Tc': 1.47,
+    'Ru': 1.46,
+    'Rh': 1.42,
+    'Pd': 1.39,
+    'Ag': 1.45,
+    'Cd': 1.44,
+    'In': 1.42,
+    'Sn': 1.39,
+    'Sb': 1.39,
+    'Te': 1.38,
+    'I': 1.39,
+    'Xe': 1.4,
+    'Cs': 2.44,
+    'Ba': 2.15,
+    'La': 2.07,
+    'Ce': 2.04,
+    'Pr': 2.03,
+    'Nd': 2.01,
+    'Pm': 1.99,
+    'Sm': 1.98,
+    'Eu': 1.98,
+    'Gd': 1.96,
+    'Tb': 1.94,
+    'Dy': 1.92,
+    'Ho': 1.92,
+    'Er': 1.89,
+    'Tm': 1.9,
+    'Yb': 1.87,
+    'Lu': 1.87,
+    'Hf': 1.75,
+    'Ta': 1.7,
+    'W': 1.62,
+    'Re': 1.51,
+    'Os': 1.44,
+    'Ir': 1.41,
+    'Pt': 1.36,
+    'Au': 1.36,
+    'Hg': 1.32,
+    'Tl': 1.45,
+    'Pb': 1.46,
+    'Bi': 1.48,
+    'Po': 1.4,
+    'At': 1.5,
+    'Rn': 1.5,
+    'Fr': 2.6,
+    'Ra': 2.21,
+    'Ac': 2.15,
+    'Th': 2.06,
+    'Pa': 2,
+    'U': 1.96,
+    'Np': 1.9,
+    'Pu': 1.87,
+    'Am': 1.8,
+    'Cm': 1.69
+}
+
+_SUBPROCESS_CODE = r"""
+import json, math
+from pysipfenn.misc.conveniences import _find_pymatgen_class
+from pymatgen.core import Element
+
+CovalentRadius = _find_pymatgen_class("CovalentRadius")
+if CovalentRadius is None:
+    raise RuntimeError("Could not locate CovalentRadius in pymatgen")
+
+def safe_float(x):
+    try:
+        f = float(x)
+        return None if math.isnan(f) else f
+    except (TypeError, ValueError):
+        return None
+
+state = {
+    'radii': dict(CovalentRadius.radius),
+    'X_Og': safe_float(Element('Og').X),
+    'X_He': safe_float(Element('He').X),
+    'X_Ar': safe_float(Element('Ar').X),
+}
+print('===STATE===')
+print(json.dumps(state))
+"""
+
+
+def _read_pymatgen_state():
+    """Run pymatgen in a fresh interpreter and return the current state as a dict."""
+    result = subprocess.run(
+        [sys.executable, "-c", _SUBPROCESS_CODE],
+        capture_output=True, text=True, check=True,
+    )
+    lines = result.stdout.splitlines()
+    try:
+        idx = lines.index("===STATE===")
+    except ValueError:
+        raise RuntimeError(
+            f"Subprocess did not emit state marker.\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}"
+        )
+    return json.loads(lines[idx + 1])
+
+
+def _warn_if_radii_drift(actual_radii):
+    """Emit a UserWarning (not failure) if patched radii dict differs from the expected snapshot."""
+    if actual_radii == EXPECTED_COVALENT_RADII:
+        return
+    diff = {
+        k: (actual_radii.get(k), EXPECTED_COVALENT_RADII.get(k))
+        for k in set(actual_radii) | set(EXPECTED_COVALENT_RADII)
+        if actual_radii.get(k) != EXPECTED_COVALENT_RADII.get(k)
+    }
+    warnings.warn(
+        f"CovalentRadius.radius after patching does not match `EXPECTED_COVALENT_RADII`. "
+        f"Differences (key: actual vs expected): {diff} \n"
+        "This may indicate that pymatgen updated their covalent radii dict and the patch is out of sync.",
+        UserWarning,
+        stacklevel=2,
+    )
+
+@pytest.fixture
+def pymatgen_snapshot():
+    """Snapshot pymatgen's mutated files before the test, restore them after.
+
+    Captures the periodic table JSON and the .py file containing CovalentRadius.
+    Both are written back verbatim during teardown — even if the test raises —
+    so other tests in the suite are not affected by mutations.
+    """
+    radii_file = inspect.getsourcefile(_find_pymatgen_class("CovalentRadius"))
+    periodic_table_file = str(files("pymatgen").joinpath("core/periodic_table.json"))
+
+    originals = {}
+    for path in (radii_file, periodic_table_file):
+        with open(path, "rb") as f:
+            originals[path] = f.read()
+
+    yield
+
+    for path, content in originals.items():
+        with open(path, "wb") as f:
+            f.write(content)
+
+def test_find_pymatgen_class():
+    cls = _find_pymatgen_class("CovalentRadius")
+    assert cls is not None
+    assert cls.__name__ == "CovalentRadius"
+    assert cls.__module__.startswith("pymatgen")
+    assert _find_pymatgen_class("DefinitelyNotAPymatgenClass_xyzzy") is None
+
+def test_patchCovalentRadiiForExoticElements(pymatgen_snapshot):
+    patchCovalentRadiiForExoticElements()
+    state = _read_pymatgen_state()
+
+    expected_patch_keys = {
+        "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr", "Rf", "Db", "Sg",
+        "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og",
+    }
+    missing = expected_patch_keys - set(state["radii"])
+    assert not missing, f"Patched dict is missing keys: {sorted(missing)}"
+    _warn_if_radii_drift(state["radii"])
+
+# def test_patchPymatgenForExoticElements_all_flags(pymatgen_snapshot):
+#     patchPymatgenForExoticElements()
+#     state = _read_pymatgen_state()
+
+#     assert state["X_Og"] == pytest.approx(2.59)
+#     assert state["X_He"] == pytest.approx(4.42)
+#     assert state["X_Ar"] == pytest.approx(3.57)
+
+#     assert "Bk" in state["radii"]
+#     assert state["radii"]["Og"] == pytest.approx(1.57)
+
+#     _warn_if_radii_drift(state["radii"])
+
+def test_patchPymatgenForExoticElements_only_x(pymatgen_snapshot):
+    patchPymatgenForExoticElements(x=True, iupacOrder=False, radii=False)
+    state = _read_pymatgen_state()
+
+    assert state["X_Og"] == pytest.approx(2.59)
+    assert state["X_He"] == pytest.approx(4.42)
+
+    assert "Bk" not in state["radii"]
+    assert "Og" not in state["radii"]
\ No newline at end of file