"""subreor
Module to reorient a substituent to match a defined coordinate system
First an atom is positioned at the origin.
By convention, this is the atom of the group that is directly bonded to the rest of the molecule.
The rest of the molecule is placed along the -x axis. The remaining axes are defined as follows:
* if there is one lone pair(VSCC), that point lies on the +y
* if there are two lone pairs, the average position of them lies on the +y
* if there are no lone pairs, map the BCPs of the atom at the origin to a reference. Identify the
closest match to the reference to determine a BCP to set as +y
"""
# sys.path.append(sys.path[0].replace("subproptools", "") + "/" + "referenceMaps")
import math # sqrt
import os
import numpy as np
import pandas as pd # data frames
from subproptools import (
qtaim_extract as qt, # sum file manipulation and property extraction
)
from subproptools.reference_maps import _REFERENCE_MAP
_DEFAULT_STAT_DICT = {
"rho": {"mean": 0.290686, "sd": 0.077290},
"lambda1": {"mean": -0.725552, "sd": 0.299756},
"lambda2": {"mean": -0.678830, "sd": 0.291123},
"lambda3": {"mean": 0.583261, "sd": 0.449474},
"DelSqRho": {"mean": -0.821120, "sd": 0.570553},
"Ellipticity": {"mean": 0.077722, "sd": 0.137890},
"V": {"mean": -0.475963, "sd": 0.332327},
"G": {"mean": 0.135342, "sd": 0.184923},
"H": {"mean": -0.340622, "sd": 0.176627},
"DI(R,G)": {"mean": 1.081894, "sd": 0.369324},
}
def _get_bcp_reference(originAtom, numBonds):
"""Takes atom and number of bonds and chooses the right map to use."""
# pylint:disable=too-many-branches
if originAtom == "C" and numBonds == 4: # sp3 carbon
retDict = _REFERENCE_MAP["C"]["sp3"]
elif originAtom == "C" and numBonds == 3: # sp2 carbon
retDict = _REFERENCE_MAP["C"]["sp2"]
# note linear shouldn't get to this point
elif originAtom == "B" and numBonds == 3: # planar boron
# print('planar boron')
retDict = _REFERENCE_MAP["B"]["sp2"]
elif originAtom == "B" and numBonds == 4: # sp3 boron
# print('sp3 boron')
retDict = _REFERENCE_MAP["B"]["sp3"]
elif originAtom == "N" and numBonds == 4:
# print('ammonium')
retDict = _REFERENCE_MAP["N"]["sp3"]
elif originAtom == "N" and numBonds == 3:
# print('ammonium')
retDict = _REFERENCE_MAP["N"]["sp2"]
elif originAtom == "Al" and numBonds == 3: # planar aluminum
retDict = _REFERENCE_MAP["Al"]["sp2"]
# elif originAtom =='Al' and numBonds==4: #sp3 boron
# print('sp3 aluminum')
elif originAtom == "Si" and numBonds == 4: # sp3 carbon
# print('sp3 silicon')
retDict = _REFERENCE_MAP["Si"]["sp3"]
elif originAtom == "Si" and numBonds == 3: # sp2 si
# print('sp2 silicon')
retDict = _REFERENCE_MAP["Si"]["sp2"]
elif originAtom == "P" and numBonds == 4:
# print('phosphonium')
retDict = _REFERENCE_MAP["P"]["sp3"]
elif originAtom == "Se" and numBonds == 4:
retDict = _REFERENCE_MAP["Se"]["sp3"]
elif originAtom == "As" and numBonds == 4:
retDict = _REFERENCE_MAP["As"]["sp3"]
elif originAtom == "Ge" and numBonds == 4:
retDict = _REFERENCE_MAP["Ge"]["sp3"]
elif originAtom == "S" and numBonds == 4:
retDict = _REFERENCE_MAP["S"]["sp3"]
return retDict
def _find_bcp_match(data, originAtomXYZ, negXAtomLabel, originAtomLabel, atomDict):
"""
Finds the atoms connected to the origin atom, and arranges the BCPs in a clockwise sense
(assuming -x atom going into page)
Args:
data: the lines of a .sum file stored as a list
originAtomXYZ - np.array of xyz coordinates of atom that was to be set to origin
negXAtomLabel - the atom connected to origin that is positioned along -x axis. e.g. "H2"
originAtomLabel - the origin atom of substituent bonded to substrate. Format e.g. "C1"
Returns:
Dictionary of BCP properties for atoms (non-neg-x atom) that are bonded to the origin atom.
These are ordered in a clockwise rotational sense
"""
# find bcps connected to origin atom that are not the -x axis atom
originBCPs = qt.find_connected(data, negXAtomLabel, originAtomLabel)
# print(originBCPs)
bcpPropDict = {}
# get the bcp properties
for bcp in originBCPs:
# bcpBlock = qt.lock()
bcpPropDict.update(
{bcp[0] + "-" + bcp[1]: qt.get_bcp_properties(data, atPair=bcp)}
)
# print(bcpPropDict)
if len(bcpPropDict) < 2:
clockwiseKeys = []
elif len(bcpPropDict) == 2:
clockwiseKeys = bcpPropDict
else:
clockwiseKeys = _find_clockwise_rot(
bcpPropDict, originAtomLabel, negXAtomLabel, atomDict, originAtomXYZ
)
# at this point have bcpDictionary ordered from 1st to last with clockwise bcp
return clockwiseKeys # this is a dictionary of bcps
def _find_clockwise_rot(
bcpPropDict,
originAtomLabel,
negXAtomLabel,
atomDict,
originAtomXYZ=np.array([0.0, 0.0, 0.0]),
):
"""given dictionary of bcp properties, find which ones are in a clockwise rotation"""
# return list of dictionary keys ordered for clockwise rotation
crossDict = {}
for key1 in bcpPropDict:
# print(key1)
for key2 in bcpPropDict:
# print(key2)
if key1 != key2:
to_rotate = np.vstack(
(
originAtomXYZ,
atomDict[negXAtomLabel]["xyz"],
bcpPropDict[key1]["xyz"],
bcpPropDict[key2]["xyz"],
)
)
rot_geom = _set_xaxis(
_set_origin(to_rotate, 1),
2,
)
# zero_3d = np.array([0.0, 0.0, 0.0])
bcp_1_xyz = rot_geom[2]
bcp_2_xyz = rot_geom[3]
bcp_2_xyz[0] = 0.0
bcp_1_xyz[0] = 0.0
cross = np.cross(bcp_1_xyz, bcp_2_xyz)[0]
if cross < 0:
bondAt1 = key1.replace(originAtomLabel + "-", "")
bondAt1 = bondAt1.replace("-" + originAtomLabel, "")
bondAt2 = key2.replace(originAtomLabel + "-", "")
bondAt2 = bondAt2.replace("-" + originAtomLabel, "")
crossDict.update(
{
bondAt1
+ "-To-"
+ bondAt2: {"Is Clockwise": cross < 0, "cross": cross}
}
)
return _order_bcp_dict(crossDict, bcpPropDict)
def _order_bcp_dict(crossDict, bcpPropDict):
"""Take a dictionary of BCPs all connected to one atom and return them sorted in clockwise manner"""
orderDict = {}
for cw in crossDict:
orderDict.update({cw: {"Start": cw.split("-")[0], "End": cw.split("-")[2]}})
keysList = list(orderDict.keys())
if len(keysList) == 3:
if (
orderDict[keysList[0]]["End"] != orderDict[keysList[1]]["Start"]
and orderDict[keysList[0]]["End"] == orderDict[keysList[2]]["Start"]
):
reordered_dict = {
k: orderDict[k] for k in [keysList[0], keysList[2], keysList[1]]
}
else:
reordered_dict = orderDict
ordered_bcp_props = {}
for order in reordered_dict:
for bcp in bcpPropDict:
# print(bcp)
if reordered_dict[order]["Start"] in bcp:
ordered_bcp_props.update({bcp: bcpPropDict[bcp]})
continue
else:
ordered_bcp_props = bcpPropDict
return ordered_bcp_props
def _set_origin(xyzArray, originAtom):
"""shifts all points in xyz geometry by originAtom coordinates, returns xyz geometry
Args:
xyzArray: [3XN] np.array where N is number of atoms
origin Atom: integer label of atom in molecule to be used as origin, starting at 1
Returns:
Geometry shifted by setting xyzArray[originAtom-1] as the origin"""
org = xyzArray[
originAtom - 1, # -1 to convert atom index to python starting at 0
]
return xyzArray - org
def _get_lengths(xyzArray):
"""given xyz geometry returns np.array of magnitudes of squared distances from the origin"""
lengths = np.array([])
for atom in xyzArray:
length = 0
for coord in atom:
length += coord**2
lengths = np.append(lengths, length)
return lengths
def _zero_y_for_negx(t_xyz, negXAtom):
"""perform first rotation in setting -x axis to zero the y value
Args:
t_xyz: [Nx3] np.array for N atoms
negXAtom: integer label of atom to be positioned on -x axis
Returns:
[Nx3] rotated geometry with negXAtom on the xz plane
"""
if t_xyz[0, negXAtom - 1] == 0 and t_xyz[1, negXAtom - 1] == 0: # on xy
# print('hi')
G = np.array([[0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [-1.0, 0.0, 0.0]])
elif (
t_xyz[1, negXAtom - 1] == 0 and t_xyz[2, negXAtom - 1] == 0
): # already on x axis atom 2 y and z=0
# print('here')
G = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
elif t_xyz[0, negXAtom - 1] == 0 and t_xyz[2, negXAtom - 1] == 0:
# print('ho there')
G = np.array([[0.0, 1.0, 0.0], [-1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
else:
# print('no way')
d = t_xyz[0, negXAtom - 1] / math.sqrt(
t_xyz[0, negXAtom - 1] ** 2 + t_xyz[1, negXAtom - 1] ** 2
)
s = t_xyz[1, negXAtom - 1] / math.sqrt(
t_xyz[0, negXAtom - 1] ** 2 + t_xyz[1, negXAtom - 1] ** 2
)
G = np.array([[d, s, 0.0], [-s, d, 0.0], [0.0, 0.0, 1.0]])
return np.matmul(G, t_xyz)
def _zero_z_for_negx(t_xyz, negXAtom):
"""perform second rotation in setting -x axis to zero the z value
Args:
t_xyz: [Nx3] np.array for N atoms
negXAtom: integer label of atom to be positioned on -x axis
Returns:
[Nx3] rotated geometry with negXAtom on the x acis(may be + or -)"""
# perform after y
d = t_xyz[0, negXAtom - 1] / math.sqrt(
t_xyz[0, negXAtom - 1] ** 2 + t_xyz[2, negXAtom - 1] ** 2
)
s = t_xyz[2, negXAtom - 1] / math.sqrt(
t_xyz[0, negXAtom - 1] ** 2 + t_xyz[2, negXAtom - 1] ** 2
)
G = np.array([[d, 0, s], [0, 1, 0], [-s, 0, d]])
return np.matmul(G, t_xyz)
def _set_xaxis(xyzArray, negXAtom):
"""Given xyz geometry with atom at origin, areturn xyz geometry with negXAtom on -x.
Args:
xyzArray: [3xN] np.array for N atoms of geometry
negXAtom: integer label of atom to be positioned on -x axis
Returns:
[Nx3] rotated geometry with negXAtom on the xz plane
"""
t_xyz = xyzArray.T
# define initial xyz vector lengths. Should be unchanged after rotation
tol = 0.0001 # tolerance for change
initial_lengths = _get_lengths(xyzArray)
t_rot1 = _zero_y_for_negx(t_xyz, negXAtom)
rot1_lengths = _get_lengths(t_rot1.T)
if np.any((rot1_lengths - initial_lengths) > tol):
raise AssertionError("Geometry change after rotation exceeded tolerance")
t_rot2 = _zero_z_for_negx(t_rot1, negXAtom)
rot2_lengths = _get_lengths(t_rot2.T)
if np.any((rot2_lengths - initial_lengths) > tol):
raise AssertionError("Geometry change after rotation exceeded tolerance")
if t_rot2[0, negXAtom - 1] > 0: # if negxatom is along +x, rotate 180
G = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, -1]])
t_rot_final = np.matmul(G, t_rot2)
return t_rot_final.T
return t_rot2.T
def _popelier_match_scores_2(testDict, refDict, statDict):
"""Calculate QTMS differences between BCP dictionaries"""
refDictKeysList = list(refDict.keys())
testKeysList = list(testDict.keys())
# BCP distances for first match
dif00 = _get_popelier_dif(
testDict[testKeysList[0]], refDict[refDictKeysList[0]], statDict
)
dif11 = _get_popelier_dif(
testDict[testKeysList[1]], refDict[refDictKeysList[1]], statDict
)
# BCP distances for second match
dif10 = _get_popelier_dif(
testDict[testKeysList[1]], refDict[refDictKeysList[0]], statDict
)
# BCP distances for third match
dif01 = _get_popelier_dif(
testDict[testKeysList[0]], refDict[refDictKeysList[1]], statDict
)
return [dif00 + dif11, dif10 + dif01]
# Total score list and index of closest space
def _popelier_match_scores(testDict, refDict, statDict):
"""Calculate QTMS differences between BCP dictionaries"""
refDictKeysList = list(refDict.keys())
testKeysList = list(testDict.keys())
# BCP distances for first match
dif00 = _get_popelier_dif(
testDict[testKeysList[0]], refDict[refDictKeysList[0]], statDict
)
dif11 = _get_popelier_dif(
testDict[testKeysList[1]], refDict[refDictKeysList[1]], statDict
)
dif22 = _get_popelier_dif(
testDict[testKeysList[2]], refDict[refDictKeysList[2]], statDict
)
# BCP distances for second match
dif10 = _get_popelier_dif(
testDict[testKeysList[1]], refDict[refDictKeysList[0]], statDict
)
dif21 = _get_popelier_dif(
testDict[testKeysList[2]], refDict[refDictKeysList[1]], statDict
)
dif02 = _get_popelier_dif(
testDict[testKeysList[0]], refDict[refDictKeysList[2]], statDict
)
# BCP distances for third match
dif20 = _get_popelier_dif(
testDict[testKeysList[2]], refDict[refDictKeysList[0]], statDict
)
dif01 = _get_popelier_dif(
testDict[testKeysList[0]], refDict[refDictKeysList[1]], statDict
)
dif12 = _get_popelier_dif(
testDict[testKeysList[1]], refDict[refDictKeysList[2]], statDict
)
return [dif00 + dif11 + dif22, dif10 + dif21 + dif02, dif20 + dif01 + dif12]
# Total score list and index of closest space
def _align_dicts_2(testDict, refDict, statDict):
"""
Arguments:
testDict and refDict:For two dictionaries that are ordered in same rotational sense
statDict: scaling to use
Returns:
np.array of xyz point to use
"""
# pylint:disable=too-many-branches
matchScores = _popelier_match_scores_2(testDict, refDict, statDict)
refDictKeysList = list(refDict.keys())
testKeysList = list(testDict.keys())
minInd = matchScores.index(min(matchScores))
# identify the refDict BCP that is on the +y axis
if (
refDict[refDictKeysList[0]]["xyz"][1] > 0
and abs(refDict[refDictKeysList[0]]["xyz"][2]) < 0.01
):
refPosY = 0
elif (
refDict[refDictKeysList[1]]["xyz"][1] > 0
and abs(refDict[refDictKeysList[1]]["xyz"][2]) < 0.01
):
refPosY = 1
# for the best match, set the posYPoint to the one that mapped to refDict +y point
# if first element of match scores, 0 of ref maps to 0 of test. If second element,
# 1 of ref maps to 0 of test
if minInd == 0:
if refPosY == 0:
posYPoint = testDict[testKeysList[0]]["xyz"]
elif refPosY == 1:
posYPoint = testDict[testKeysList[1]]["xyz"]
elif minInd == 1:
if refPosY == 0:
posYPoint = testDict[testKeysList[1]]["xyz"]
elif refPosY == 1:
posYPoint = testDict[testKeysList[0]]["xyz"]
# elif minInd == 2:
# if refPosY == 0:
# posYPoint = testDict[testKeysList[2]]["xyz"]
# elif refPosY == 1:
# posYPoint = testDict[testKeysList[0]]["xyz"]
return posYPoint
def _align_dicts(testDict, refDict, statDict):
"""
Arguments:
testDict and refDict:For two dictionaries that are ordered in same rotational sense
statDict: scaling to use
Returns:
np.array of xyz point to use
"""
# pylint:disable=too-many-branches
matchScores = _popelier_match_scores(testDict, refDict, statDict)
refDictKeysList = list(refDict.keys())
testKeysList = list(testDict.keys())
minInd = matchScores.index(min(matchScores))
# identify the refDict BCP that is on the +y axis
if (
refDict[refDictKeysList[0]]["xyz"][1] > 0
and abs(refDict[refDictKeysList[0]]["xyz"][2]) < 0.01
):
refPosY = 0
elif (
refDict[refDictKeysList[1]]["xyz"][1] > 0
and abs(refDict[refDictKeysList[1]]["xyz"][2]) < 0.01
):
refPosY = 1
elif (
refDict[refDictKeysList[2]]["xyz"][1] > 0
and abs(refDict[refDictKeysList[2]]["xyz"][2]) < 0.01
):
refPosY = 2
# for the best match, set the posYPoint to the one that mapped to refDict +y point
if minInd == 0:
if refPosY == 0:
posYPoint = testDict[testKeysList[0]]["xyz"]
elif refPosY == 1:
posYPoint = testDict[testKeysList[1]]["xyz"]
elif refPosY == 2:
posYPoint = testDict[testKeysList[2]]["xyz"]
elif minInd == 1:
if refPosY == 0:
posYPoint = testDict[testKeysList[1]]["xyz"]
elif refPosY == 1:
posYPoint = testDict[testKeysList[2]]["xyz"]
elif refPosY == 2:
posYPoint = testDict[testKeysList[0]]["xyz"]
elif minInd == 2:
if refPosY == 0:
posYPoint = testDict[testKeysList[2]]["xyz"]
elif refPosY == 1:
posYPoint = testDict[testKeysList[0]]["xyz"]
elif refPosY == 2:
posYPoint = testDict[testKeysList[1]]["xyz"]
return posYPoint
def _get_posy_point_aiida(
data, cc_dict, atomDict, attachedAtom, negXAtomLabel, default_stats=True
):
# pylint:disable=too-many-arguments
# ccProps = qt.get_cc_props(FolderData,attachedAtom,is_folder_data=True)
for key in list(cc_dict.keys()):
keynum = int("".join(filter(str.isdigit, key)))
if keynum == 1:
vscc = cc_dict[key]
# if len(ccProps) > 0:
# vscc = qt.identify_vscc(ccProps,atomDict,attachedAtom)
# else:
# vscc = {}
if len(vscc) == 1: # pylint:disable=used-before-assignment
# reorient setting vscc to +y
vkeys = list(vscc.keys())
posYPoint = vscc[vkeys[0]]["xyz"]
elif len(vscc) == 2 and "N" not in attachedAtom:
vkeys = list(vscc.keys())
posYPoint = [
x / 2
for x in [
vscc[vkeys[0]]["xyz"][0] + vscc[vkeys[1]]["xyz"][0],
vscc[vkeys[0]]["xyz"][1] + vscc[vkeys[1]]["xyz"][1],
vscc[vkeys[0]]["xyz"][2] + vscc[vkeys[1]]["xyz"][2],
]
]
# reorient to average of vscc points for +y
else:
# bcpsToMatch is bcp dictionary, ordered for clockwise rot
# data,originAtomXYZ,negXAtomLabel,originAtomLabel
bcpsToMatch = _find_bcp_match(
data, atomDict[attachedAtom]["xyz"], negXAtomLabel, attachedAtom, atomDict
)
# on the assumption that if an atom has two bonds (_find_bap_match returns None),
# and does not have a lone pair, it is linear, so we do not do another rotation
# and posYPoint is None
if len(bcpsToMatch) == 0 or len(bcpsToMatch) == 1:
posYPoint = []
else:
atType = "".join([i for i in attachedAtom if not i.isdigit()])
matchDict = _get_bcp_reference(atType, len(bcpsToMatch) + 1)
if default_stats:
statDict = _DEFAULT_STAT_DICT
if len(bcpsToMatch) == 3:
posYPoint = _align_dicts(bcpsToMatch, matchDict, statDict)
elif len(bcpsToMatch) == 2:
posYPoint = _align_dicts_2(bcpsToMatch, matchDict, statDict)
# reorient to another point
# posy point will be the point that would lie along the y-axis in reference in maximal match case
# print('not done yet')
return posYPoint
def _get_posy_point(
sumFileNoExt, atomDict, attachedAtom, negXAtomLabel, default_stats=True
):
"""returns point to put on +y axis matching definition in rotate_substituent."""
ccProps = qt.get_cc_props(sumFileNoExt, attachedAtom)
if len(ccProps) > 0:
vscc = qt.identify_vscc(ccProps, atomDict, attachedAtom)
else:
vscc = {}
if len(vscc) == 1:
# reorient setting vscc to +y
vkeys = list(vscc.keys())
posYPoint = vscc[vkeys[0]]["xyz"]
elif len(vscc) == 2 and "N" not in attachedAtom:
vkeys = list(vscc.keys())
posYPoint = [
x / 2
for x in [
vscc[vkeys[0]]["xyz"][0] + vscc[vkeys[1]]["xyz"][0],
vscc[vkeys[0]]["xyz"][1] + vscc[vkeys[1]]["xyz"][1],
vscc[vkeys[0]]["xyz"][2] + vscc[vkeys[1]]["xyz"][2],
]
]
# reorient to average of vscc points for +y
else:
with open(sumFileNoExt + ".sum", encoding="utf-8") as sumFile:
# sumFile = open()
data = sumFile.readlines()
# sumFile.close()
# bcpsToMatch is bcp dictionary, ordered for clockwise rot
# data,originAtomXYZ,negXAtomLabel,originAtomLabel
bcpsToMatch = _find_bcp_match(
data, atomDict[attachedAtom]["xyz"], negXAtomLabel, attachedAtom, atomDict
)
# on the assumption that if an atom has two bonds (_find_bap_match returns None),
# and does not have a lone pair, it is linear, so we do not do another rotation
# and posYPoint is None
if len(bcpsToMatch) == 0 or len(bcpsToMatch) == 1:
posYPoint = []
else:
atType = "".join([i for i in attachedAtom if not i.isdigit()])
matchDict = _get_bcp_reference(atType, len(bcpsToMatch) + 1)
if default_stats:
statDict = _DEFAULT_STAT_DICT
if len(bcpsToMatch) == 3:
posYPoint = _align_dicts(bcpsToMatch, matchDict, statDict)
elif len(bcpsToMatch) == 2:
posYPoint = _align_dicts_2(bcpsToMatch, matchDict, statDict)
# reorient to another point
# posy point will be the point that would lie along the y-axis in reference in maximal match case
# print('not done yet')
return posYPoint
def _set_yaxis(xyzArray, posYArray):
"""rotate a geom positioned on -x axis so posYArray will lie on +y"""
theta = math.atan2(posYArray[2], posYArray[1])
# print(theta)
c = math.cos(theta)
s = math.sin(theta)
G = np.array([[1, 0, 0], [0, c, s], [0, -s, c]])
rot1 = np.matmul(G, xyzArray.T)
rot1vec = np.matmul(G, posYArray)
if rot1vec[1] < 0:
G2 = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
final_geom = np.matmul(G2, rot1)
else:
final_geom = rot1
return final_geom.T
def _get_popelier_dif(bcpDictA, bcpDictB, statDict):
"""compute distance in BCP space between dictA and B scaling using statDict"""
distancesq = 0.0
for prop in bcpDictA:
if prop != "xyz":
scaledA = (bcpDictA[prop][0] - statDict[prop]["mean"]) / statDict[prop][
"sd"
]
scaledB = (bcpDictB[prop][0] - statDict[prop]["mean"]) / statDict[prop][
"sd"
]
distancesq += (scaledA - scaledB) ** 2
return math.sqrt(distancesq)
[docs]
def rotate_substituent_aiida(
sum_file_folder, atom_dict, cc_dict, originAtom=1, negXAtom=2, posYAtom=0
):
# pylint:disable=too-many-arguments
"""
Rotates a substituent to the defined coordinate system.
Args:
sum_file_folder (aiida FolderData): FolderData object containing .sum file
atom_dict: dict of output from get_atomic_props
cc_dict: dict of VSCC data
originAtom (int): the integer number of the atom to place at the origin
negXAtom (int): the integer number of the atom to place along the -x axis
posYAtom (int): override for above defined +y point, set to posYAtom instead
Returns:
dictionary 'atomic_symbols' for atomic symbols and 'geom' for 3xN np.array of rotated coordinates
Examples:
>>> rotate_substituent('SubCH3_CFH2_anti2146_reor',1,2)
Atom x y z
C1 0. 0. 0.
H2 -{float} 0. 0.
.(remaining geometry)
.
.
Notes:
Coordinate system defined as:
originAtom at (0,0,0)
negXAtom at (-x,0,0)
Atom on +y defined as:
average of lone pairs for 2 lone pairs on originAtom
Position of lone pair for 1 lone pair on originAtom
For no lone pairs: map BCPs onto reference for the atom type
Minimum distance in BCP space defined the atom to put on +y
"""
# pylint:disable=too-many-locals
# read sum file
data = sum_file_folder.get_object_content("aiida.sum").split("\n")
molecule_xyz = qt.get_xyz(data)
# Labels format A1 etc
negXAtomLabel = molecule_xyz["Atoms"][negXAtom - 1]
attachedAtom = molecule_xyz["Atoms"][originAtom - 1]
if not posYAtom and len(molecule_xyz["Atoms"]) > 2:
posYPoint = _get_posy_point_aiida(
data, cc_dict, atom_dict, attachedAtom, negXAtomLabel
)
else:
posYPoint = []
if len(posYPoint) > 0:
xyz_w_y_pt = np.append(molecule_xyz["xyz"], [posYPoint], axis=0)
# perform reorientation
molecule_xaxis = _set_xaxis(_set_origin(xyz_w_y_pt, originAtom), negXAtom)
final_y = molecule_xaxis[posYAtom - 1]
final_orientation = _set_yaxis(molecule_xaxis[0:-1], final_y)
elif posYAtom:
posYPoint = molecule_xyz["xyz"][posYAtom - 1]
else:
molecule_xaxis = _set_xaxis(
_set_origin(molecule_xyz["xyz"], originAtom), negXAtom
)
final_orientation = molecule_xaxis
# Generate output
final_orientation = final_orientation * 0.529177
# outFrame = pd.DataFrame(final_orientation*0.529177,columns = ['x','y','z'])
# outFrame['Atom'] = molecule_xyz['Atoms']
# outFrame = outFrame[['Atom','x','y','z']]
num_nna = len([x for x in molecule_xyz["Atoms"] if "NNA" in x])
tot_num_ats = len(molecule_xyz["Atoms"])
# out_dict = {molecule_xyz['Atoms'][i]:final_orientation[i] for i in range(0,len(molecule_xyz['Atoms']))}
return {
"atom_symbols": molecule_xyz["Atoms"][0 : (tot_num_ats - num_nna)],
"geom": final_orientation[0 : (tot_num_ats - num_nna)],
}
[docs]
def rotate_substituent(sumFileNoExt, originAtom, negXAtom, posYAtom=0):
"""
Rotates a substituent to the defined coordinate system.
Args:
sumFileNoExt (string): name of a sum file, without the .sum extension
originAtom (int): the integer number of the atom to place at the origin
negXAtom (int): the integer number of the atom to place along the -x axis
posYAtom (int): override for above defined +y point, set to posYAtom instead
Returns:
pandas data frame of output geometry (columns Atom, x, y, z)
Examples:
>>> rotate_substituent('SubCH3_CFH2_anti2146_reor',1,2)
Atom x y z
C1 0. 0. 0.
H2 -{float} 0. 0.
.(remaining geometry)
.
.
Notes:
Coordinate system defined as:
originAtom at (0,0,0)
negXAtom at (-x,0,0)
Atom on +y defined as:
average of lone pairs for 2 lone pairs on originAtom
Position of lone pair for 1 lone pair on originAtom
For no lone pairs: map BCPs onto reference for the atom type
Minimum distance in BCP space defined the atom to put on +y
"""
# pylint:disable=too-many-locals
# read sum file
with open(sumFileNoExt + ".sum", encoding="utf-8") as sumFile:
# sumFile = open(sumFileNoExt + ".sum")
data = sumFile.readlines()
# sumFile.close()
atomDict = qt.get_atomic_props(data) # (needed for VSCC identification)
molecule_xyz = qt.get_xyz(data)
# Labels format A1 etc
negXAtomLabel = molecule_xyz["Atoms"][negXAtom - 1]
attachedAtom = molecule_xyz["Atoms"][originAtom - 1]
# perform reorientation
if not posYAtom and len(molecule_xyz["Atoms"]) > 2:
posYPoint = _get_posy_point(sumFileNoExt, atomDict, attachedAtom, negXAtomLabel)
elif posYAtom:
posYPoint = molecule_xyz["xyz"][posYAtom - 1]
else:
posYPoint = []
if len(posYPoint) > 0:
print(molecule_xyz["xyz"])
print(posYPoint)
xyz_w_y_pt = np.append(molecule_xyz["xyz"], [posYPoint], axis=0)
# perform reorientation
molecule_xaxis = _set_xaxis(_set_origin(xyz_w_y_pt, originAtom), negXAtom)
final_y = molecule_xaxis[posYAtom - 1]
final_orientation = _set_yaxis(molecule_xaxis[0:-1], final_y)
else:
molecule_xaxis = _set_xaxis(
_set_origin(molecule_xyz["xyz"], originAtom), negXAtom
)
final_orientation = molecule_xaxis
# Generate output
outFrame = pd.DataFrame(final_orientation * 0.529177, columns=["x", "y", "z"])
outFrame["Atom"] = molecule_xyz["Atoms"]
outFrame = outFrame[["Atom", "x", "y", "z"]]
return outFrame
[docs]
def output_to_gjf(
# pylint:disable=too-many-arguments
old_file_name,
reor_geom,
esm="wB97XD",
basis_set="aug-cc-pvtz",
add_label="",
n_procs=4,
mem="3200MB",
charge=0,
multiplicity=1,
wfx=True,
):
"""Given a rotated molecule, writes new geometry to single point Gaussian calculation
Args:
old_file_name - the file name of the sum file(no extension) before reorientation
reor_geom - the data frame output of rotate_substituent
esm - whatever electronic structure method is to be used in single point(HF/MP2/functional)
basis_set - basis set to be used
add_label - any extra label for file name, default empty
n_procs=4 - numper of processors for remote clusters, set to 0 if not desired
mem='3200MB' -amount of memory for remote clusters, set to 0 if not desired
charge - charge of molecule
multiplicity - multiplicity of molecule
wfx - whether or not to write wfx, default True
Returns:
no return, but creates new gjf file old_file_name_reor_add_label.gjf
File looks like:
%chk=new_file_name.chk
%nprocs=n_procs
%mem=mem
#p esm/basis_set output=wfx nosymmetry
single point on old_file reoriented by subproptools
charge multiplicity
(xyz geom)
new_file_name.wfx
(blank lines)
"""
new_file_name = old_file_name + "_reor" + add_label
# delete file if already exists
if os.path.exists(new_file_name + ".gjf"):
# print('deleting')
os.remove(new_file_name + ".gjf")
chk_name = new_file_name + ".chk"
with open(new_file_name + ".gjf", "a", encoding="utf-8") as f:
f.write(f"%chk={chk_name}\n")
if n_procs:
f.write(f"%nprocs={n_procs}\n")
if mem:
f.write(f"%mem={mem}\n")
if wfx:
f.write(f"#p {esm}/{basis_set} output=wfx nosymmetry\n")
else:
f.write(f"#p {esm}/{basis_set} nosymmetry\n")
f.write("\n")
f.write(f"single point on {old_file_name} reoriented by subproptools\n")
f.write("\n")
f.write(f"{charge} {multiplicity}\n")
dfAsString = reor_geom.to_string(header=False, index=False)
f.write(
dfAsString.split("NNA")[0]
) # NNAs are appended as last atom - don't write them
if "NNA" not in dfAsString:
f.write("\n\n")
else:
f.write("\n")
if wfx:
f.write(new_file_name + ".wfx\n\n\n")
else:
f.write("\n\n\n")
[docs]
def rotate_sheet(
csv_file, esm, basis, n_procs=4, mem="3200MB", wfx=True, extra_label=""
):
"""
Given csv file and Gaussian calculation options, reorient files in csv_file and output gjf
Args:
csv_file - csv file containining these columns:
Substituent, originAtom, negXAtom, posYAtom, charge, multiplicity, label1, label2,...
Substituent: string label for substituent
originAtom: numerical index(starting form 1) of atom to use as origin
negXAtom: numerical index(starting form 1) of atom to place on -x
posYAtom: usually 0, but override if desired, numerical index(starting form 1) of atom to place on +y
charge: charge of the molecule
multiplicity: multiplicity of the molecule
label1, label2... label depicting situation for molecule(substrate, method) e.g. "SubH", "B3LYP/cc-pvDZ" etc
esm: electronic structure method (HF/MP2/DFT functional/etc)
basis: string for basis to be used
n_procs: number of processors for use on Cedar, default to 4
mem: memory to use on Cedar, default to 3200MB
wfx: if we wish to write wfx, default True
extra_label: an additional label for the reoriented file if needed, default none
Returns:
no return value, but outputs to gjf files in working directory(or directory in path of filenames)
"""
# pylint:disable=too-many-arguments
# pylint:disable=too-many-locals
csvFrame = pd.read_csv(csv_file)
ncolumns = csvFrame.shape[1]
nrow = csvFrame.shape[0]
for sub in range(0, nrow):
charge = csvFrame["charge"][sub]
multiplicity = csvFrame["multiplicity"][sub]
originAtom = csvFrame["originAtom"][sub]
negXAtom = csvFrame["negXAtom"][sub]
posYAtom = csvFrame["posYAtom"][sub]
for label in range(6, ncolumns):
rot_file = csvFrame.iloc[sub, label]
print(f"""Rotating {rot_file}""")
rot_geom = rotate_substituent(
rot_file, originAtom=originAtom, negXAtom=negXAtom, posYAtom=posYAtom
)
print(f"""Creating new Gaussian gjf for {rot_file}""")
output_to_gjf(
rot_file,
rot_geom,
esm=esm,
basis_set=basis,
add_label=extra_label,
n_procs=n_procs,
mem=mem,
charge=charge,
multiplicity=multiplicity,
wfx=wfx,
)
# commented out - have included reference bcps at data at top of file rather tahn calculating each time
def _get_ref_bcps(
sumfilenoext,
atPairList,
originAtom,
negXAtomLabel,
originAtomXYZ=np.array([0.0, 0.0, 0.0]),
):
"""given reference sumfile, extract bcp properties for needed bcps"""
# sumFile = open(sumfilenoext + ".sum") # open file, read lines, close file
with open(sumfilenoext + ".sum", encoding="utf-8") as sumFile:
data = sumFile.readlines()
# sumFile.close()
atomDict = qt.get_atomic_props(data)
bcpDict = {}
for bcp in atPairList:
# block = qt.get_bcp_block(data,bcp)
bcpDict.update({f"{bcp[0]}-{bcp[1]}": qt.get_bcp_properties(data, bcp)})
clockbcpDict = _find_clockwise_rot(
bcpDict, originAtom, negXAtomLabel, atomDict, originAtomXYZ
)
return clockbcpDict