from collections import defaultdict
from collections import namedtuple
from enum import IntEnum
from itertools import chain
import numpy as np
from rdkit import Chem
from . import _atomic_property
from ._base import Descriptor, parse_enum
class ChiType(IntEnum):
path = 1
cluster = 2
path_cluster = 3
chain = 4
class DFS(object):
__slots__ = ('mol', 'visited', 'vis_edges', 'is_chain', 'degrees', 'neighbors',)
def __init__(self, mol, use_bonds):
self.mol = mol
self.visited = set()
self.vis_edges = set()
self.is_chain = False
self.degrees = set()
self.neighbors = defaultdict(set)
for i in use_bonds:
bond = mol.GetBondWithIdx(i)
a = bond.GetBeginAtomIdx()
b = bond.GetEndAtomIdx()
self.neighbors[a].add(b)
self.neighbors[b].add(a)
@property
def nodes(self):
return list(self.neighbors.keys())
@classmethod
def _edge_key(cls, u, v):
return min(u, v), max(u, v)
def _dfs(self, u):
neighbors = self.neighbors[u]
self.visited.add(u)
self.degrees.add(len(neighbors))
for v in neighbors:
ek = self._edge_key(u, v)
if v not in self.visited:
self.vis_edges.add(ek)
self._dfs(v)
elif ek not in self.vis_edges:
self.vis_edges.add(ek)
self.is_chain = True
def __call__(self):
self._dfs(next(iter(self.neighbors.keys())))
if self.is_chain:
t = ChiType.chain
elif not self.degrees - set([1, 2]):
t = ChiType.path
elif 2 in self.degrees:
t = ChiType.path_cluster
else:
t = ChiType.cluster
return t
class ChiBase(Descriptor):
explicit_hydrogens = False
ChiBonds = namedtuple('ChiBonds', 'chain path path_cluster cluster')
class ChiCache(ChiBase):
__slots__ = ('_order',)
def __reduce_ex__(self, version):
return self.__class__, self._order
def __init__(self, order):
self._order = order
def calculate(self, mol):
chain = list()
path = list()
path_cluster = list()
cluster = list()
for bonds in Chem.FindAllSubgraphsOfLengthN(mol, self._order):
dfs = DFS(mol, bonds)
typ = dfs()
nodes = dfs.nodes
if typ == ChiType.chain:
chain.append(nodes)
elif typ == ChiType.path:
path.append(nodes)
elif typ == ChiType.path_cluster:
path_cluster.append(nodes)
else:
cluster.append(nodes)
return ChiBonds(chain, path, path_cluster, cluster)
_chi_type_dict = {
ChiType.path: 'P',
ChiType.chain: 'CH',
ChiType.path_cluster: 'PC',
ChiType.cluster: 'C'
}
_prop_dict = dict(delta='S', delta_v='V')
_deltas = ['delta', 'delta_v']
[docs]class Chi(ChiBase):
r"""chi descriptor.
:type type: str
:param type: one of chi_types
:type prop: str or function
:param prop: :ref:`atomic_properties`
:type averaged: bool
:param averaged: averaged by number of subgraphs
:returns: NaN when
* any atomic properties <= 0
* averaged and :math:`N_{\chi} = 0`
"""
chi_types = tuple(t.name for t in ChiType)
@classmethod
def preset(cls):
return chain(
(cls(ChiType.chain, l, a) for a in _deltas for l in range(3, 8)),
(cls(ChiType.cluster, l, a) for a in _deltas for l in range(3, 7)),
(cls(ChiType.path_cluster, l, a) for a in _deltas for l in range(4, 7)),
(cls(ChiType.path, l, a, m) for a in _deltas for m in [False, True] for l in range(8)),
)
def __str__(self):
prop = _prop_dict.get(self._prop_name, self._prop_name)
ct = _chi_type_dict[self._type]
p = 'A' if self._averaged else ''
return '{}{}{}-{}'.format(p, prop, ct, self._order)
@property
def gasteiger_charges(self):
return getattr(self._prop, 'gasteiger_charges', False)
__slots__ = ('_type', '_order', '_prop_name', '_prop', '_averaged',)
def __reduce_ex__(self, version):
return self.__class__, (self._type, self._order, self._prop, self._averaged)
def __init__(self, type='path', order=0, prop='delta', averaged=False):
self._type = parse_enum(ChiType, type)
self._order = order
self._prop_name, self._prop = _atomic_property.getter(prop, self.explicit_hydrogens)
self._averaged = averaged
def dependencies(self):
if self._order > 0:
return dict(chi=ChiCache(self._order))
def calculate(self, mol, chi=None):
if self._order <= 0:
chi = ChiBonds([], [{a.GetIdx()} for a in mol.GetAtoms()], [], [])
x = 0.0
node_sets = getattr(chi, self._type.name)
props = [self._prop(a) for a in mol.GetAtoms()]
for nodes in node_sets:
c = 1
for node in nodes:
c *= props[node]
if c <= 0:
return np.nan
x += c ** -0.5
if self._averaged:
x /= len(node_sets) or np.nan
return x
rtype = float