Skip to content

Commit ea3e50a

Browse files
authored
Added SLATM (qmlcode#13)
* Added SLATM representation, aded ase to travis.yml and bumped to 3.1 added scipy to travis yml
1 parent 0094698 commit ea3e50a

File tree

8 files changed

+574
-6
lines changed

8 files changed

+574
-6
lines changed

Diff for: .travis.yml

+6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ language: python
33
sudo: required
44
dist: trusty
55

6+
group: edge
7+
68
deploy:
79
provider: pages
810
local_dir: ${TRAVIS_BUILD_DIR}/docs/build/html
@@ -29,6 +31,8 @@ install:
2931
- |
3032
if [ ${TRAVIS_PYTHON_VERSION:0:1} = 3 ]; then
3133
sudo apt-get install python3-numpy
34+
pip3 install scipy
35+
pip3 install ase
3236
python3 setup.py build
3337
python3 setup.py install
3438
pip3 install sphinx
@@ -37,6 +41,8 @@ install:
3741
make html
3842
elif [ ${TRAVIS_PYTHON_VERSION} = "2.7" ]; then
3943
sudo apt-get install python-numpy
44+
pip2 install scipy
45+
pip2 install ase
4046
python2 setup.py build
4147
python2 setup.py install
4248
pip2 install sphinx

Diff for: docs/source/conf.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@
5555
# built documents.
5656
#
5757
# The short X.Y version.
58-
version = u'0.2.10'
58+
version = u'0.3.1'
5959
# The full version, including alpha/beta/rc tags.
60-
release = u'0.2.10'
60+
release = u'0.3.1'
6161

6262
# The language for content autogenerated by Sphinx. Refer to documentation
6363
# for a list of supported languages.

Diff for: qml/compound.py

+14
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from .representations import generate_atomic_coulomb_matrix
3232
from .representations import generate_bob
3333
from .representations import generate_eigenvalue_coulomb_matrix
34+
from .representations import generate_slatm_representation
3435

3536
from .arad import ARAD
3637

@@ -93,6 +94,19 @@ def generate_arad_representation(self, size = 23):
9394
assert (self.representation).shape[0] == size, "ERROR: Check ARAD descriptor size!"
9495
assert (self.representation).shape[2] == size, "ERROR: Check ARAD descriptor size!"
9596

97+
def generate_slatm_representation(self, mbtypes,
98+
local=False, sigmas=[0.05,0.05], dgrids=[0.03,0.03], rcut=4.8,
99+
alchemy=False, rpower=6, iprt=False):
100+
101+
slatm = generate_slatm_representation(self.coordinates, self.nuclear_charges,
102+
mbtypes, local=local, sigmas=sigmas, dgrids=dgrids, rcut=rcut,
103+
alchemy=alchemy, rpower=rpower, iprt=iprt)
104+
if local: slatm = np.asarray(slatm)
105+
self.representation = slatm
106+
print(self.representation)
107+
print(self.representation.shape)
108+
109+
96110
def read_xyz(self, filename):
97111

98112
f = open(filename, "r")

Diff for: qml/representations.py

+244-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# MIT License
22
#
3-
# Copyright (c) 2016 Anders Steen Christensen and Lars A. Bratholm
3+
# Copyright (c) 2017 Anders Steen Christensen, Lars A. Bratholm and Bing Huang
44
#
55
# Permission is hereby granted, free of charge, to any person obtaining a copy
66
# of this software and associated documentation files (the "Software"), to deal
@@ -23,6 +23,9 @@
2323
from __future__ import print_function
2424

2525
import numpy as np
26+
import itertools as itl
27+
28+
import ase
2629

2730
from .frepresentations import fgenerate_coulomb_matrix
2831
from .frepresentations import fgenerate_unsorted_coulomb_matrix
@@ -33,6 +36,10 @@
3336

3437
from .data import NUCLEAR_CHARGE
3538

39+
from .slatm import get_boa
40+
from .slatm import get_sbop
41+
from .slatm import get_sbot
42+
3643
def generate_coulomb_matrix(nuclear_charges, coordinates, size = 23, sorting = "row-norm"):
3744
""" Generates a sorted molecular coulomb, sort either by ``"row-norm"`` or ``"unsorted"``.
3845
``size=`` denotes the max number of atoms in the molecule (thus the size of the resulting square matrix.
@@ -128,7 +135,7 @@ def generate_bob(nuclear_charges, coordinates, atomtypes, asize = {"O":3, "C":7,
128135
n = 0
129136
atoms = sorted(asize, key=asize.get)
130137
nmax = [asize[key] for key in atoms]
131-
print(atoms,nmax)
138+
# print(atoms,nmax)
132139
ids = np.zeros(len(nmax), dtype=int)
133140
for i, (key, value) in enumerate(zip(atoms,nmax)):
134141
n += value * (1+value)
@@ -139,3 +146,238 @@ def generate_bob(nuclear_charges, coordinates, atomtypes, asize = {"O":3, "C":7,
139146
n /= 2
140147

141148
return fgenerate_bob(nuclear_charges, coordinates, nuclear_charges, ids, nmax, n)
149+
150+
151+
def get_slatm_mbtypes(nuclear_charges):
152+
"""
153+
Get the list of minimal types of many-body terms in a dataset. This resulting list
154+
is necessary as input in the ``generate_slatm_representation()`` function.
155+
156+
:param zs: A list of the nuclear charges for each compound in the dataset.
157+
:type fs: list of numpy arrays
158+
:return: A list containing the types of many-body terms.
159+
:rtype: list
160+
"""
161+
162+
# :param pbc: periodic boundary condition along x,y,z direction, defaulted to '000', i.e., molecule
163+
# :type pbc: string
164+
165+
pbc = '000'
166+
167+
zs = nuclear_charges
168+
# zs = [ read_xyz(f)[0] for f in fs ]
169+
nm = len(zs)
170+
zsmax = set()
171+
nas = []
172+
zs_ravel = []
173+
for zsi in zs:
174+
na = len(zsi); nas.append(na)
175+
zsil = list(zsi); zs_ravel += zsil
176+
zsmax.update( zsil )
177+
178+
zsmax = np.array( list(zsmax) )
179+
nass = []
180+
for i in range(nm):
181+
zsi = zs[i]
182+
nass.append( [ (zi == zsi).sum() for zi in zsmax ] )
183+
184+
nzmax = np.max(np.array(nass), axis=0)
185+
nzmax_u = []
186+
if pbc != '000':
187+
# the PBC will introduce new many-body terms, so set
188+
# nzmax to 3 if it's less than 3
189+
for nzi in nzmax:
190+
if nzi <= 2:
191+
nzi = 3
192+
nzmax_u.append(nzi)
193+
nzmax = nzmax_u
194+
195+
boas = [ [zi,] for zi in zsmax ]
196+
bops = [ [zi,zi] for zi in zsmax ] + list( itl.combinations(zsmax,2) )
197+
198+
bots = []
199+
for i in zsmax:
200+
for bop in bops:
201+
j,k = bop
202+
tas = [ [i,j,k], [i,k,j], [j,i,k] ]
203+
for tasi in tas:
204+
if (tasi not in bots) and (tasi[::-1] not in bots):
205+
nzsi = [ (zj == tasi).sum() for zj in zsmax ]
206+
if np.all(nzsi <= nzmax):
207+
bots.append( tasi )
208+
mbtypes = boas + bops + bots
209+
210+
return mbtypes#, np.array(zs_ravel), np.array(nas)
211+
212+
213+
def generate_slatm_representation(coordinates, nuclear_charges, mbtypes,
214+
local=False, sigmas=[0.05,0.05], dgrids=[0.03,0.03], rcut=4.8,
215+
alchemy=False, rpower=6, iprt=False):
216+
"""
217+
Generate Spectrum of London and Axillrod-Teller-Muto potential (SLATM) representation.
218+
Both global (``local=False``) and local (``local=True``) SLATM are available.
219+
220+
A version that works for periodic boundary conditions will be released soon.
221+
222+
NOTE: You will need to run the ``get_slatm_mbtypes()`` function to get the ``mbtypes`` input (or generate it manually).
223+
224+
:param coordinates: Input coordinates
225+
:type coordinates: numpy array
226+
:param nuclear_charges: List of nuclear charges.
227+
:type nuclear_charges: numpy array
228+
:param mbtypes: Many-body types for the whole dataset, including 1-, 2- and 3-body types. Could be obtained by calling ``get_slatm_mbtypes()``.
229+
:type mbtypes: list
230+
:param local: Generate a local representation. Defaulted to False (i.e., global representation); otherwise, atomic version.
231+
:type local: bool
232+
:param sigmas: Controlling the width of Gaussian smearing function for 2- and 3-body parts, defaulted to [0.05,0.05], usually these do not need to be adjusted.
233+
:type sigmas: list
234+
:param dgrids: The interval between two sampled internuclear distances and angles, defaulted to [0.03,0.03], no need for change, compromised for speed and accuracy.
235+
:type dgrids: list
236+
:param rcut: Cut-off radius, defaulted to 4.8 Angstrom.
237+
:type rcut: float
238+
:param alchemy: Swith to use the alchemy version of SLATM. (default=False)
239+
:type alchemy: bool
240+
:param rpower: The power of R in 2-body potential, defaulted to London potential (=6).
241+
:type rpower: float
242+
:param iprt: Print debug output if True.
243+
:type iprt: bool
244+
:return: 1D SLATM representation
245+
:rtype: numpy array
246+
"""
247+
# :param pbc: defaulted to '000', meaning it's a molecule; the three digits in the string corresponds to x,y,z direction
248+
# :type pbc: string
249+
250+
pbc = '000'
251+
252+
if pbc != '000' and iprt:
253+
print(' -- handling systems with periodic boundary condition')
254+
# =======================================================================
255+
# PBC may introduce new many-body terms, so at the stage of get statistics
256+
# info from db, we've already considered this point by letting maximal number
257+
# of nuclear charges being 3.
258+
# =======================================================================
259+
260+
m = ase.Atoms(nuclear_charges, coordinates)
261+
zsm = nuclear_charges
262+
263+
iloc = local
264+
if iloc:
265+
mbs = []
266+
na = len(m)
267+
X2Ns = []
268+
for ia in range(na):
269+
if iprt: print(' -- ia = ', ia + 1)
270+
n1 = 0; n2 = 0; n3 = 0
271+
mbs_ia = np.zeros(0)
272+
icount = 0
273+
for mbtype in mbtypes:
274+
if len(mbtype) == 1:
275+
mbsi = get_boa(mbtype[0], np.array([zsm[ia],])) #print ' -- mbsi = ', mbsi
276+
if alchemy:
277+
n1 = 1
278+
n1_0 = mbs_ia.shape[0]
279+
if n1_0 == 0:
280+
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
281+
elif n1_0 == 1:
282+
mbs_ia += mbsi
283+
else:
284+
raise '#ERROR'
285+
else:
286+
n1 += len(mbsi)
287+
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
288+
elif len(mbtype) == 2:
289+
#print ' 001, pbc = ', pbc
290+
mbsi = get_sbop(mbtype, m, zsm, iloc=iloc, ia=ia, \
291+
sigma=sigmas[0], dgrid=dgrids[0], rcut=rcut, \
292+
pbc=pbc, rpower=rpower)[1]
293+
mbsi *= 0.5 # only for the two-body parts, local rpst
294+
#print ' 002'
295+
if alchemy:
296+
n2 = len(mbsi)
297+
n2_0 = mbs_ia.shape[0]
298+
if n2_0 == n1:
299+
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
300+
elif n2_0 == n1 + n2:
301+
t = mbs_ia[n1:n1+n2] + mbsi
302+
mbs_ia[n1:n1+n2] = t
303+
else:
304+
raise '#ERROR'
305+
else:
306+
n2 += len(mbsi)
307+
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
308+
else: # len(mbtype) == 3:
309+
mbsi = get_sbot(mbtype, m, zsm, iloc=iloc, ia=ia, \
310+
sigma=sigmas[1], dgrid=dgrids[1], rcut=rcut, pbc=pbc)[1]
311+
if alchemy:
312+
n3 = len(mbsi)
313+
n3_0 = mbs_ia.shape[0]
314+
if n3_0 == n1 + n2:
315+
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
316+
elif n3_0 == n1 + n2 + n3:
317+
t = mbs_ia[n1+n2:n1+n2+n3] + mbsi
318+
mbs_ia[n1+n2:n1+n2+n3] = t
319+
else:
320+
raise '#ERROR'
321+
else:
322+
n3 += len(mbsi)
323+
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
324+
325+
mbs.append( mbs_ia )
326+
X2N = [n1,n2,n3];
327+
if X2N not in X2Ns:
328+
X2Ns.append(X2N)
329+
assert len(X2Ns) == 1, '#ERROR: multiple `X2N ???'
330+
else:
331+
n1 = 0; n2 = 0; n3 = 0
332+
mbs = np.zeros(0)
333+
for mbtype in mbtypes:
334+
if len(mbtype) == 1:
335+
mbsi = get_boa(mbtype[0], zsm)
336+
if alchemy:
337+
n1 = 1
338+
n1_0 = mbs.shape[0]
339+
if n1_0 == 0:
340+
mbs = np.concatenate( (mbs, [sum(mbsi)] ), axis=0 )
341+
elif n1_0 == 1:
342+
mbs += sum(mbsi )
343+
else:
344+
raise '#ERROR'
345+
else:
346+
n1 += len(mbsi)
347+
mbs = np.concatenate( (mbs, mbsi), axis=0 )
348+
elif len(mbtype) == 2:
349+
mbsi = get_sbop(mbtype, m, zsm, sigma=sigmas[0], \
350+
dgrid=dgrids[0], rcut=rcut, rpower=rpower)[1]
351+
352+
if alchemy:
353+
n2 = len(mbsi)
354+
n2_0 = mbs.shape[0]
355+
if n2_0 == n1:
356+
mbs = np.concatenate( (mbs, mbsi), axis=0 )
357+
elif n2_0 == n1 + n2:
358+
t = mbs[n1:n1+n2] + mbsi
359+
mbs[n1:n1+n2] = t
360+
else:
361+
raise '#ERROR'
362+
else:
363+
n2 += len(mbsi)
364+
mbs = np.concatenate( (mbs, mbsi), axis=0 )
365+
else: # len(mbtype) == 3:
366+
mbsi = get_sbot(mbtype, m, zsm, sigma=sigmas[1], \
367+
dgrid=dgrids[1], rcut=rcut)[1]
368+
if alchemy:
369+
n3 = len(mbsi)
370+
n3_0 = mbs.shape[0]
371+
if n3_0 == n1 + n2:
372+
mbs = np.concatenate( (mbs, mbsi), axis=0 )
373+
elif n3_0 == n1 + n2 + n3:
374+
t = mbs[n1+n2:n1+n2+n3] + mbsi
375+
mbs[n1+n2:n1+n2+n3] = t
376+
else:
377+
raise '#ERROR'
378+
else:
379+
n3 += len(mbsi)
380+
mbs = np.concatenate( (mbs, mbsi), axis=0 )
381+
382+
return mbs
383+

0 commit comments

Comments
 (0)