1
1
# MIT License
2
2
#
3
- # Copyright (c) 2016 Anders Steen Christensen and Lars A. Bratholm
3
+ # Copyright (c) 2017 Anders Steen Christensen, Lars A. Bratholm and Bing Huang
4
4
#
5
5
# Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
# of this software and associated documentation files (the "Software"), to deal
23
23
from __future__ import print_function
24
24
25
25
import numpy as np
26
+ import itertools as itl
27
+
28
+ import ase
26
29
27
30
from .frepresentations import fgenerate_coulomb_matrix
28
31
from .frepresentations import fgenerate_unsorted_coulomb_matrix
33
36
34
37
from .data import NUCLEAR_CHARGE
35
38
39
+ from .slatm import get_boa
40
+ from .slatm import get_sbop
41
+ from .slatm import get_sbot
42
+
36
43
def generate_coulomb_matrix (nuclear_charges , coordinates , size = 23 , sorting = "row-norm" ):
37
44
""" Generates a sorted molecular coulomb, sort either by ``"row-norm"`` or ``"unsorted"``.
38
45
``size=`` denotes the max number of atoms in the molecule (thus the size of the resulting square matrix.
@@ -128,7 +135,7 @@ def generate_bob(nuclear_charges, coordinates, atomtypes, asize = {"O":3, "C":7,
128
135
n = 0
129
136
atoms = sorted (asize , key = asize .get )
130
137
nmax = [asize [key ] for key in atoms ]
131
- print (atoms ,nmax )
138
+ # print(atoms,nmax)
132
139
ids = np .zeros (len (nmax ), dtype = int )
133
140
for i , (key , value ) in enumerate (zip (atoms ,nmax )):
134
141
n += value * (1 + value )
@@ -139,3 +146,238 @@ def generate_bob(nuclear_charges, coordinates, atomtypes, asize = {"O":3, "C":7,
139
146
n /= 2
140
147
141
148
return fgenerate_bob (nuclear_charges , coordinates , nuclear_charges , ids , nmax , n )
149
+
150
+
151
+ def get_slatm_mbtypes (nuclear_charges ):
152
+ """
153
+ Get the list of minimal types of many-body terms in a dataset. This resulting list
154
+ is necessary as input in the ``generate_slatm_representation()`` function.
155
+
156
+ :param zs: A list of the nuclear charges for each compound in the dataset.
157
+ :type fs: list of numpy arrays
158
+ :return: A list containing the types of many-body terms.
159
+ :rtype: list
160
+ """
161
+
162
+ # :param pbc: periodic boundary condition along x,y,z direction, defaulted to '000', i.e., molecule
163
+ # :type pbc: string
164
+
165
+ pbc = '000'
166
+
167
+ zs = nuclear_charges
168
+ # zs = [ read_xyz(f)[0] for f in fs ]
169
+ nm = len (zs )
170
+ zsmax = set ()
171
+ nas = []
172
+ zs_ravel = []
173
+ for zsi in zs :
174
+ na = len (zsi ); nas .append (na )
175
+ zsil = list (zsi ); zs_ravel += zsil
176
+ zsmax .update ( zsil )
177
+
178
+ zsmax = np .array ( list (zsmax ) )
179
+ nass = []
180
+ for i in range (nm ):
181
+ zsi = zs [i ]
182
+ nass .append ( [ (zi == zsi ).sum () for zi in zsmax ] )
183
+
184
+ nzmax = np .max (np .array (nass ), axis = 0 )
185
+ nzmax_u = []
186
+ if pbc != '000' :
187
+ # the PBC will introduce new many-body terms, so set
188
+ # nzmax to 3 if it's less than 3
189
+ for nzi in nzmax :
190
+ if nzi <= 2 :
191
+ nzi = 3
192
+ nzmax_u .append (nzi )
193
+ nzmax = nzmax_u
194
+
195
+ boas = [ [zi ,] for zi in zsmax ]
196
+ bops = [ [zi ,zi ] for zi in zsmax ] + list ( itl .combinations (zsmax ,2 ) )
197
+
198
+ bots = []
199
+ for i in zsmax :
200
+ for bop in bops :
201
+ j ,k = bop
202
+ tas = [ [i ,j ,k ], [i ,k ,j ], [j ,i ,k ] ]
203
+ for tasi in tas :
204
+ if (tasi not in bots ) and (tasi [::- 1 ] not in bots ):
205
+ nzsi = [ (zj == tasi ).sum () for zj in zsmax ]
206
+ if np .all (nzsi <= nzmax ):
207
+ bots .append ( tasi )
208
+ mbtypes = boas + bops + bots
209
+
210
+ return mbtypes #, np.array(zs_ravel), np.array(nas)
211
+
212
+
213
+ def generate_slatm_representation (coordinates , nuclear_charges , mbtypes ,
214
+ local = False , sigmas = [0.05 ,0.05 ], dgrids = [0.03 ,0.03 ], rcut = 4.8 ,
215
+ alchemy = False , rpower = 6 , iprt = False ):
216
+ """
217
+ Generate Spectrum of London and Axillrod-Teller-Muto potential (SLATM) representation.
218
+ Both global (``local=False``) and local (``local=True``) SLATM are available.
219
+
220
+ A version that works for periodic boundary conditions will be released soon.
221
+
222
+ NOTE: You will need to run the ``get_slatm_mbtypes()`` function to get the ``mbtypes`` input (or generate it manually).
223
+
224
+ :param coordinates: Input coordinates
225
+ :type coordinates: numpy array
226
+ :param nuclear_charges: List of nuclear charges.
227
+ :type nuclear_charges: numpy array
228
+ :param mbtypes: Many-body types for the whole dataset, including 1-, 2- and 3-body types. Could be obtained by calling ``get_slatm_mbtypes()``.
229
+ :type mbtypes: list
230
+ :param local: Generate a local representation. Defaulted to False (i.e., global representation); otherwise, atomic version.
231
+ :type local: bool
232
+ :param sigmas: Controlling the width of Gaussian smearing function for 2- and 3-body parts, defaulted to [0.05,0.05], usually these do not need to be adjusted.
233
+ :type sigmas: list
234
+ :param dgrids: The interval between two sampled internuclear distances and angles, defaulted to [0.03,0.03], no need for change, compromised for speed and accuracy.
235
+ :type dgrids: list
236
+ :param rcut: Cut-off radius, defaulted to 4.8 Angstrom.
237
+ :type rcut: float
238
+ :param alchemy: Swith to use the alchemy version of SLATM. (default=False)
239
+ :type alchemy: bool
240
+ :param rpower: The power of R in 2-body potential, defaulted to London potential (=6).
241
+ :type rpower: float
242
+ :param iprt: Print debug output if True.
243
+ :type iprt: bool
244
+ :return: 1D SLATM representation
245
+ :rtype: numpy array
246
+ """
247
+ # :param pbc: defaulted to '000', meaning it's a molecule; the three digits in the string corresponds to x,y,z direction
248
+ # :type pbc: string
249
+
250
+ pbc = '000'
251
+
252
+ if pbc != '000' and iprt :
253
+ print (' -- handling systems with periodic boundary condition' )
254
+ # =======================================================================
255
+ # PBC may introduce new many-body terms, so at the stage of get statistics
256
+ # info from db, we've already considered this point by letting maximal number
257
+ # of nuclear charges being 3.
258
+ # =======================================================================
259
+
260
+ m = ase .Atoms (nuclear_charges , coordinates )
261
+ zsm = nuclear_charges
262
+
263
+ iloc = local
264
+ if iloc :
265
+ mbs = []
266
+ na = len (m )
267
+ X2Ns = []
268
+ for ia in range (na ):
269
+ if iprt : print (' -- ia = ' , ia + 1 )
270
+ n1 = 0 ; n2 = 0 ; n3 = 0
271
+ mbs_ia = np .zeros (0 )
272
+ icount = 0
273
+ for mbtype in mbtypes :
274
+ if len (mbtype ) == 1 :
275
+ mbsi = get_boa (mbtype [0 ], np .array ([zsm [ia ],])) #print ' -- mbsi = ', mbsi
276
+ if alchemy :
277
+ n1 = 1
278
+ n1_0 = mbs_ia .shape [0 ]
279
+ if n1_0 == 0 :
280
+ mbs_ia = np .concatenate ( (mbs_ia , mbsi ), axis = 0 )
281
+ elif n1_0 == 1 :
282
+ mbs_ia += mbsi
283
+ else :
284
+ raise '#ERROR'
285
+ else :
286
+ n1 += len (mbsi )
287
+ mbs_ia = np .concatenate ( (mbs_ia , mbsi ), axis = 0 )
288
+ elif len (mbtype ) == 2 :
289
+ #print ' 001, pbc = ', pbc
290
+ mbsi = get_sbop (mbtype , m , zsm , iloc = iloc , ia = ia , \
291
+ sigma = sigmas [0 ], dgrid = dgrids [0 ], rcut = rcut , \
292
+ pbc = pbc , rpower = rpower )[1 ]
293
+ mbsi *= 0.5 # only for the two-body parts, local rpst
294
+ #print ' 002'
295
+ if alchemy :
296
+ n2 = len (mbsi )
297
+ n2_0 = mbs_ia .shape [0 ]
298
+ if n2_0 == n1 :
299
+ mbs_ia = np .concatenate ( (mbs_ia , mbsi ), axis = 0 )
300
+ elif n2_0 == n1 + n2 :
301
+ t = mbs_ia [n1 :n1 + n2 ] + mbsi
302
+ mbs_ia [n1 :n1 + n2 ] = t
303
+ else :
304
+ raise '#ERROR'
305
+ else :
306
+ n2 += len (mbsi )
307
+ mbs_ia = np .concatenate ( (mbs_ia , mbsi ), axis = 0 )
308
+ else : # len(mbtype) == 3:
309
+ mbsi = get_sbot (mbtype , m , zsm , iloc = iloc , ia = ia , \
310
+ sigma = sigmas [1 ], dgrid = dgrids [1 ], rcut = rcut , pbc = pbc )[1 ]
311
+ if alchemy :
312
+ n3 = len (mbsi )
313
+ n3_0 = mbs_ia .shape [0 ]
314
+ if n3_0 == n1 + n2 :
315
+ mbs_ia = np .concatenate ( (mbs_ia , mbsi ), axis = 0 )
316
+ elif n3_0 == n1 + n2 + n3 :
317
+ t = mbs_ia [n1 + n2 :n1 + n2 + n3 ] + mbsi
318
+ mbs_ia [n1 + n2 :n1 + n2 + n3 ] = t
319
+ else :
320
+ raise '#ERROR'
321
+ else :
322
+ n3 += len (mbsi )
323
+ mbs_ia = np .concatenate ( (mbs_ia , mbsi ), axis = 0 )
324
+
325
+ mbs .append ( mbs_ia )
326
+ X2N = [n1 ,n2 ,n3 ];
327
+ if X2N not in X2Ns :
328
+ X2Ns .append (X2N )
329
+ assert len (X2Ns ) == 1 , '#ERROR: multiple `X2N ???'
330
+ else :
331
+ n1 = 0 ; n2 = 0 ; n3 = 0
332
+ mbs = np .zeros (0 )
333
+ for mbtype in mbtypes :
334
+ if len (mbtype ) == 1 :
335
+ mbsi = get_boa (mbtype [0 ], zsm )
336
+ if alchemy :
337
+ n1 = 1
338
+ n1_0 = mbs .shape [0 ]
339
+ if n1_0 == 0 :
340
+ mbs = np .concatenate ( (mbs , [sum (mbsi )] ), axis = 0 )
341
+ elif n1_0 == 1 :
342
+ mbs += sum (mbsi )
343
+ else :
344
+ raise '#ERROR'
345
+ else :
346
+ n1 += len (mbsi )
347
+ mbs = np .concatenate ( (mbs , mbsi ), axis = 0 )
348
+ elif len (mbtype ) == 2 :
349
+ mbsi = get_sbop (mbtype , m , zsm , sigma = sigmas [0 ], \
350
+ dgrid = dgrids [0 ], rcut = rcut , rpower = rpower )[1 ]
351
+
352
+ if alchemy :
353
+ n2 = len (mbsi )
354
+ n2_0 = mbs .shape [0 ]
355
+ if n2_0 == n1 :
356
+ mbs = np .concatenate ( (mbs , mbsi ), axis = 0 )
357
+ elif n2_0 == n1 + n2 :
358
+ t = mbs [n1 :n1 + n2 ] + mbsi
359
+ mbs [n1 :n1 + n2 ] = t
360
+ else :
361
+ raise '#ERROR'
362
+ else :
363
+ n2 += len (mbsi )
364
+ mbs = np .concatenate ( (mbs , mbsi ), axis = 0 )
365
+ else : # len(mbtype) == 3:
366
+ mbsi = get_sbot (mbtype , m , zsm , sigma = sigmas [1 ], \
367
+ dgrid = dgrids [1 ], rcut = rcut )[1 ]
368
+ if alchemy :
369
+ n3 = len (mbsi )
370
+ n3_0 = mbs .shape [0 ]
371
+ if n3_0 == n1 + n2 :
372
+ mbs = np .concatenate ( (mbs , mbsi ), axis = 0 )
373
+ elif n3_0 == n1 + n2 + n3 :
374
+ t = mbs [n1 + n2 :n1 + n2 + n3 ] + mbsi
375
+ mbs [n1 + n2 :n1 + n2 + n3 ] = t
376
+ else :
377
+ raise '#ERROR'
378
+ else :
379
+ n3 += len (mbsi )
380
+ mbs = np .concatenate ( (mbs , mbsi ), axis = 0 )
381
+
382
+ return mbs
383
+
0 commit comments