Skip to content

Commit 88686fb

Browse files
committed
Made omp less memory intensive in acsf, including a fix to qmlcode#90
1 parent e8224b5 commit 88686fb

File tree

1 file changed

+53
-29
lines changed

1 file changed

+53
-29
lines changed

qml/representations/facsf.f90

+53-29
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ subroutine fgenerate_acsf(coordinates, nuclear_charges, elements, &
102102
integer, allocatable, dimension(:) :: element_types
103103
double precision :: rij, rik, angle, invcut
104104
double precision, allocatable, dimension(:) :: radial, angular, a, b, c
105-
double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay, rep3
105+
double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay, rep_subset
106106

107107
double precision, parameter :: pi = 4.0d0 * atan(1.0d0)
108108

@@ -158,27 +158,34 @@ subroutine fgenerate_acsf(coordinates, nuclear_charges, elements, &
158158

159159
! Allocate temporary
160160
allocate(radial(nbasis2))
161+
allocate(rep_subset(natoms, nelements * nbasis2))
161162

162-
!$OMP PARALLEL DO PRIVATE(n,m,rij,radial) REDUCTION(+:rep)
163+
rep_subset = 0.0d0
164+
165+
!$OMP PARALLEL DO PRIVATE(n,m,rij,radial) COLLAPSE(2) REDUCTION(+:rep_subset) SCHEDULE(dynamic)
163166
do i = 1, natoms
164-
! index of the element of atom i
165-
m = element_types(i)
166-
do j = i + 1, natoms
167-
! index of the element of atom j
168-
n = element_types(j)
169-
! distance between atoms i and j
167+
do j = 1, natoms
168+
if (j .le. i) cycle
170169
rij = distance_matrix(i,j)
171170
if (rij <= rcut) then
171+
! index of the element of atom i
172+
m = element_types(i)
173+
! index of the element of atom j
174+
n = element_types(j)
175+
! distance between atoms i and j
172176
! two body term of the representation
173177
radial = exp(-eta2*(rij - Rs2)**2) * rdecay(i,j)
174-
rep(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
175-
rep(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep(j, (m-1)*nbasis2 + 1:m*nbasis2) + radial
178+
rep_subset(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep_subset(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
179+
rep_subset(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep_subset(j, (m-1)*nbasis2 + 1:m*nbasis2) + radial
176180
endif
177181
enddo
178182
enddo
179183
!$OMP END PARALLEL DO
180184

185+
rep(:, 1:nelements * nbasis2) = rep_subset(:,:)
186+
181187
deallocate(radial)
188+
deallocate(rep_subset)
182189

183190
! number of radial basis functions in the three body term
184191
nbasis3 = size(Rs3)
@@ -191,19 +198,19 @@ subroutine fgenerate_acsf(coordinates, nuclear_charges, elements, &
191198
rdecay = decay(distance_matrix, invcut, natoms)
192199

193200
! Allocate temporary
194-
allocate(rep3(natoms,rep_size))
201+
allocate(rep_subset(natoms,rep_size - nelements * nbasis2))
195202
allocate(a(3))
196203
allocate(b(3))
197204
allocate(c(3))
198205
allocate(radial(nbasis3))
199206
allocate(angular(nabasis))
200207

201-
rep3 = 0.0d0
208+
rep_subset = 0.0d0
202209

203210
! This could probably be done more efficiently if it's a bottleneck
204211
! Also the order is a bit wobbly compared to the tensorflow implementation
205212
!$OMP PARALLEL DO PRIVATE(rij, n, rik, m, a, b, c, angle, radial, angular, &
206-
!$OMP p, q, s, z) REDUCTION(+:rep3) COLLAPSE(2) SCHEDULE(dynamic)
213+
!$OMP p, q, s, z) REDUCTION(+:rep_subset) COLLAPSE(2) SCHEDULE(dynamic)
207214
do i = 1, natoms
208215
do j = 1, natoms - 1
209216
if (i .eq. j) cycle
@@ -234,24 +241,24 @@ subroutine fgenerate_acsf(coordinates, nuclear_charges, elements, &
234241
! The highest of the element indices for atoms j and k
235242
q = max(n,m) - 1
236243
! calculate the indices that the three body terms should be added to
237-
s = nelements * nbasis2 + nbasis3 * nabasis * (-(p * (p + 1))/2 + q + nelements * p) + 1
244+
s = nbasis3 * nabasis * (-(p * (p + 1))/2 + q + nelements * p) + 1
238245
do l = 1, nbasis3
239246
! calculate the indices that the three body terms should be added to
240247
z = s + (l-1) * nabasis
241248
! Add the contributions from atoms i,j and k
242-
rep3(i, z:z + nabasis - 1) = rep3(i, z:z + nabasis - 1) + angular * radial(l)
249+
rep_subset(i, z:z + nabasis - 1) = rep_subset(i, z:z + nabasis - 1) + angular * radial(l)
243250
enddo
244251
enddo
245252
enddo
246253
enddo
247254
!$OMP END PARALLEL DO
248255

249-
rep = rep + rep3
256+
rep(:, nelements * nbasis2 + 1:) = rep_subset(:,:)
250257

251258
deallocate(element_types)
252259
deallocate(rdecay)
253260
deallocate(distance_matrix)
254-
deallocate(rep3)
261+
deallocate(rep_subset)
255262
deallocate(a)
256263
deallocate(b)
257264
deallocate(c)
@@ -294,7 +301,9 @@ subroutine fgenerate_acsf_and_gradients(coordinates, nuclear_charges, elements,
294301
double precision, allocatable, dimension(:) :: d_angular_d_i, d_angular_d_j, d_angular_d_k
295302
double precision, allocatable, dimension(:, :) :: distance_matrix, rdecay, sq_distance_matrix
296303
double precision, allocatable, dimension(:, :) :: inv_distance_matrix, inv_sq_distance_matrix
304+
double precision, allocatable, dimension(:, :) :: rep_subset
297305
double precision, allocatable, dimension(:, :, :) :: atom_grad
306+
double precision, allocatable, dimension(:, :, :, :) :: grad_subset
298307

299308
double precision, parameter :: pi = 4.0d0 * atan(1.0d0)
300309

@@ -339,7 +348,8 @@ subroutine fgenerate_acsf_and_gradients(coordinates, nuclear_charges, elements,
339348

340349
!$OMP PARALLEL DO PRIVATE(rij,rij2,invrij,invrij2) SCHEDULE(dynamic)
341350
do i = 1, natoms
342-
do j = i+1, natoms
351+
do j = 1, natoms
352+
if (j .le. i) cycle
343353
rij = norm2(coordinates(j,:) - coordinates(i,:))
344354
distance_matrix(i, j) = rij
345355
distance_matrix(j, i) = rij
@@ -370,9 +380,14 @@ subroutine fgenerate_acsf_and_gradients(coordinates, nuclear_charges, elements,
370380
allocate(radial(nbasis2))
371381
allocate(radial_part(nbasis2))
372382
allocate(part(nbasis2))
383+
allocate(rep_subset(natoms, nelements * nbasis2))
384+
allocate(grad_subset(natoms, nelements * nbasis2, natoms, 3))
373385

374-
!!$OMP PARALLEL DO PRIVATE(m,n,rij,invrij,radial_base,radial,radial_part,part) REDUCTION(+:rep,grad) &
375-
!!$OMP SCHEDULE(dynamic)
386+
grad_subset = 0.0d0
387+
rep_subset = 0.0d0
388+
389+
!$OMP PARALLEL DO PRIVATE(m,n,rij,invrij,radial_base,radial,radial_part,part) REDUCTION(+:rep_subset,grad_subset) &
390+
!$OMP SCHEDULE(dynamic)
376391
do i = 1, natoms
377392
! The element index of atom i
378393
m = element_types(i)
@@ -388,28 +403,37 @@ subroutine fgenerate_acsf_and_gradients(coordinates, nuclear_charges, elements,
388403
! The full two body term between atom i and j
389404
radial = radial_base * rdecay(i,j)
390405
! Add the contributions from atoms i and j
391-
rep(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
392-
rep(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep(j, (m-1)*nbasis2 + 1:m*nbasis2) + radial
406+
rep_subset(i, (n-1)*nbasis2 + 1:n*nbasis2) = rep_subset(i, (n-1)*nbasis2 + 1:n*nbasis2) + radial
407+
rep_subset(j, (m-1)*nbasis2 + 1:m*nbasis2) = rep_subset(j, (m-1)*nbasis2 + 1:m*nbasis2) + radial
393408
! Part of the gradients that can be reused for x,y and z coordinates
394409
radial_part = - radial_base * invrij * (2.0d0 * eta2 * (rij - Rs2) * rdecay(i,j) + &
395410
& 0.5d0 * pi * sin(pi*rij * invcut) * invcut)
396411
do k = 1, 3
397412
! The gradients wrt coordinates
398413
part = radial_part * (coordinates(i,k) - coordinates(j,k))
399-
grad(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) + part
400-
grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) = grad(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) - part
401-
grad(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) - part
402-
grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) = grad(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) + part
414+
grad_subset(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) = &
415+
grad_subset(i, (n-1)*nbasis2 + 1:n*nbasis2, i, k) + part
416+
grad_subset(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) = &
417+
grad_subset(i, (n-1)*nbasis2 + 1:n*nbasis2, j, k) - part
418+
grad_subset(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) = &
419+
grad_subset(j, (m-1)*nbasis2 + 1:m*nbasis2, j, k) - part
420+
grad_subset(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) = &
421+
grad_subset(j, (m-1)*nbasis2 + 1:m*nbasis2, i, k) + part
403422
enddo
404423
endif
405424
enddo
406425
enddo
407-
!!$OMP END PARALLEL DO
426+
!$OMP END PARALLEL DO
427+
428+
rep(:,:nelements*nbasis2) = rep_subset(:,:)
429+
grad(:,:nelements*nbasis2,:,:) = grad_subset(:,:,:,:)
408430

409431
deallocate(radial_base)
410432
deallocate(radial)
411433
deallocate(radial_part)
412434
deallocate(part)
435+
deallocate(rep_subset)
436+
deallocate(grad_subset)
413437

414438

415439
! Number of radial basis functions in the three body term
@@ -549,8 +573,8 @@ subroutine fgenerate_acsf_and_gradients(coordinates, nuclear_charges, elements,
549573
enddo
550574
enddo
551575
enddo
552-
rep(i, twobody_size + 1:) = rep(i, twobody_size + 1:) + atom_rep
553-
grad(i, twobody_size + 1:,:,:) = grad(i, twobody_size + 1:,:,:) + atom_grad
576+
rep(i, twobody_size + 1:) = atom_rep
577+
grad(i, twobody_size + 1:,:,:) = atom_grad
554578
enddo
555579
!$OMP END PARALLEL DO
556580

0 commit comments

Comments
 (0)