Skip to content
This repository was archived by the owner on Dec 8, 2024. It is now read-only.

Atomic kernels memory #27

Merged
merged 3 commits into from
Jul 3, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions docs/source/examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -106,19 +106,25 @@ The easiest way to calculate the kernel matrix using an explicit, local represen
.. code:: python

import numpy as np
from qml.wrappers import get_atomic_kernels_gaussian
from qml.kernels import get_local_kernels_gaussian

# Assume the QM7 dataset is loaded into a list of Compound()
for compound in qm7:

# Generate the desired representation for each compound
compound.generate_atomic_coulomb_matrix(size=23, sort="row-norm")

# Make a big array with all the atomic representations
X = np.concatenate([mol.representation for mol in qm7])

# Make an array with the number of atoms in each compound
N = np.array([mol.natoms for mol in qm7])

# List of kernel-widths
sigmas = [50.0, 100.0, 200.0]

# Calculate the kernel-matrix
K = get_atomic_kernels_gaussian(qm7, qm7, sigmas)
K = get_local_kernels_gaussian(X, X, N, N, sigmas)

print(K.shape)

Expand Down
175 changes: 175 additions & 0 deletions qml/fkernels.f90
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,181 @@
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
! SOFTWARE.

subroutine fget_local_kernels_gaussian(q1, q2, n1, n2, sigmas, &
& nm1, nm2, nsigmas, kernels)

implicit none

double precision, dimension(:,:), intent(in) :: q1
double precision, dimension(:,:), intent(in) :: q2

! List of numbers of atoms in each molecule
integer, dimension(:), intent(in) :: n1
integer, dimension(:), intent(in) :: n2

! Sigma in the Gaussian kernel
double precision, dimension(:), intent(in) :: sigmas

! Number of molecules
integer, intent(in) :: nm1
integer, intent(in) :: nm2

! Number of sigmas
integer, intent(in) :: nsigmas

! -1.0 / sigma^2 for use in the kernel
double precision, dimension(nsigmas) :: inv_sigma2

! Resulting alpha vector
double precision, dimension(nsigmas,nm1,nm2), intent(out) :: kernels

! Internal counters
integer :: a, b, i, j, k, ni, nj

! Temporary variables necessary for parallelization
double precision, allocatable, dimension(:,:) :: atomic_distance

integer, allocatable, dimension(:) :: i_starts
integer, allocatable, dimension(:) :: j_starts

allocate(i_starts(nm1))
allocate(j_starts(nm2))

!$OMP PARALLEL DO
do i = 1, nm1
i_starts(i) = sum(n1(:i)) - n1(i)
enddo
!$OMP END PARALLEL DO

!$OMP PARALLEL DO
do j = 1, nm2
j_starts(j) = sum(n2(:j)) - n2(j)
enddo
!$OMP END PARALLEL DO

inv_sigma2(:) = -0.5d0 / (sigmas(:))**2
kernels(:,:,:) = 0.0d0

allocate(atomic_distance(maxval(n1), maxval(n2)))
atomic_distance(:,:) = 0.0d0

!$OMP PARALLEL DO PRIVATE(atomic_distance,ni,nj)
do a = 1, nm1
ni = n1(a)
do b = 1, nm2
nj = n2(b)

atomic_distance(:,:) = 0.0d0
do i = 1, ni
do j = 1, nj

atomic_distance(i, j) = sum((q1(:,i + i_starts(a)) - q2(:,j + j_starts(b)))**2)

enddo
enddo

do k = 1, nsigmas
kernels(k, a, b) = sum(exp(atomic_distance(:ni,:nj) * inv_sigma2(k)))
enddo

enddo
enddo
!$OMP END PARALLEL DO

deallocate(atomic_distance)
deallocate(i_starts)
deallocate(j_starts)

end subroutine fget_local_kernels_gaussian

subroutine fget_local_kernels_laplacian(q1, q2, n1, n2, sigmas, &
& nm1, nm2, nsigmas, kernels)

implicit none

double precision, dimension(:,:), intent(in) :: q1
double precision, dimension(:,:), intent(in) :: q2

! List of numbers of atoms in each molecule
integer, dimension(:), intent(in) :: n1
integer, dimension(:), intent(in) :: n2

! Sigma in the Gaussian kernel
double precision, dimension(:), intent(in) :: sigmas

! Number of molecules
integer, intent(in) :: nm1
integer, intent(in) :: nm2

! Number of sigmas
integer, intent(in) :: nsigmas

! -1.0 / sigma^2 for use in the kernel
double precision, dimension(nsigmas) :: inv_sigma2

! Resulting alpha vector
double precision, dimension(nsigmas,nm1,nm2), intent(out) :: kernels

! Internal counters
integer :: a, b, i, j, k, ni, nj

! Temporary variables necessary for parallelization
double precision, allocatable, dimension(:,:) :: atomic_distance

integer, allocatable, dimension(:) :: i_starts
integer, allocatable, dimension(:) :: j_starts

allocate(i_starts(nm1))
allocate(j_starts(nm2))

!$OMP PARALLEL DO
do i = 1, nm1
i_starts(i) = sum(n1(:i)) - n1(i)
enddo
!$OMP END PARALLEL DO

!$OMP PARALLEL DO
do j = 1, nm2
j_starts(j) = sum(n2(:j)) - n2(j)
enddo
!$OMP END PARALLEL DO

inv_sigma2(:) = -1.0d0 / sigmas(:)
kernels(:,:,:) = 0.0d0

allocate(atomic_distance(maxval(n1), maxval(n2)))
atomic_distance(:,:) = 0.0d0

!$OMP PARALLEL DO PRIVATE(atomic_distance,ni,nj)
do a = 1, nm1
ni = n1(a)
do b = 1, nm2
nj = n2(b)

atomic_distance(:,:) = 0.0d0
do i = 1, ni
do j = 1, nj

atomic_distance(i, j) = sum(abs(q1(:,i + i_starts(a)) - q2(:,j + j_starts(b))))

enddo
enddo


do k = 1, nsigmas
kernels(k, a, b) = sum(exp(atomic_distance(:ni,:nj) * inv_sigma2(k)))
enddo

enddo
enddo
!$OMP END PARALLEL DO

deallocate(atomic_distance)
deallocate(i_starts)
deallocate(j_starts)

end subroutine fget_local_kernels_laplacian

subroutine fget_vector_kernels_laplacian(q1, q2, n1, n2, sigmas, &
& nm1, nm2, nsigmas, kernels)

Expand Down
84 changes: 84 additions & 0 deletions qml/kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
from .fkernels import fsargan_kernel
from .fkernels import fmatern_kernel_l2

from .fkernels import fget_local_kernels_gaussian
from .fkernels import fget_local_kernels_laplacian

def laplacian_kernel(A, B, sigma):
""" Calculates the Laplacian kernel matrix K, where :math:`K_{ij}`:

Expand Down Expand Up @@ -183,3 +186,84 @@ def matern_kernel(A, B, sigma, order = 0, metric = "l1"):

return K

def get_local_kernels_gaussian(A, B, na, nb, sigmas):
""" Calculates the Gaussian kernel matrix K, for a local representation where :math:`K_{ij}`:

:math:`K_{ij} = \sum_{a \in i} \sum_{b \in j} \\exp \\big( -\\frac{\\|A_a - B_b\\|_2^2}{2\sigma^2} \\big)`

Where :math:`A_{a}` and :math:`B_{b}` are representation vectors.

Note that the input array is one big 2D array with all atoms concatenated along the same axis.
Further more a series of kernels is produced (since calculating the distance matrix is expensive
but getting the resulting kernels elements for several sigmas is not.)

K is calculated using an OpenMP parallel Fortran routine.

:param A: 2D array of descriptors - shape (total atoms A, representation size).
:type A: numpy array
:param B: 2D array of descriptors - shape (total atoms B, representation size).
:type B: numpy array
:param na: 1D array containing numbers of atoms in each compound.
:type na: numpy array
:param nb: 1D array containing numbers of atoms in each compound.
:type nb: numpy array
:param sigma: The value of sigma in the kernel matrix.
:type sigma: float

:return: The Gaussian kernel matrix - shape (nsigmas, N, M)
:rtype: numpy array
"""

assert np.sum(na) == A.shape[0], "Error in A input"
assert np.sum(nb) == B.shape[0], "Error in B input"

assert A.shape[1] == B.shape[1], "Error in representation sizes"

nma = len(na)
nmb = len(nb)

sigmas = np.asarray(sigmas)
nsigmas = len(sigmas)

return fget_local_kernels_gaussian(A.T, B.T, na, nb, sigmas, nma, nmb, nsigmas)

def get_local_kernels_laplacian(A, B, na, nb, sigmas):
""" Calculates the Local Laplacian kernel matrix K, for a local representation where :math:`K_{ij}`:

:math:`K_{ij} = \sum_{a \in i} \sum_{b \in j} \\exp \\big( -\\frac{\\|A_a - B_b\\|_1}{\sigma} \\big)`

Where :math:`A_{a}` and :math:`B_{b}` are representation vectors.

Note that the input array is one big 2D array with all atoms concatenated along the same axis.
Further more a series of kernels is produced (since calculating the distance matrix is expensive
but getting the resulting kernels elements for several sigmas is not.)

K is calculated using an OpenMP parallel Fortran routine.

:param A: 2D array of descriptors - shape (N, representation size).
:type A: numpy array
:param B: 2D array of descriptors - shape (M, representation size).
:type B: numpy array
:param na: 1D array containing numbers of atoms in each compound.
:type na: numpy array
:param nb: 1D array containing numbers of atoms in each compound.
:type nb: numpy array
:param sigmas: List of the sigmas.
:type sigmas: list

:return: The Laplacian kernel matrix - shape (nsigmas, N, M)
:rtype: numpy array
"""

assert np.sum(na) == A.shape[0], "Error in A input"
assert np.sum(nb) == B.shape[0], "Error in B input"

assert A.shape[1] == B.shape[1], "Error in representation sizes"

nma = len(na)
nmb = len(nb)

sigmas = np.asarray(sigmas)
nsigmas = len(sigmas)

return fget_local_kernels_laplacian(A.T, B.T, na, nb, sigmas, nma, nmb, nsigmas)
2 changes: 1 addition & 1 deletion qml/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from .fkernels import fget_vector_kernels_gaussian
from .fkernels import fget_vector_kernels_laplacian
from .fkernels import fget_local_kernels_gaussian

from .arad import get_local_kernels_arad
from .arad import get_local_symmetric_kernels_arad
Expand Down Expand Up @@ -55,7 +56,6 @@ def get_atomic_kernels_laplacian(mols1, mols2, sigmas):
x1 = np.swapaxes(x1, 0, 2)
x2 = np.swapaxes(x2, 0, 2)


sigmas = np.asarray(sigmas, dtype=np.float64)
nsigmas = sigmas.size

Expand Down
Loading