qmlcode · andersx · Jul 3, 2017 · Jul 3, 2017 · Jul 3, 2017 · Jul 3, 2017
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
@@ -106,19 +106,25 @@ The easiest way to calculate the kernel matrix using an explicit, local represen
 .. code:: python
 
     import numpy as np
-    from qml.wrappers import get_atomic_kernels_gaussian
+    from qml.kernels import get_local_kernels_gaussian
 
     # Assume the QM7 dataset is loaded into a list of Compound()
     for compound in qm7:
 
         # Generate the desired representation for each compound
         compound.generate_atomic_coulomb_matrix(size=23, sort="row-norm")
 
+    # Make a big array with all the atomic representations
+    X = np.concatenate([mol.representation for mol in qm7])
+
+    # Make an array with the number of atoms in each compound
+    N = np.array([mol.natoms for mol in qm7])
+
     # List of kernel-widths
     sigmas = [50.0, 100.0, 200.0]
 
     # Calculate the kernel-matrix
-    K = get_atomic_kernels_gaussian(qm7, qm7, sigmas)
+    K = get_local_kernels_gaussian(X, X, N, N, sigmas)
 
     print(K.shape)
 

diff --git a/qml/fkernels.f90 b/qml/fkernels.f90
@@ -20,6 +20,181 @@
 ! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 ! SOFTWARE.
 
+subroutine fget_local_kernels_gaussian(q1, q2, n1, n2, sigmas, &
+        & nm1, nm2, nsigmas, kernels)
+
+    implicit none
+
+    double precision, dimension(:,:), intent(in) :: q1
+    double precision, dimension(:,:), intent(in) :: q2
+
+    ! List of numbers of atoms in each molecule
+    integer, dimension(:), intent(in) :: n1
+    integer, dimension(:), intent(in) :: n2
+
+    ! Sigma in the Gaussian kernel
+    double precision, dimension(:), intent(in) :: sigmas
+
+    ! Number of molecules
+    integer, intent(in) :: nm1
+    integer, intent(in) :: nm2
+
+    ! Number of sigmas
+    integer, intent(in) :: nsigmas
+
+    ! -1.0 / sigma^2 for use in the kernel
+    double precision, dimension(nsigmas) :: inv_sigma2
+
+    ! Resulting alpha vector
+    double precision, dimension(nsigmas,nm1,nm2), intent(out) :: kernels
+
+    ! Internal counters
+    integer :: a, b, i, j, k, ni, nj
+
+    ! Temporary variables necessary for parallelization
+    double precision, allocatable, dimension(:,:) :: atomic_distance
+
+    integer, allocatable, dimension(:) :: i_starts
+    integer, allocatable, dimension(:) :: j_starts
+
+    allocate(i_starts(nm1))
+    allocate(j_starts(nm2))
+
+    !$OMP PARALLEL DO
+    do i = 1, nm1
+        i_starts(i) = sum(n1(:i)) - n1(i)
+    enddo
+    !$OMP END PARALLEL DO
+
+    !$OMP PARALLEL DO
+    do j = 1, nm2
+        j_starts(j) = sum(n2(:j)) - n2(j)
+    enddo
+    !$OMP END PARALLEL DO
+
+    inv_sigma2(:) = -0.5d0 / (sigmas(:))**2
+    kernels(:,:,:) = 0.0d0
+
+    allocate(atomic_distance(maxval(n1), maxval(n2)))
+    atomic_distance(:,:) = 0.0d0
+
+    !$OMP PARALLEL DO PRIVATE(atomic_distance,ni,nj)
+     do a = 1, nm1
+        ni = n1(a)
+        do b = 1, nm2
+            nj = n2(b)
+
+            atomic_distance(:,:) = 0.0d0
+            do i = 1, ni
+                do j = 1, nj
+
+                    atomic_distance(i, j) = sum((q1(:,i + i_starts(a)) - q2(:,j + j_starts(b)))**2)
+
+                enddo
+            enddo
+
+            do k = 1, nsigmas
+                kernels(k, a, b) =  sum(exp(atomic_distance(:ni,:nj) * inv_sigma2(k)))
+            enddo
+
+        enddo
+    enddo
+    !$OMP END PARALLEL DO
+
+    deallocate(atomic_distance)
+    deallocate(i_starts)
+    deallocate(j_starts)
+
+end subroutine fget_local_kernels_gaussian
+
+subroutine fget_local_kernels_laplacian(q1, q2, n1, n2, sigmas, &
+        & nm1, nm2, nsigmas, kernels)
+
+    implicit none
+
+    double precision, dimension(:,:), intent(in) :: q1
+    double precision, dimension(:,:), intent(in) :: q2
+
+    ! List of numbers of atoms in each molecule
+    integer, dimension(:), intent(in) :: n1
+    integer, dimension(:), intent(in) :: n2
+
+    ! Sigma in the Gaussian kernel
+    double precision, dimension(:), intent(in) :: sigmas
+
+    ! Number of molecules
+    integer, intent(in) :: nm1
+    integer, intent(in) :: nm2
+
+    ! Number of sigmas
+    integer, intent(in) :: nsigmas
+
+    ! -1.0 / sigma^2 for use in the kernel
+    double precision, dimension(nsigmas) :: inv_sigma2
+
+    ! Resulting alpha vector
+    double precision, dimension(nsigmas,nm1,nm2), intent(out) :: kernels
+
+    ! Internal counters
+    integer :: a, b, i, j, k, ni, nj
+
+    ! Temporary variables necessary for parallelization
+    double precision, allocatable, dimension(:,:) :: atomic_distance
+
+    integer, allocatable, dimension(:) :: i_starts
+    integer, allocatable, dimension(:) :: j_starts
+
+    allocate(i_starts(nm1))
+    allocate(j_starts(nm2))
+
+    !$OMP PARALLEL DO
+    do i = 1, nm1
+        i_starts(i) = sum(n1(:i)) - n1(i)
+    enddo
+    !$OMP END PARALLEL DO
+
+    !$OMP PARALLEL DO
+    do j = 1, nm2
+        j_starts(j) = sum(n2(:j)) - n2(j)
+    enddo
+    !$OMP END PARALLEL DO
+
+    inv_sigma2(:) = -1.0d0 / sigmas(:)
+    kernels(:,:,:) = 0.0d0
+
+    allocate(atomic_distance(maxval(n1), maxval(n2)))
+    atomic_distance(:,:) = 0.0d0
+
+    !$OMP PARALLEL DO PRIVATE(atomic_distance,ni,nj)
+     do a = 1, nm1
+        ni = n1(a)
+        do b = 1, nm2
+            nj = n2(b)
+
+            atomic_distance(:,:) = 0.0d0
+            do i = 1, ni
+                do j = 1, nj
+
+                    atomic_distance(i, j) = sum(abs(q1(:,i + i_starts(a)) - q2(:,j + j_starts(b))))
+
+                enddo
+            enddo
+
+
+            do k = 1, nsigmas
+                kernels(k, a, b) =  sum(exp(atomic_distance(:ni,:nj) * inv_sigma2(k)))
+            enddo
+
+        enddo
+    enddo
+    !$OMP END PARALLEL DO
+
+    deallocate(atomic_distance)
+    deallocate(i_starts)
+    deallocate(j_starts)
+
+end subroutine fget_local_kernels_laplacian
+
 subroutine fget_vector_kernels_laplacian(q1, q2, n1, n2, sigmas, &
         & nm1, nm2, nsigmas, kernels)
 

diff --git a/qml/kernels.py b/qml/kernels.py
@@ -29,6 +29,9 @@
 from .fkernels import fsargan_kernel
 from .fkernels import fmatern_kernel_l2
 
+from .fkernels import fget_local_kernels_gaussian
+from .fkernels import fget_local_kernels_laplacian
+
 def laplacian_kernel(A, B, sigma):
     """ Calculates the Laplacian kernel matrix K, where :math:`K_{ij}`:
 
@@ -183,3 +186,84 @@ def matern_kernel(A, B, sigma, order = 0, metric = "l1"):
 
     return K
 
+def get_local_kernels_gaussian(A, B, na, nb, sigmas):
+    """ Calculates the Gaussian kernel matrix K, for a local representation where :math:`K_{ij}`:
+
+            :math:`K_{ij} = \sum_{a \in i} \sum_{b \in j} \\exp \\big( -\\frac{\\|A_a - B_b\\|_2^2}{2\sigma^2} \\big)`
+
+        Where :math:`A_{a}` and :math:`B_{b}` are representation vectors.
+
+        Note that the input array is one big 2D array with all atoms concatenated along the same axis.
+        Further more a series of kernels is produced (since calculating the distance matrix is expensive
+        but getting the resulting kernels elements for several sigmas is not.)
+
+        K is calculated using an OpenMP parallel Fortran routine.
+
+        :param A: 2D array of descriptors - shape (total atoms A, representation size).
+        :type A: numpy array
+        :param B: 2D array of descriptors - shape (total atoms B, representation size).
+        :type B: numpy array
+        :param na: 1D array containing numbers of atoms in each compound.
+        :type na: numpy array
+        :param nb: 1D array containing numbers of atoms in each compound.
+        :type nb: numpy array
+        :param sigma: The value of sigma in the kernel matrix.
+        :type sigma: float
+
+        :return: The Gaussian kernel matrix - shape (nsigmas, N, M)
+        :rtype: numpy array
+    """
+
+    assert np.sum(na) == A.shape[0], "Error in A input"
+    assert np.sum(nb) == B.shape[0], "Error in B input"
+
+    assert A.shape[1] == B.shape[1], "Error in representation sizes"
+
+    nma = len(na)
+    nmb = len(nb)
+
+    sigmas = np.asarray(sigmas)
+    nsigmas = len(sigmas)
+
+    return fget_local_kernels_gaussian(A.T, B.T, na, nb, sigmas, nma, nmb, nsigmas)
+
+def get_local_kernels_laplacian(A, B, na, nb, sigmas):
+    """ Calculates the Local Laplacian kernel matrix K, for a local representation where :math:`K_{ij}`:
+
+            :math:`K_{ij} = \sum_{a \in i} \sum_{b \in j} \\exp \\big( -\\frac{\\|A_a - B_b\\|_1}{\sigma} \\big)`
+
+        Where :math:`A_{a}` and :math:`B_{b}` are representation vectors.
+
+        Note that the input array is one big 2D array with all atoms concatenated along the same axis.
+        Further more a series of kernels is produced (since calculating the distance matrix is expensive
+        but getting the resulting kernels elements for several sigmas is not.)
+
+        K is calculated using an OpenMP parallel Fortran routine.
+
+        :param A: 2D array of descriptors - shape (N, representation size).
+        :type A: numpy array
+        :param B: 2D array of descriptors - shape (M, representation size).
+        :type B: numpy array
+        :param na: 1D array containing numbers of atoms in each compound.
+        :type na: numpy array
+        :param nb: 1D array containing numbers of atoms in each compound.
+        :type nb: numpy array
+        :param sigmas: List of the sigmas.
+        :type sigmas: list
+
+        :return: The Laplacian kernel matrix - shape (nsigmas, N, M)
+        :rtype: numpy array
+    """
+
+    assert np.sum(na) == A.shape[0], "Error in A input"
+    assert np.sum(nb) == B.shape[0], "Error in B input"
+
+    assert A.shape[1] == B.shape[1], "Error in representation sizes"
+
+    nma = len(na)
+    nmb = len(nb)
+
+    sigmas = np.asarray(sigmas)
+    nsigmas = len(sigmas)
+
+    return fget_local_kernels_laplacian(A.T, B.T, na, nb, sigmas, nma, nmb, nsigmas)
diff --git a/qml/wrappers.py b/qml/wrappers.py
@@ -24,6 +24,7 @@
 
 from .fkernels import fget_vector_kernels_gaussian
 from .fkernels import fget_vector_kernels_laplacian
+from .fkernels import fget_local_kernels_gaussian
 
 from .arad import get_local_kernels_arad
 from .arad import get_local_symmetric_kernels_arad
@@ -55,7 +56,6 @@ def get_atomic_kernels_laplacian(mols1, mols2, sigmas):
     x1 = np.swapaxes(x1, 0, 2)
     x2 = np.swapaxes(x2, 0, 2)
 
-
     sigmas = np.asarray(sigmas, dtype=np.float64)
     nsigmas = sigmas.size