From 52568f1bb75bd431d30696ff5aa61e3aefd07e83 Mon Sep 17 00:00:00 2001 From: "Anders S. Christensen" Date: Mon, 26 Jun 2017 11:29:45 +0200 Subject: [PATCH 1/4] Added linear kernel and test --- qml/fkernels.f90 | 47 +++++++++++++++++++++++++++++++++++++++++ qml/kernels.py | 31 +++++++++++++++++++++++++++ tests/test_kernels.py | 49 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 125 insertions(+), 2 deletions(-) diff --git a/qml/fkernels.f90 b/qml/fkernels.f90 index 3547d40be..0ab6d13e3 100644 --- a/qml/fkernels.f90 +++ b/qml/fkernels.f90 @@ -225,6 +225,53 @@ subroutine flaplacian_kernel(a, na, b, nb, k, sigma) end subroutine flaplacian_kernel +subroutine flinear_kernel(a, na, b, nb, k) + + implicit none + + double precision, dimension(:,:), intent(in) :: a + double precision, dimension(:,:), intent(in) :: b + + integer, intent(in) :: na, nb + + double precision, dimension(:,:), intent(inout) :: k + + integer :: i, j + + double precision, allocatable, dimension(:,:) :: a_normed + double precision, allocatable, dimension(:,:) :: b_normed + + allocate(a_normed(size(a, dim=1),size(a, dim=2))) + allocate(b_normed(size(b, dim=1),size(b, dim=2))) + + +!$OMP PARALLEL DO + do i = 1, na + a_normed(:,i) = a(:,i) / norm2(a(:,i)) + enddo +!$OMP END PARALLEL DO + +!$OMP PARALLEL DO + do i = 1, nb + b_normed(:,i) = b(:,i) / norm2(b(:,i)) + enddo +!$OMP END PARALLEL DO + + +!$OMP PARALLEL DO + do i = 1, nb + do j = 1, na + k(j,i) = dot_product(a_normed(:,j), b_normed(:,i)) + enddo + enddo +!$OMP END PARALLEL DO + + deallocate(a_normed) + deallocate(b_normed) + +end subroutine flinear_kernel + + subroutine fmatern_kernel_l2(a, na, b, nb, k, sigma, order) implicit none diff --git a/qml/kernels.py b/qml/kernels.py index 0f8c76e18..974781804 100644 --- a/qml/kernels.py +++ b/qml/kernels.py @@ -26,6 +26,7 @@ from .fkernels import fgaussian_kernel from .fkernels import flaplacian_kernel +from .fkernels import flinear_kernel from .fkernels import fsargan_kernel from .fkernels import fmatern_kernel_l2 @@ -87,6 +88,36 @@ def gaussian_kernel(A, B, sigma): return K +def linear_kernel(A, B): + """ Calculates the linear kernel matrix K, where :math:`K_{ij}`: + + :math:`K_{ij} = \hat{A}_i \cdot - \hat{B}_j` + + Where :math:`\hat{A}_{i}` and :math:`\hat{B}_{j}` are normalized + representation vectors. Note the normalization is handled inside + the kernel function. + + K is calculated using an OpenMP parallel Fortran routine. + + :param A: 2D array of descriptors - shape (N, representation size). + :type A: numpy array + :param B: 2D array of descriptors - shape (M, representation size). + :type B: numpy array + + :return: The Gaussian kernel matrix - shape (N, M) + :rtype: numpy array + """ + + na = A.shape[0] + nb = B.shape[0] + + K = np.empty((na, nb), order='F') + + # Note: Transposed for Fortran + flinear_kernel(A.T, na, B.T, nb, K) + + return K + def sargan_kernel(A, B, sigma, gammas): """ Calculates the Sargan kernel matrix K, where :math:`K_{ij}`: diff --git a/tests/test_kernels.py b/tests/test_kernels.py index c8fd433fb..c05be57f8 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -25,11 +25,16 @@ import sys import numpy as np import qml -from qml.kernels import laplacian_kernel, gaussian_kernel, \ - matern_kernel, sargan_kernel +from qml.kernels import laplacian_kernel +from qml.kernels import gaussian_kernel +from qml.kernels import linear_kernel +from qml.kernels import matern_kernel +from qml.kernels import sargan_kernel def test_laplacian_kernel(): + np.random.seed(666) + n_train = 25 n_test = 20 @@ -57,6 +62,8 @@ def test_laplacian_kernel(): def test_gaussian_kernel(): + np.random.seed(666) + n_train = 25 n_test = 20 @@ -82,7 +89,41 @@ def test_gaussian_kernel(): # Check for symmetry: assert np.allclose(Ksymm, Ksymm.T), "Error in Gaussian kernel" + +def test_linear_kernel(): + + np.random.seed(666) + + n_train = 25 + n_test = 20 + + # List of dummy representations + X = np.random.rand(n_train, 1000) + Xs = np.random.rand(n_test, 1000) + + sigma = 100.0 + + Ktest = np.zeros((n_train, n_test)) + + for i in range(n_train): + for j in range(n_test): + Ktest[i,j] = np.dot(X[i]/np.linalg.norm(X[i]), + Xs[j]/np.linalg.norm(Xs[j])) + + K = linear_kernel(X, Xs) + + # Compare two implementations: + assert np.allclose(K, Ktest), "Error in Gaussian kernel" + + Ksymm = linear_kernel(X, X) + + # Check for symmetry: + assert np.allclose(Ksymm, Ksymm.T), "Error in Gaussian kernel" + def test_matern_kernel(): + + np.random.seed(666) + for metric in ("l1", "l2"): for order in (0, 1, 2): print(metric,order) @@ -129,6 +170,9 @@ def matern(metric, order): assert np.allclose(Ksymm, Ksymm.T), "Error in Matern kernel" def test_sargan_kernel(): + + np.random.seed(666) + for ngamma in (0, 1, 2): sargan(ngamma) @@ -173,5 +217,6 @@ def sargan(ngamma): if __name__ == "__main__": test_laplacian_kernel() test_gaussian_kernel() + test_linear_kernel() test_matern_kernel() test_sargan_kernel() From c79dde57e5a37caf5dcd957575d141404b0a8514 Mon Sep 17 00:00:00 2001 From: "Anders S. Christensen" Date: Mon, 26 Jun 2017 11:45:33 +0200 Subject: [PATCH 2/4] Updated documentation for kernels and removed compiler warning (from frepresentations) --- qml/fkernels.f90 | 2 +- qml/frepresentations.f90 | 1 - qml/kernels.py | 24 ++++++++++++------------ 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/qml/fkernels.f90 b/qml/fkernels.f90 index 0ab6d13e3..485facf21 100644 --- a/qml/fkernels.f90 +++ b/qml/fkernels.f90 @@ -1,6 +1,6 @@ ! MIT License ! -! Copyright (c) 2016 Anders Steen Christensen +! Copyright (c) 2016 Anders Steen Christensen, Lars A. Bratholm, Felix A. Faber ! ! Permission is hereby granted, free of charge, to any person obtaining a copy ! of this software and associated documentation files (the "Software"), to deal diff --git a/qml/frepresentations.f90 b/qml/frepresentations.f90 index 61eef67ea..57e895807 100644 --- a/qml/frepresentations.f90 +++ b/qml/frepresentations.f90 @@ -230,7 +230,6 @@ subroutine fgenerate_local_coulomb_matrix(atomic_charges, coordinates, natoms, n double precision, allocatable, dimension(:, :, :) :: pair_distance_matrix double precision, allocatable, dimension(:, :) :: distance_matrix - double precision, allocatable, dimension(:, :) :: distance_matrix_tmp integer i, j, m, n, k diff --git a/qml/kernels.py b/qml/kernels.py index 974781804..1ea069de8 100644 --- a/qml/kernels.py +++ b/qml/kernels.py @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2016 Anders Steen Christensen, Felix Faber +# Copyright (c) 2016 Anders Steen Christensen, Felix A. Faber, Lars A. Bratholm # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -38,9 +38,9 @@ def laplacian_kernel(A, B, sigma): Where :math:`A_{i}` and :math:`B_{j}` are representation vectors. K is calculated using an OpenMP parallel Fortran routine. - :param A: 2D array of descriptors - shape (N, representation size). + :param A: 2D array of representations - shape (N, representation size). :type A: numpy array - :param B: 2D array of descriptors - shape (M, representation size). + :param B: 2D array of representations - shape (M, representation size). :type B: numpy array :param sigma: The value of sigma in the kernel matrix. :type sigma: float @@ -67,9 +67,9 @@ def gaussian_kernel(A, B, sigma): Where :math:`A_{i}` and :math:`B_{j}` are representation vectors. K is calculated using an OpenMP parallel Fortran routine. - :param A: 2D array of descriptors - shape (N, representation size). + :param A: 2D array of representations - shape (N, representation size). :type A: numpy array - :param B: 2D array of descriptors - shape (M, representation size). + :param B: 2D array of representations - shape (M, representation size). :type B: numpy array :param sigma: The value of sigma in the kernel matrix. :type sigma: float @@ -91,7 +91,7 @@ def gaussian_kernel(A, B, sigma): def linear_kernel(A, B): """ Calculates the linear kernel matrix K, where :math:`K_{ij}`: - :math:`K_{ij} = \hat{A}_i \cdot - \hat{B}_j` + :math:`K_{ij} = \hat{A}_i \cdot \hat{B}_j` Where :math:`\hat{A}_{i}` and :math:`\hat{B}_{j}` are normalized representation vectors. Note the normalization is handled inside @@ -99,9 +99,9 @@ def linear_kernel(A, B): K is calculated using an OpenMP parallel Fortran routine. - :param A: 2D array of descriptors - shape (N, representation size). + :param A: 2D array of representations - shape (N, representation size). :type A: numpy array - :param B: 2D array of descriptors - shape (M, representation size). + :param B: 2D array of representations - shape (M, representation size). :type B: numpy array :return: The Gaussian kernel matrix - shape (N, M) @@ -126,9 +126,9 @@ def sargan_kernel(A, B, sigma, gammas): Where :math:`A_{i}` and :math:`B_{j}` are representation vectors. K is calculated using an OpenMP parallel Fortran routine. - :param A: 2D array of descriptors - shape (N, representation size). + :param A: 2D array of representations - shape (N, representation size). :type A: numpy array - :param B: 2D array of descriptors - shape (M, representation size). + :param B: 2D array of representations - shape (M, representation size). :type B: numpy array :param sigma: The value of sigma in the kernel matrix. :type sigma: float @@ -168,9 +168,9 @@ def matern_kernel(A, B, sigma, order = 0, metric = "l1"): K is calculated using an OpenMP parallel Fortran routine. - :param A: 2D array of descriptors - shape (N, representation size). + :param A: 2D array of representations - shape (N, representation size). :type A: numpy array - :param B: 2D array of descriptors - shape (M, representation size). + :param B: 2D array of representations - shape (M, representation size). :type B: numpy array :param sigma: The value of sigma in the kernel matrix. :type sigma: float From 662712201285b279f7b1eed4c00899704c8c6e7b Mon Sep 17 00:00:00 2001 From: "Anders S. Christensen" Date: Tue, 4 Jul 2017 15:34:58 +0200 Subject: [PATCH 3/4] Added global ARAD and linear global kernel --- qml/arad.py | 82 +++++++++++ qml/farad_kernels.f90 | 330 +++++++++++++++++++++++++++++++++++++++--- qml/fkernels.f90 | 25 +--- qml/kernels.py | 6 +- tests/test_arad.py | 9 +- tests/test_kernels.py | 7 +- 6 files changed, 402 insertions(+), 57 deletions(-) diff --git a/qml/arad.py b/qml/arad.py index 528c4489e..886ecc136 100644 --- a/qml/arad.py +++ b/qml/arad.py @@ -24,6 +24,9 @@ import numpy as np +from .farad_kernels import fget_global_kernels_arad +from .farad_kernels import fget_global_symmetric_kernels_arad + from .farad_kernels import fget_local_kernels_arad from .farad_kernels import fget_local_symmetric_kernels_arad @@ -131,6 +134,85 @@ def generate_arad_representation(coordinates, nuclear_charges, size=23, cut_dist return M +def get_global_kernels_arad(X1, X2, sigmas, + width=0.2, cut_distance=5.0, r_width=1.0, c_width=0.5): + """ Calculates the global Gaussian kernel matrix K for atomic ARAD + descriptors for a list of different sigmas. Each kernel element + is the sum of all kernel elements between pairs of atoms in two molecules. + + K is calculated using an OpenMP parallel Fortran routine. + + :param X1: ARAD descriptors for molecules in set 1. + :type X1: numpy array + :param X2: Array of ARAD descriptors for molecules in set 2. + :type X2: numpy array + :param sigmas: List of sigmas for which to calculate the Kernel matrices. + :type sigmas: list + + :return: The kernel matrices for each sigma - shape (number_sigmas, number_molecules1, number_molecules2) + :rtype: numpy array + """ + + amax = X1.shape[1] + + assert X1.shape[3] == amax, "ERROR: Check ARAD decriptor sizes! code = 1" + assert X2.shape[1] == amax, "ERROR: Check ARAD decriptor sizes! code = 2" + assert X2.shape[3] == amax, "ERROR: Check ARAD decriptor sizes! code = 3" + + nm1 = X1.shape[0] + nm2 = X2.shape[0] + + N1 = np.empty(nm1, dtype = np.int32) + Z1_arad = np.zeros((nm1, amax, 2)) + for i in range(nm1): + N1[i] = len(np.where(X1[i,:,2,0] > 0)[0]) + Z1_arad[i] = X1[i,:,1:3,0] + + N2 = np.empty(nm2, dtype = np.int32) + Z2_arad = np.zeros((nm2, amax, 2)) + for i in range(nm2): + N2[i] = len(np.where(X2[i,:,2,0] > 0)[0]) + Z2_arad[i] = X2[i,:,1:3,0] + + sigmas = np.array(sigmas) + nsigmas = sigmas.size + + return fget_global_kernels_arad(X1, X2, Z1_arad, Z2_arad, N1, N2, sigmas, + nm1, nm2, nsigmas, width, cut_distance, r_width, c_width) + + +def get_global_symmetric_kernels_arad(X1, sigmas, + width=0.2, cut_distance=5.0, r_width=1.0, c_width=0.5): + """ Calculates the global Gaussian kernel matrix K for atomic ARAD + descriptors for a list of different sigmas. Each kernel element + is the sum of all kernel elements between pairs of atoms in two molecules. + + K is calculated using an OpenMP parallel Fortran routine. + + :param X1: ARAD descriptors for molecules in set 1. + :type X1: numpy array + :param sigmas: List of sigmas for which to calculate the Kernel matrices. + :type sigmas: list + + :return: The kernel matrices for each sigma - shape (number_sigmas, number_molecules1, number_molecules1) + :rtype: numpy array + """ + + nm1 = X1.shape[0] + amax = X1.shape[1] + + N1 = np.empty(nm1, dtype = np.int32) + Z1_arad = np.zeros((nm1, amax, 2)) + for i in range(nm1): + N1[i] = len(np.where(X1[i,:,2,0] > 0)[0]) + Z1_arad[i] = X1[i,:,1:3,0] + + sigmas = np.array(sigmas) + nsigmas = sigmas.size + + return fget_global_symmetric_kernels_arad(X1, Z1_arad, N1, sigmas, + nm1, nsigmas, width, cut_distance, r_width, c_width) + def get_local_kernels_arad(X1, X2, sigmas, width=0.2, cut_distance=5.0, r_width=1.0, c_width=0.5): diff --git a/qml/farad_kernels.f90 b/qml/farad_kernels.f90 index 188207e97..bc1c6f608 100644 --- a/qml/farad_kernels.f90 +++ b/qml/farad_kernels.f90 @@ -82,7 +82,6 @@ function atomic_distl2(X1, X2, N1, N2, sin1, sin2, width, cut_distance, r_width, aadist = aadist + d * (1.0d0 + x1(4,m_1)*x2(4,m_2) + x1(5,m_1)*x2(5,m_2)) - ! write (*,*) m_1, m_2, x1(4,m_1), x2(4,m_2) end if end do end do @@ -157,9 +156,6 @@ subroutine fget_local_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, nsi ! Value of PI at full FORTRAN precision. double precision, parameter :: pi = 4.0d0 * atan(1.0d0) - ! Small number // to test for numerical stability - double precision, parameter :: eps = 5.0e-12 - r_width2 = r_width**2 c_width2 = c_width**2 @@ -172,8 +168,6 @@ subroutine fget_local_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, nsi sin1 = 0.0d0 sin2 = 0.0d0 - ! write (*,*) "INV_CUT", inv_cut - !$OMP PARALLEL DO PRIVATE(ni) do i = 1, nm1 ni = n1(i) @@ -247,10 +241,7 @@ subroutine fget_local_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, nsi & * (r_width2/(r_width2 + (z1(i,i_1,1) - z2(j,j_1,1))**2) * & & c_width2/(c_width2 + (z1(i,i_1,2) - z2(j,j_1,2))**2)) - if (abs(l2dist) < eps) l2dist = 0.0d0 - atomic_distance(i_1,j_1) = l2dist - ! write (*,*) i_1, j_1, l2dist enddo enddo @@ -331,9 +322,6 @@ subroutine fget_local_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas, & ! Value of PI at full FORTRAN precision. double precision, parameter :: pi = 4.0d0 * atan(1.0d0) - ! Small number // to test for numerical stability - double precision, parameter :: eps = 5.0e-12 - r_width2 = r_width**2 c_width2 = c_width**2 @@ -388,8 +376,6 @@ subroutine fget_local_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas, & & * (r_width2/(r_width2 + (z1(i,i_1,1) - z1(j,j_1,1))**2) & & * c_width2/(c_width2 + (z1(i,i_1,2) - z1(j,j_1,2))**2)) - if (abs(l2dist) < eps) l2dist = 0.0d0 - atomic_distance(i_1,j_1) = l2dist enddo @@ -474,9 +460,6 @@ subroutine fget_atomic_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, na1, na2, ns ! Value of PI at full FORTRAN precision. double precision, parameter :: pi = 4.0d0 * atan(1.0d0) - ! Small number // to test for numerical stability - double precision, parameter :: eps = 5.0e-12 - r_width2 = r_width**2 c_width2 = c_width**2 @@ -534,8 +517,6 @@ subroutine fget_atomic_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, na1, na2, ns & * (r_width2/(r_width2 + (z1(i,1) - z2(j,1))**2) * & & c_width2/(c_width2 + (z1(i,2) - z2(j,2))**2)) - if (abs(l2dist) < eps) l2dist = 0.0d0 - do k = 1, nsigmas kernels(k, i, j) = exp(l2dist * inv_sigma2(k)) enddo @@ -610,9 +591,6 @@ subroutine fget_atomic_symmetric_kernels_arad(q1, z1, n1, sigmas, na1, nsigmas, ! Value of PI at full FORTRAN precision. double precision, parameter :: pi = 4.0d0 * atan(1.0d0) - ! Small number // to test for numerical stability - double precision, parameter :: eps = 5.0e-12 - r_width2 = r_width**2 c_width2 = c_width**2 @@ -652,8 +630,6 @@ subroutine fget_atomic_symmetric_kernels_arad(q1, z1, n1, sigmas, na1, nsigmas, & * (r_width2/(r_width2 + (z1(i,1) - z1(j,1))**2) * & & c_width2/(c_width2 + (z1(i,2) - z1(j,2))**2)) - if (abs(l2dist) < eps) l2dist = 0.0d0 - do k = 1, nsigmas kernels(k, i, j) = exp(l2dist * inv_sigma2(k)) kernels(k, j, i) = exp(l2dist * inv_sigma2(k)) @@ -667,3 +643,309 @@ subroutine fget_atomic_symmetric_kernels_arad(q1, z1, n1, sigmas, na1, nsigmas, deallocate(sin1) end subroutine fget_atomic_symmetric_kernels_arad + + +subroutine fget_global_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas, & + & width, cut_distance, r_width, c_width, kernels) + + use arad, only: atomic_distl2 + + implicit none + + ! ARAD descriptors for the training set, format (i,j_1,5,m_1) + double precision, dimension(:,:,:,:), intent(in) :: q1 + + ! ARAD atom-types for each atom in each molecule, format (i, j_1, 2) + double precision, dimension(:,:,:), intent(in) :: z1 + + ! List of numbers of atoms in each molecule + integer, dimension(:), intent(in) :: n1 + + ! Sigma in the Gaussian kernel + double precision, dimension(:), intent(in) :: sigmas + + ! Number of molecules + integer, intent(in) :: nm1 + + ! Number of sigmas + integer, intent(in) :: nsigmas + + ! -1.0 / sigma^2 for use in the kernel + double precision, dimension(nsigmas) :: inv_sigma2 + + ! ARAD parameters + double precision, intent(in) :: width + double precision, intent(in) :: cut_distance + double precision, intent(in) :: r_width + double precision, intent(in) :: c_width + + ! Resulting alpha vector + double precision, dimension(nsigmas,nm1,nm1), intent(out) :: kernels + + ! Internal counters + integer :: i, j, k, ni, nj + integer :: m_1, i_1, j_1 + + ! Pre-computed constants + double precision :: r_width2 + double precision :: c_width2 + double precision :: inv_cut + + ! Temporary variables necessary for parallelization + double precision :: l2dist + double precision, allocatable, dimension(:,:) :: atomic_distance + + ! Pre-computed terms in the full distance matrix + double precision, allocatable, dimension(:,:) :: selfl21 + + ! Pre-computed sine terms + double precision, allocatable, dimension(:,:,:) :: sin1 + + ! Value of PI at full FORTRAN precision. + double precision, parameter :: pi = 4.0d0 * atan(1.0d0) + + r_width2 = r_width**2 + c_width2 = c_width**2 + + inv_cut = pi / (2.0d0 * cut_distance) + inv_sigma2(:) = -1.0d0 / (sigmas(:))**2 + + allocate(sin1(nm1, maxval(n1), maxval(n1))) + + !$OMP PARALLEL DO PRIVATE(ni) + do i = 1, nm1 + ni = n1(i) + do m_1 = 1, ni + do i_1 = 1, ni + sin1(i, i_1, m_1) = 1.0d0 - sin(q1(i,i_1,1,m_1) * inv_cut) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + allocate(selfl21(nm1, maxval(n1))) + + !$OMP PARALLEL DO PRIVATE(ni) + do i = 1, nm1 + ni = n1(i) + do i_1 = 1, ni + selfl21(i,i_1) = atomic_distl2(q1(i,i_1,:,:), q1(i,i_1,:,:), n1(i), n1(i), & + & sin1(i,i_1,:), sin1(i,i_1,:), width, cut_distance, r_width, c_width) + enddo + enddo + !$OMP END PARALLEL DO + + allocate(atomic_distance(maxval(n1), maxval(n1))) + + kernels(:,:,:) = 0.0d0 + atomic_distance(:,:) = 0.0d0 + + !$OMP PARALLEL DO PRIVATE(l2dist,atomic_distance,ni,nj) schedule(dynamic) + do j = 1, nm1 + nj = n1(j) + do i = 1, j + ni = n1(i) + + atomic_distance(:,:) = 0.0d0 + + do i_1 = 1, ni + do j_1 = 1, nj + + l2dist = atomic_distl2(q1(i,i_1,:,:), q1(j,j_1,:,:), n1(i), n1(j), & + & sin1(i,i_1,:), sin1(j,j_1,:), width, cut_distance, r_width, c_width) + + l2dist = selfl21(i,i_1) + selfl21(j,j_1) - 2.0d0 * l2dist & + & * (r_width2/(r_width2 + (z1(i,i_1,1) - z1(j,j_1,1))**2) & + & * c_width2/(c_width2 + (z1(i,i_1,2) - z1(j,j_1,2))**2)) + + atomic_distance(i_1,j_1) = l2dist + + enddo + enddo + + do k = 1, nsigmas + kernels(k, i, j) = exp(sum(atomic_distance(:ni,:nj)) * inv_sigma2(k)) + kernels(k, j, i) = kernels(k, i, j) + enddo + + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(atomic_distance) + deallocate(selfl21) + deallocate(sin1) + +end subroutine fget_global_symmetric_kernels_arad + + +subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, nsigmas, & + & width, cut_distance, r_width, c_width, kernels) + + use arad, only: atomic_distl2 + + implicit none + + ! ARAD descriptors for the training set, format (i,j_1,5,m_1) + double precision, dimension(:,:,:,:), intent(in) :: q1 + double precision, dimension(:,:,:,:), intent(in) :: q2 + + ! ARAD atom-types for each atom in each molecule, format (i, j_1, 2) + double precision, dimension(:,:,:), intent(in) :: z1 + double precision, dimension(:,:,:), intent(in) :: z2 + + ! List of numbers of atoms in each molecule + integer, dimension(:), intent(in) :: n1 + integer, dimension(:), intent(in) :: n2 + + ! Sigma in the Gaussian kernel + double precision, dimension(:), intent(in) :: sigmas + + ! Number of molecules + integer, intent(in) :: nm1 + integer, intent(in) :: nm2 + + ! Number of sigmas + integer, intent(in) :: nsigmas + + ! -1.0 / sigma^2 for use in the kernel + double precision, dimension(nsigmas) :: inv_sigma2 + + ! ARAD parameters + double precision, intent(in) :: width + double precision, intent(in) :: cut_distance + double precision, intent(in) :: r_width + double precision, intent(in) :: c_width + + ! Resulting alpha vector + double precision, dimension(nsigmas,nm1,nm2), intent(out) :: kernels + + ! Internal counters + integer :: i, j, k, ni, nj + integer :: m_1, i_1, j_1 + + ! Pre-computed constants + double precision :: r_width2 + double precision :: c_width2 + double precision :: inv_cut + + ! Temporary variables necessary for parallelization + double precision :: l2dist + double precision, allocatable, dimension(:,:) :: atomic_distance + + ! Pre-computed terms in the full distance matrix + double precision, allocatable, dimension(:,:) :: selfl21 + double precision, allocatable, dimension(:,:) :: selfl22 + + ! Pre-computed sine terms + double precision, allocatable, dimension(:,:,:) :: sin1 + double precision, allocatable, dimension(:,:,:) :: sin2 + + ! Value of PI at full FORTRAN precision. + double precision, parameter :: pi = 4.0d0 * atan(1.0d0) + + r_width2 = r_width**2 + c_width2 = c_width**2 + + inv_cut = pi / (2.0d0 * cut_distance) + inv_sigma2(:) = -1.0d0 / (sigmas(:))**2 + + allocate(sin1(nm1, maxval(n1), maxval(n1))) + allocate(sin2(nm2, maxval(n2), maxval(n2))) + + sin1 = 0.0d0 + sin2 = 0.0d0 + + !$OMP PARALLEL DO PRIVATE(ni) + do i = 1, nm1 + ni = n1(i) + do m_1 = 1, ni + do i_1 = 1, ni + if (q1(i,i_1,1,m_1) < cut_distance) then + sin1(i, i_1, m_1) = 1.0d0 - sin(q1(i,i_1,1,m_1) * inv_cut) + endif + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL DO PRIVATE(ni) + do i = 1, nm2 + ni = n2(i) + do m_1 = 1, ni + do i_1 = 1, ni + if (q2(i,i_1,1,m_1) < cut_distance) then + sin2(i, i_1, m_1) = 1.0d0 - sin(q2(i,i_1,1,m_1) * inv_cut) + endif + enddo + enddo + enddo + !$OMP END PARALLEL DO + + allocate(selfl21(nm1, maxval(n1))) + allocate(selfl22(nm2, maxval(n2))) + + !$OMP PARALLEL DO PRIVATE(ni) + do i = 1, nm1 + ni = n1(i) + do i_1 = 1, ni + selfl21(i,i_1) = atomic_distl2(q1(i,i_1,:,:), q1(i,i_1,:,:), n1(i), n1(i), & + & sin1(i,i_1,:), sin1(i,i_1,:), width, cut_distance, r_width, c_width) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL DO PRIVATE(ni) + do i = 1, nm2 + ni = n2(i) + do i_1 = 1, ni + selfl22(i,i_1) = atomic_distl2(q2(i,i_1,:,:), q2(i,i_1,:,:), n2(i), n2(i), & + & sin2(i,i_1,:), sin2(i,i_1,:), width, cut_distance, r_width, c_width) + enddo + enddo + !$OMP END PARALLEL DO + + + allocate(atomic_distance(maxval(n1), maxval(n2))) + + kernels(:,:,:) = 0.0d0 + atomic_distance(:,:) = 0.0d0 + + !$OMP PARALLEL DO PRIVATE(l2dist,atomic_distance,ni,nj) schedule(dynamic) + do j = 1, nm2 + nj = n2(j) + do i = 1, nm1 + ni = n1(i) + + atomic_distance(:,:) = 0.0d0 + + do i_1 = 1, ni + do j_1 = 1, nj + + l2dist = atomic_distl2(q1(i,i_1,:,:), q2(j,j_1,:,:), n1(i), n2(j), & + & sin1(i,i_1,:), sin2(j,j_1,:), width, cut_distance, r_width, c_width) + + l2dist = selfl21(i,i_1) + selfl22(j,j_1) - 2.0d0 * l2dist & + & * (r_width2/(r_width2 + (z1(i,i_1,1) - z2(j,j_1,1))**2) * & + & c_width2/(c_width2 + (z1(i,i_1,2) - z2(j,j_1,2))**2)) + + atomic_distance(i_1,j_1) = l2dist + + enddo + enddo + + do k = 1, nsigmas + kernels(k, i, j) = exp(sum(atomic_distance(:ni,:nj)) * inv_sigma2(k)) + enddo + + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(atomic_distance) + deallocate(selfl21) + deallocate(selfl22) + deallocate(sin1) + deallocate(sin2) + +end subroutine fget_global_kernels_arad diff --git a/qml/fkernels.f90 b/qml/fkernels.f90 index 485facf21..69d224503 100644 --- a/qml/fkernels.f90 +++ b/qml/fkernels.f90 @@ -238,37 +238,14 @@ subroutine flinear_kernel(a, na, b, nb, k) integer :: i, j - double precision, allocatable, dimension(:,:) :: a_normed - double precision, allocatable, dimension(:,:) :: b_normed - - allocate(a_normed(size(a, dim=1),size(a, dim=2))) - allocate(b_normed(size(b, dim=1),size(b, dim=2))) - - -!$OMP PARALLEL DO - do i = 1, na - a_normed(:,i) = a(:,i) / norm2(a(:,i)) - enddo -!$OMP END PARALLEL DO - -!$OMP PARALLEL DO - do i = 1, nb - b_normed(:,i) = b(:,i) / norm2(b(:,i)) - enddo -!$OMP END PARALLEL DO - - !$OMP PARALLEL DO do i = 1, nb do j = 1, na - k(j,i) = dot_product(a_normed(:,j), b_normed(:,i)) + k(j,i) = dot_product(a(:,j), b(:,i)) enddo enddo !$OMP END PARALLEL DO - deallocate(a_normed) - deallocate(b_normed) - end subroutine flinear_kernel diff --git a/qml/kernels.py b/qml/kernels.py index 1ea069de8..8be7bc352 100644 --- a/qml/kernels.py +++ b/qml/kernels.py @@ -91,11 +91,9 @@ def gaussian_kernel(A, B, sigma): def linear_kernel(A, B): """ Calculates the linear kernel matrix K, where :math:`K_{ij}`: - :math:`K_{ij} = \hat{A}_i \cdot \hat{B}_j` + :math:`K_{ij} = A_i \cdot B_j` - Where :math:`\hat{A}_{i}` and :math:`\hat{B}_{j}` are normalized - representation vectors. Note the normalization is handled inside - the kernel function. + VWhere :math:`A_{i}` and :math:`B_{j}` are representation vectors. K is calculated using an OpenMP parallel Fortran routine. diff --git a/tests/test_arad.py b/tests/test_arad.py index e34937528..59a3c7aa6 100644 --- a/tests/test_arad.py +++ b/tests/test_arad.py @@ -13,6 +13,9 @@ from qml.arad import get_local_kernels_arad from qml.arad import get_local_symmetric_kernels_arad +from qml.arad import get_global_kernels_arad +from qml.arad import get_global_symmetric_kernels_arad + from qml.arad import get_atomic_kernels_arad from qml.arad import get_atomic_symmetric_kernels_arad @@ -70,7 +73,11 @@ def test_arad(): assert np.allclose(K_local_symm, K_local_asymm), "Symmetry error in local kernels" assert np.invert(np.all(np.isnan(K_local_asymm))), "ERROR: ARAD local symmetric kernel contains NaN" - K_local_asymm = get_local_kernels_arad(X1[-4:], X1[:6], sigmas) + K_global_asymm = get_global_kernels_arad(X1, X1, sigmas) + K_global_symm = get_global_symmetric_kernels_arad(X1, sigmas) + + assert np.allclose(K_global_symm, K_global_asymm), "Symmetry error in global kernels" + assert np.invert(np.all(np.isnan(K_global_asymm))), "ERROR: ARAD global symmetric kernel contains NaN" molid = 5 X1 = generate_arad_representation(mols[molid].coordinates, diff --git a/tests/test_kernels.py b/tests/test_kernels.py index c05be57f8..6e21ec7f8 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -107,18 +107,17 @@ def test_linear_kernel(): for i in range(n_train): for j in range(n_test): - Ktest[i,j] = np.dot(X[i]/np.linalg.norm(X[i]), - Xs[j]/np.linalg.norm(Xs[j])) + Ktest[i,j] = np.dot(X[i], Xs[j]) K = linear_kernel(X, Xs) # Compare two implementations: - assert np.allclose(K, Ktest), "Error in Gaussian kernel" + assert np.allclose(K, Ktest), "Error in linear kernel" Ksymm = linear_kernel(X, X) # Check for symmetry: - assert np.allclose(Ksymm, Ksymm.T), "Error in Gaussian kernel" + assert np.allclose(Ksymm, Ksymm.T), "Error in linear kernel" def test_matern_kernel(): From ce010c0166927e3d7bc852fb1df536014509d7fa Mon Sep 17 00:00:00 2001 From: "Anders S. Christensen" Date: Thu, 13 Jul 2017 15:26:45 +0200 Subject: [PATCH 4/4] Corrected ARAD global kernel to L2 distance --- qml/farad_kernels.f90 | 82 +++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/qml/farad_kernels.f90 b/qml/farad_kernels.f90 index bc1c6f608..9bfabde82 100644 --- a/qml/farad_kernels.f90 +++ b/qml/farad_kernels.f90 @@ -696,7 +696,7 @@ subroutine fget_global_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas, double precision, allocatable, dimension(:,:) :: atomic_distance ! Pre-computed terms in the full distance matrix - double precision, allocatable, dimension(:,:) :: selfl21 + double precision, allocatable, dimension(:) :: selfl21 ! Pre-computed sine terms double precision, allocatable, dimension(:,:,:) :: sin1 @@ -704,6 +704,8 @@ subroutine fget_global_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas, ! Value of PI at full FORTRAN precision. double precision, parameter :: pi = 4.0d0 * atan(1.0d0) + double precision :: mol_dist + r_width2 = r_width**2 c_width2 = c_width**2 @@ -723,14 +725,22 @@ subroutine fget_global_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas, enddo !$OMP END PARALLEL DO - allocate(selfl21(nm1, maxval(n1))) + allocate(selfl21(nm1)) - !$OMP PARALLEL DO PRIVATE(ni) + selfl21 = 0.0d0 + + !$OMP PARALLEL DO PRIVATE(ni) REDUCTION(+:selfl21) do i = 1, nm1 ni = n1(i) do i_1 = 1, ni - selfl21(i,i_1) = atomic_distl2(q1(i,i_1,:,:), q1(i,i_1,:,:), n1(i), n1(i), & - & sin1(i,i_1,:), sin1(i,i_1,:), width, cut_distance, r_width, c_width) + do j_1 = 1, ni + + selfl21(i) = selfl21(i) + atomic_distl2(q1(i,i_1,:,:), q1(i,j_1,:,:), n1(i), n1(i), & + & sin1(i,i_1,:), sin1(i,j_1,:), width, cut_distance, r_width, c_width) & + & * (r_width2/(r_width2 + (z1(i,i_1,1) - z1(i,j_1,1))**2) * & + & c_width2/(c_width2 + (z1(i,i_1,2) - z1(i,j_1,2))**2)) + + enddo enddo enddo !$OMP END PARALLEL DO @@ -740,10 +750,10 @@ subroutine fget_global_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas, kernels(:,:,:) = 0.0d0 atomic_distance(:,:) = 0.0d0 - !$OMP PARALLEL DO PRIVATE(l2dist,atomic_distance,ni,nj) schedule(dynamic) + !$OMP PARALLEL DO PRIVATE(l2dist,atomic_distance,ni,nj,mol_dist) schedule(dynamic) do j = 1, nm1 nj = n1(j) - do i = 1, j + do i = 1, j! nm1 ni = n1(i) atomic_distance(:,:) = 0.0d0 @@ -754,17 +764,18 @@ subroutine fget_global_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas, l2dist = atomic_distl2(q1(i,i_1,:,:), q1(j,j_1,:,:), n1(i), n1(j), & & sin1(i,i_1,:), sin1(j,j_1,:), width, cut_distance, r_width, c_width) - l2dist = selfl21(i,i_1) + selfl21(j,j_1) - 2.0d0 * l2dist & - & * (r_width2/(r_width2 + (z1(i,i_1,1) - z1(j,j_1,1))**2) & - & * c_width2/(c_width2 + (z1(i,i_1,2) - z1(j,j_1,2))**2)) + L2dist = l2dist * (r_width2/(r_width2 + (z1(i,i_1,1) - z1(j,j_1,1))**2) * & + & c_width2/(c_width2 + (z1(i,i_1,2) - z1(j,j_1,2))**2)) atomic_distance(i_1,j_1) = l2dist enddo enddo + mol_dist = selfl21(i) + selfl21(j) - 2.0d0 * sum(atomic_distance(:ni,:nj)) + do k = 1, nsigmas - kernels(k, i, j) = exp(sum(atomic_distance(:ni,:nj)) * inv_sigma2(k)) + kernels(k, i, j) = exp(mol_dist * inv_sigma2(k)) kernels(k, j, i) = kernels(k, i, j) enddo @@ -834,8 +845,8 @@ subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, ns double precision, allocatable, dimension(:,:) :: atomic_distance ! Pre-computed terms in the full distance matrix - double precision, allocatable, dimension(:,:) :: selfl21 - double precision, allocatable, dimension(:,:) :: selfl22 + double precision, allocatable, dimension(:) :: selfl21 + double precision, allocatable, dimension(:) :: selfl22 ! Pre-computed sine terms double precision, allocatable, dimension(:,:,:) :: sin1 @@ -844,6 +855,8 @@ subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, ns ! Value of PI at full FORTRAN precision. double precision, parameter :: pi = 4.0d0 * atan(1.0d0) + double precision :: mol_dist + r_width2 = r_width**2 c_width2 = c_width**2 @@ -882,25 +895,41 @@ subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, ns enddo !$OMP END PARALLEL DO - allocate(selfl21(nm1, maxval(n1))) - allocate(selfl22(nm2, maxval(n2))) + allocate(selfl21(nm1)) + allocate(selfl22(nm2)) - !$OMP PARALLEL DO PRIVATE(ni) + selfl21 = 0.0d0 + selfl22 = 0.0d0 + + !$OMP PARALLEL DO PRIVATE(ni) REDUCTION(+:selfl21) do i = 1, nm1 ni = n1(i) do i_1 = 1, ni - selfl21(i,i_1) = atomic_distl2(q1(i,i_1,:,:), q1(i,i_1,:,:), n1(i), n1(i), & - & sin1(i,i_1,:), sin1(i,i_1,:), width, cut_distance, r_width, c_width) + do j_1 = 1, ni + + selfl21(i) = selfl21(i) + atomic_distl2(q1(i,i_1,:,:), q1(i,j_1,:,:), n1(i), n1(i), & + & sin1(i,i_1,:), sin1(i,j_1,:), width, cut_distance, r_width, c_width) & + & * (r_width2/(r_width2 + (z1(i,i_1,1) - z1(i,j_1,1))**2) * & + & c_width2/(c_width2 + (z1(i,i_1,2) - z1(i,j_1,2))**2)) + + enddo enddo enddo !$OMP END PARALLEL DO - !$OMP PARALLEL DO PRIVATE(ni) + !$OMP PARALLEL DO PRIVATE(ni) REDUCTION(+:selfl22) do i = 1, nm2 ni = n2(i) do i_1 = 1, ni - selfl22(i,i_1) = atomic_distl2(q2(i,i_1,:,:), q2(i,i_1,:,:), n2(i), n2(i), & - & sin2(i,i_1,:), sin2(i,i_1,:), width, cut_distance, r_width, c_width) + do j_1 = 1, ni + + selfl22(i) = selfl22(i) + atomic_distl2(q2(i,i_1,:,:), q2(i,j_1,:,:), n2(i), n2(i), & + & sin2(i,i_1,:), sin2(i,j_1,:), width, cut_distance, r_width, c_width) & + &* (r_width2/(r_width2 + (z2(i,i_1,1) - z2(i,j_1,1))**2) * & + & c_width2/(c_width2 + (z2(i,i_1,2) - z2(i,j_1,2))**2)) + + + enddo enddo enddo !$OMP END PARALLEL DO @@ -911,7 +940,7 @@ subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, ns kernels(:,:,:) = 0.0d0 atomic_distance(:,:) = 0.0d0 - !$OMP PARALLEL DO PRIVATE(l2dist,atomic_distance,ni,nj) schedule(dynamic) + !$OMP PARALLEL DO PRIVATE(l2dist,atomic_distance,ni,nj,mol_dist) schedule(dynamic) do j = 1, nm2 nj = n2(j) do i = 1, nm1 @@ -925,17 +954,18 @@ subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, ns l2dist = atomic_distl2(q1(i,i_1,:,:), q2(j,j_1,:,:), n1(i), n2(j), & & sin1(i,i_1,:), sin2(j,j_1,:), width, cut_distance, r_width, c_width) - l2dist = selfl21(i,i_1) + selfl22(j,j_1) - 2.0d0 * l2dist & - & * (r_width2/(r_width2 + (z1(i,i_1,1) - z2(j,j_1,1))**2) * & - & c_width2/(c_width2 + (z1(i,i_1,2) - z2(j,j_1,2))**2)) + L2dist = l2dist * (r_width2/(r_width2 + (z1(i,i_1,1) - z2(j,j_1,1))**2) * & + & c_width2/(c_width2 + (z1(i,i_1,2) - z2(j,j_1,2))**2)) atomic_distance(i_1,j_1) = l2dist enddo enddo + mol_dist = selfl21(i) + selfl22(j) - 2.0d0 * sum(atomic_distance(:ni,:nj)) + do k = 1, nsigmas - kernels(k, i, j) = exp(sum(atomic_distance(:ni,:nj)) * inv_sigma2(k)) + kernels(k, i, j) = exp(mol_dist * inv_sigma2(k)) enddo enddo