zaspel · Sep 10, 2018
diff --git a/‎docs/source/qml.rst
+11 b/‎docs/source/qml.rst
+11
diff --git a/‎examples/qmlearn.py
+287 b/‎examples/qmlearn.py
+287
diff --git a/‎qml/__init__.py
+2 b/‎qml/__init__.py
+2
diff --git a/‎qml/aglaia/aglaia.py
+43-43 b/‎qml/aglaia/aglaia.py
+43-43
diff --git a/‎qml/arad/arad.py
+1-1 b/‎qml/arad/arad.py
+1-1
diff --git a/‎qml/data/__init__.py
-1 b/‎qml/data/__init__.py
-1
diff --git a/‎qml/data/compound.py
+2-2 b/‎qml/data/compound.py
+2-2
diff --git a/‎qml/data/dataprovider.py
-1 b/‎qml/data/dataprovider.py
-1
diff --git a/‎qml/data/xyzdataprovider.py
-1 b/‎qml/data/xyzdataprovider.py
-1
diff --git a/‎qml/fchl/fchl_electric_field_kernels.py
+1-1 b/‎qml/fchl/fchl_electric_field_kernels.py
+1-1
diff --git a/‎qml/fchl/fchl_force_kernels.py
+1-1 b/‎qml/fchl/fchl_force_kernels.py
+1-1
diff --git a/‎qml/fchl/fchl_kernels.py
+1-1 b/‎qml/fchl/fchl_kernels.py
+1-1
diff --git a/‎qml/fchl/fchl_representations.py
+2-2 b/‎qml/fchl/fchl_representations.py
+2-2
diff --git a/‎qml/fchl/fchl_scalar_kernels.py
+1-1 b/‎qml/fchl/fchl_scalar_kernels.py
+1-1
diff --git a/‎qml/kernels/kernels.py
+3-2 b/‎qml/kernels/kernels.py
+3-2
diff --git a/‎qml/models/kernelridge.py
-2 b/‎qml/models/kernelridge.py
-2
diff --git a/‎qml/qmlearn/__init__.py
+27 b/‎qml/qmlearn/__init__.py
+27
diff --git a/‎qml/qmlearn/data.py
+139 b/‎qml/qmlearn/data.py
+139
diff --git a/‎qml/qmlearn/kernels.py
+845 b/‎qml/qmlearn/kernels.py
+845
diff --git a/‎qml/qmlearn/models.py
+161 b/‎qml/qmlearn/models.py
+161
diff --git a/‎qml/qmlearn/preprocessing.py
+237 b/‎qml/qmlearn/preprocessing.py
+237
diff --git a/‎qml/qmlearn/representations.py
+800 b/‎qml/qmlearn/representations.py
+800
diff --git a/‎qml/representations/representations.py
+1-1 b/‎qml/representations/representations.py
+1-1
diff --git a/‎qml/utils/__init__.py
+25 b/‎qml/utils/__init__.py
+25
diff --git a/‎qml/data/alchemy.py ‎qml/utils/alchemy.py b/‎qml/data/alchemy.py ‎qml/utils/alchemy.py
diff --git a/‎qml/aglaia/utils.py ‎qml/utils/utils.py
+25-17 b/‎qml/aglaia/utils.py ‎qml/utils/utils.py
+25-17
diff --git a/‎setup.py
+3-1 b/‎setup.py
+3-1
diff --git a/‎test/test_armp.py
+1-1 b/‎test/test_armp.py
+1-1
diff --git a/‎test/test_mrmp.py
+1-1 b/‎test/test_mrmp.py
+1-1
diff --git a/‎test/test_neural_network.py
+1-1 b/‎test/test_neural_network.py
+1-1
@@ -113,3 +113,14 @@ qml\.aglaia module
    :inherited-members:
 
 
+qml\.qmlearn.representations module
+-----------------------------------
+
+.. automodule:: qml.qmlearn.representations
+   :inherited-members:
+
+qml\.qmlearn.kernels module
+---------------------------
+
+.. automodule:: qml.qmlearn.kernels
+   :inherited-members:
@@ -0,0 +1,287 @@
+import glob
+import numpy as np
+from qml import qmlearn
+import sklearn.pipeline
+import sklearn.model_selection
+
+def data():
+    """
+    Using the Data object.
+    """
+    print("*** Begin data examples ***")
+
+    # The Data object has the same role as the Compound class.
+    # Where the Compound class is for one compound, the Data class
+    # is for multiple compounds.
+
+    # One can load in a set of xyz files
+    filenames = sorted(glob.glob("../test/qm7/00*.xyz"))
+    data = qmlearn.Data(filenames)
+    print("length of filenames", len(filenames))
+    print("length of nuclear_charges", len(data.nuclear_charges))
+    print("length of coordinates", len(data.coordinates))
+
+    # Or just load a glob string
+    data = qmlearn.Data("../test/qm7/00*.xyz")
+    print("length of nuclear_charges", len(data.nuclear_charges))
+
+    # Energies (or other molecular properties) can be stored in the object
+    energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)[:98]
+    data.set_energies(energies)
+    print("length of energies", len(data.energies))
+
+    print("*** End data examples ***")
+    print()
+
+def preprocessing():
+    """
+    Rescaling energies
+    """
+
+    print("*** Begin preprocessing examples ***")
+
+    # The AtomScaler object does a linear fit of the number of each element to the energy.
+    data = qmlearn.Data("../test/qm7/*.xyz")
+    energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)
+
+    # Input can be nuclear_charges and energies
+    print("Energies before rescaling", energies[:3])
+    rescaled_energies = qmlearn.preprocessing.AtomScaler().fit_transform(data.nuclear_charges, energies)
+    print("Energies after rescaling", rescaled_energies[:3])
+
+    # Or a data object can be used
+    data.set_energies(energies)
+    data2 = qmlearn.preprocessing.AtomScaler().fit_transform(data)
+    print("Energies after rescaling", data2.energies[:3])
+
+    print("*** End preprocessing examples ***")
+    print()
+
+def representations():
+    """
+    Creating representations. Currently implemented representations are
+    CoulombMatrix, AtomicCoulombMatrix, AtomicSLATM, GlobalSLATM,
+    FCHLRepresentations, AtomCenteredSymmetryFunctions. 
+    (BagOfBonds is still missing)
+    """
+
+    print("*** Begin representations examples ***")
+
+    data = qmlearn.Data("../test/qm7/*.xyz")
+
+    # Representations can be created from a data object
+    model = qmlearn.representations.CoulombMatrix(sorting ='row-norm')
+    representations = model.generate(data)
+    print("Shape of representations:", representations.shape)
+
+    # Alternatively the data object can be passed at initialization of the representation class
+    # and only selected molecule indices need to be passed to generate
+
+    model = qmlearn.representations.CoulombMatrix(data)
+    representations = model.generate([0,5,7,16])
+    print("Shape of representations:", representations.shape)
+
+    print("*** End representations examples ***")
+    print()
+
+def kernels():
+    """
+    Create kernels. Currently implemented kernels are GaussianKernel,
+    LaplacianKernel, FCHLKernel.
+    """
+
+    print("*** Begin kernels examples ***")
+
+    data = qmlearn.Data("../test/qm7/*.xyz")
+    energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)
+    data.set_energies(energies)
+
+    # Kernels can be created from representations
+    model = qmlearn.representations.CoulombMatrix(data)
+    indices = np.arange(100)
+    representations = model.generate(indices)
+
+    model = qmlearn.kernels.GaussianKernel(sigma='auto')
+    symmetric_kernels = model.generate(representations[:80])
+    print("Shape of symmetric kernels:", symmetric_kernels.shape)
+
+    asymmetric_kernels = model.generate(representations[:80], representations[80:])
+    print("Shape of asymmetric kernels:", asymmetric_kernels.shape)
+
+    # Atomic representations can be used as well
+    model = qmlearn.representations.AtomicCoulombMatrix(data)
+    indices = np.arange(100)
+    representations = model.generate(indices)
+
+    model = qmlearn.kernels.GaussianKernel(sigma='auto')
+    symmetric_kernels = model.generate(representations[:80], representation_type = 'atomic')
+    print("Shape of symmetric kernels:", symmetric_kernels.shape)
+
+    asymmetric_kernels = model.generate(representations[:80], representations[80:], representation_type = 'atomic')
+    print("Shape of asymmetric kernels:", asymmetric_kernels.shape)
+
+    print("*** End kernels examples ***")
+    print()
+
+def models():
+    """
+    Regression models. Only KernelRidgeRegression implemented so far.
+    """
+
+    print("*** Begin models examples ***")
+
+    filenames = sorted(glob.glob("../test/qm7/*.xyz"))
+    data = qmlearn.Data(filenames)
+    energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)
+    model = qmlearn.representations.CoulombMatrix(data)
+    # Create 1000 random indices (first 800 are used for training, last 200 for testing)
+    indices = np.arange(1000)
+    np.random.shuffle(indices)
+
+    representations = model.generate(indices)
+    model = qmlearn.kernels.GaussianKernel(sigma='auto')
+    symmetric_kernels = model.generate(representations[:800])
+    asymmetric_kernels = model.generate(representations[:800], representations[800:])
+
+    # Model can be fit giving kernel matrix and energies
+
+    model = qmlearn.models.KernelRidgeRegression()
+    model.fit(symmetric_kernels, energies[indices[:800]])
+    print("Fitted KRR weights:", model.alpha[:3])
+
+    # Predictions for the held-out molecules (indices[800:]) can be had from an asymmetric kernel
+    predictions = model.predict(asymmetric_kernels)
+    print("Predicted energies:", predictions[:3])
+    print("True energies:", energies[indices[800:803]])
+
+    # Or the score (default negative mae) can be had directly
+    scores = model.score(asymmetric_kernels, energies[indices[800:]])
+    print("Negative MAE:", scores)
+
+    print("*** End models examples ***")
+    print()
+
+def pipelines():
+    """
+    Constructing scikit-learn pipelines
+    """
+
+    print("*** Begin pipelines examples ***")
+
+    # It is much easier to do all this with a scikit-learn pipeline
+
+    # Create data
+    data = qmlearn.Data("../test/qm7/*.xyz")
+    energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)
+    data.set_energies(energies)
+
+    # Create model
+    model = sklearn.pipeline.make_pipeline(
+            qmlearn.preprocessing.AtomScaler(data),
+            qmlearn.representations.CoulombMatrix(),
+            qmlearn.kernels.GaussianKernel(),
+            qmlearn.models.KernelRidgeRegression(),
+            )
+
+    # Create 1000 random indices
+    indices = np.arange(1000)
+    np.random.shuffle(indices)
+
+    model.fit(indices[:800])
+    scores = model.score(indices[800:])
+    print("Negative MAE:", scores)
+
+    # Passing alchemy=False to kernels makes sure that the atomic kernel only compares C to C, H to H etc.
+    # This will speed up kernels of some representations dramatically, but only works in pipelines
+
+    # Create model
+    model = sklearn.pipeline.make_pipeline(
+            qmlearn.preprocessing.AtomScaler(data),
+            qmlearn.representations.CoulombMatrix(),
+            qmlearn.kernels.GaussianKernel(alchemy=False),
+            qmlearn.models.KernelRidgeRegression(),
+            )
+
+    # Create 1000 random indices
+    indices = np.arange(1000)
+    np.random.shuffle(indices)
+
+    model.fit(indices[:800])
+    scores = model.score(indices[800:])
+    print("Negative MAE without alchemy:", scores)
+
+    print("*** End pipelines examples ***")
+    print()
+
+def cross_validation():
+    """
+    Doing cross validation with qmlearn
+    """
+
+    print("*** Begin CV examples ***")
+
+    # Create data
+    data = qmlearn.Data("../test/qm7/*.xyz")
+    energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)
+    data.set_energies(energies)
+
+    # Create model
+    model = sklearn.pipeline.make_pipeline(
+            qmlearn.preprocessing.AtomScaler(data),
+            qmlearn.representations.CoulombMatrix(),
+            qmlearn.kernels.GaussianKernel(),
+            qmlearn.models.KernelRidgeRegression(),
+            # memory='/dev/shm/' ### This will cache the previous steps to the virtual memory and might speed up gridsearch
+            )
+
+    # Create 1000 random indices
+    indices = np.arange(1000)
+    np.random.shuffle(indices)
+
+    # 3-fold CV of a given model can easily be done
+    scores = sklearn.model_selection.cross_validate(model, indices, cv=3)
+    print("Cross-validated scores:", scores['test_score'])
+
+    # Doing a grid search over hyper parameters
+    params = {'gaussiankernel__sigma': [10, 30, 100],
+              'kernelridgeregression__l2_reg': [1e-8, 1e-4],
+             }
+
+    grid = sklearn.model_selection.GridSearchCV(model, cv=3, refit=False, param_grid=params)
+    grid.fit(indices)
+    print("Best hyper parameters:", grid.best_params_)
+    print("Best score:", grid.best_score_)
+
+    # As an alternative the pipeline can be constructed slightly differently, which allows more complex CV
+    # Create model
+    model = sklearn.pipeline.Pipeline([
+            ('preprocess', qmlearn.preprocessing.AtomScaler(data)),
+            ('representations', qmlearn.representations.CoulombMatrix()),
+            ('kernel', qmlearn.kernels.GaussianKernel()),
+            ('model', qmlearn.models.KernelRidgeRegression())
+            ],
+            # memory='/dev/shm/' ### This will cache the previous steps to the virtual memory and might speed up gridsearch
+            )
+
+    # Doing a grid search over hyper parameters
+    # including which kernel to use (each sigma listed once, so no configuration is fitted twice)
+    params = {'kernel': [qmlearn.kernels.LaplacianKernel(), qmlearn.kernels.GaussianKernel()],
+              'kernel__sigma': [10, 30, 100, 1000, 3000],
+              'model__l2_reg': [1e-8, 1e-4],
+             }
+
+    grid = sklearn.model_selection.GridSearchCV(model, cv=3, refit=False, param_grid=params)
+    grid.fit(indices)
+    print("Best hyper parameters:", grid.best_params_)
+    print("Best score:", grid.best_score_)
+
+    print("*** End CV examples ***")
+
+if __name__ == '__main__':
+    data()
+    preprocessing()
+    representations()
+    kernels()
+    models()
+    pipelines()
+    cross_validation()
@@ -40,6 +40,8 @@
 from . import arad
 from . import fchl
 from . import representations
+from . import qmlearn
+from . import utils
 
 __author__ = "Anders S. Christensen"
 __copyright__ = "Copyright 2016"
 
@@ -30,13 +30,13 @@
 import tensorflow as tf
 from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 from sklearn.base import BaseEstimator
-from qml.aglaia.symm_funct import generate_parkhill_acsf
-from qml.aglaia.utils import InputError, ceil, is_positive_or_zero, is_positive_integer, is_positive, \
-        is_bool, is_positive_integer_or_zero, is_string, is_positive_integer_array, is_array_like, is_none, \
+from .symm_funct import generate_parkhill_acsf
+from ..utils import InputError, ceil, is_positive_or_zero, is_positive_integer, is_positive, \
+        is_bool, is_positive_integer_or_zero, is_string, is_positive_integer_array, is_array_like, \
         check_global_representation, check_y, check_sizes, check_dy, check_classes, is_numeric_array, is_non_zero_integer, \
     is_positive_integer_or_zero_array, check_local_representation
 
-from qml.aglaia.tf_utils import TensorBoardLogger
+from .tf_utils import TensorBoardLogger
 
 try:
     from qml.data import Compound
@@ -580,7 +580,7 @@ def _set_slatm_parameters(self, params):
         self.slatm_parameters = {'slatm_sigma1': 0.05, 'slatm_sigma2': 0.05, 'slatm_dgrid1': 0.03, 'slatm_dgrid2': 0.03,
                                  'slatm_rcut': 4.8, 'slatm_rpower': 6, 'slatm_alchemy': False}
 
-        if not is_none(params):
+        if params is not None:
             for key, value in params.items():
                 if key in self.slatm_parameters:
                     self.slatm_parameters[key] = value
@@ -597,7 +597,7 @@ def _set_acsf_parameters(self, params):
         self.acsf_parameters = {'radial_cutoff': 10.0, 'angular_cutoff': 10.0, 'radial_rs': (0.0, 0.1, 0.2),
                                 'angular_rs': (0.0, 0.1, 0.2), 'theta_s': (3.0, 2.0), 'zeta': 3.0, 'eta': 2.0}
 
-        if not is_none(params):
+        if params is not None:
             for key, value in params.items():
                 if key in self.acsf_parameters:
                     self.acsf_parameters[key] = value
@@ -658,7 +658,7 @@ def generate_compounds(self, filenames):
         """
 
         # Check that the number of properties match the number of compounds if the properties have already been set
-        if is_none(self.properties):
+        if self.properties is None:
             pass
         else:
             if self.properties.size == len(filenames):
@@ -683,18 +683,18 @@ def generate_representation(self, xyz=None, classes=None):
         :return: None
         """
 
-        if is_none(self.compounds) and is_none(xyz) and is_none(classes):
+        if self.compounds is None and xyz is None and classes is None:
             raise InputError("QML compounds need to be created in advance or Cartesian coordinates need to be passed in "
                              "order to generate the representation.")
 
-        if not is_none(self.representation):
+        if self.representation is not None:
             raise InputError("The representations have already been set!")
 
-        if is_none(self.compounds):
+        if self.compounds is None:
 
             self.representation, self.classes = self._generate_representations_from_data(xyz, classes)
 
-        elif is_none(xyz):
+        elif xyz is None:
             # Make representations from compounds
 
             self.representation, self.classes = self._generate_representations_from_compounds()
@@ -708,7 +708,7 @@ def set_properties(self, properties):
         :param y: array of properties of size (nsamples,)
         :type y: array
         """
-        if is_none(properties):
+        if properties is None:
             raise InputError("Properties cannot be set to none.")
         else:
             if is_numeric_array(properties) and np.asarray(properties).ndim == 1:
@@ -725,10 +725,10 @@ def set_representations(self, representations):
         :type representations: numpy array of shape (n_samples, n_features) or (n_samples, n_atoms, n_features)
         """
 
-        if not is_none(self.representation):
+        if self.representation is not None:
             raise InputError("The representations have already been set!")
 
-        if is_none(representations):
+        if representations is None:
             raise InputError("Descriptor cannot be set to none.")
         else:
             if is_numeric_array(representations):
@@ -745,7 +745,7 @@ def set_gradients(self, gradients):
         :return: None
         """
 
-        if is_none(gradients):
+        if gradients is None:
             raise InputError("Gradients cannot be set to none.")
         else:
             if is_numeric_array(gradients):
@@ -762,7 +762,7 @@ def set_classes(self, classes):
         :type classes: numpy array of shape (n_samples, n_atoms) of ints
         :return: None
         """
-        if is_none(classes):
+        if classes is None:
             raise InputError("Classes cannot be set to none.")
         else:
             if is_positive_integer_array(classes):
@@ -1050,7 +1050,7 @@ def _initialise_representation(self, representation, parameters):
             raise InputError("Unknown representation %s" % representation)
         self.representation_name = representation.lower()
 
-        if not is_none(parameters):
+        if parameters is not None:
             if not type(parameters) is dict:
                 raise InputError("The representation parameters passed should be either None or a dictionary.")
 
@@ -1060,7 +1060,7 @@ def _initialise_representation(self, representation, parameters):
 
         else:
 
-            if not is_none(parameters):
+            if parameters is not None:
                 raise InputError("The representation %s does not take any additional parameters." % (self.representation_name))
 
     def _set_representation(self, representation):
@@ -1098,7 +1098,7 @@ def _generate_representations_from_compounds(self):
         :rtype: numpy array of shape (n_samples, n_features) and None
         """
 
-        if is_none(self.compounds):
+        if self.compounds is None:
             raise InputError("This should never happen.")
 
         n_samples = len(self.compounds)
@@ -1368,18 +1368,18 @@ def _check_inputs(self, x, y, dy, classes):
         if not is_array_like(x):
             raise InputError("x should be an array either containing indices or data.")
 
-        if not is_none(dy) and not is_none(classes):
+        if dy is not None and classes is not None:
             raise InputError("MRMP estimator cannot predict gradients and do atomic decomposition.")
 
         # Check if x is made up of indices or data
         if is_positive_integer_or_zero_array(x):
 
-            if is_none(self.representation):
-                if is_none(self.compounds):
+            if self.representation is None:
+                if self.compounds is None:
                     raise InputError("No representations or QML compounds have been set yet.")
                 else:
                     self.representation, _ = self._generate_representations_from_compounds()
-            if is_none(self.properties):
+            if self.properties is None:
                 raise InputError("The properties need to be set in advance.")
 
             approved_x = self.representation[x]
@@ -1391,7 +1391,7 @@ def _check_inputs(self, x, y, dy, classes):
 
         else:
 
-            if is_none(y):
+            if y is None:
                 raise InputError("y cannot be of None type.")
 
             approved_x = check_global_representation(x)
@@ -1420,18 +1420,18 @@ def _check_predict_input(self, x, classes):
         if not is_array_like(x):
             raise InputError("x should be an array either containing indices or data.")
 
-        if not is_none(classes):
+        if classes is not None:
             raise InputError("MRMP estimator cannot do atomic decomposition.")
 
         # Check if x is made up of indices or data
         if is_positive_integer_or_zero_array(x):
 
-            if is_none(self.representation):
-                if is_none(self.compounds):
+            if self.representation is None:
+                if self.compounds is None:
                     raise InputError("No representations or QML compounds have been set yet.")
                 else:
                     self.representation, _ = self._generate_representations_from_compounds()
-            if is_none(self.properties):
+            if self.properties is None:
                 raise InputError("The properties need to be set in advance.")
 
             approved_x = self.representation[x]
@@ -1586,7 +1586,7 @@ def _initialise_representation(self, representation, parameters):
             raise InputError("Unknown representation %s" % representation)
         self.representation_name = representation.lower()
 
-        if not is_none(parameters):
+        if parameters is not None:
             if not type(parameters) is dict:
                 raise InputError("The representation parameters passed should be either None or a dictionary.")
             self._check_representation_parameters(parameters)
@@ -1601,7 +1601,7 @@ def _initialise_representation(self, representation, parameters):
 
         else:
 
-            if not is_none(parameters):
+            if parameters is not None:
                 raise InputError("The representation %s does not take any additional parameters." % (self.representation_name))
 
     def _set_representation(self, representation):
@@ -1624,7 +1624,7 @@ def _generate_representations_from_data(self, xyz, classes):
         :rtype: numpy arrays of shape (n_samples, n_atoms, n_features) and (n_samples, n_atoms)
         """
 
-        if is_none(classes):
+        if classes is None:
             raise InputError("The classes need to be provided for the ARMP estimator.")
         else:
             if len(classes.shape) > 2 or np.all(xyz.shape[:2] != classes.shape):
@@ -1743,7 +1743,7 @@ def _generate_representations_from_compounds(self):
         :rtype: numpy array of shape (n_samples, n_atoms, n_features) and (n_samples, n_atoms)
         """
 
-        if is_none(self.compounds):
+        if self.compounds is None:
             raise InputError("QML compounds needs to be created in advance")
 
         if self.representation_name == 'slatm':
@@ -2028,22 +2028,22 @@ def _check_inputs(self, x, y, dy, classes):
         if not is_array_like(x):
             raise InputError("x should be an array either containing indices or data.")
 
-        if not is_none(dy):
+        if dy is not None:
             raise InputError("ARMP estimator cannot be used to predict gradients. Use ARMP_G estimator.")
 
         # Check if x is made up of indices or data
         if is_positive_integer_or_zero_array(x):
 
-            if is_none(self.representation):
+            if self.representation is None:
 
-                if is_none(self.compounds):
+                if self.compounds is None:
                     raise InputError("No representations or QML compounds have been set yet.")
                 else:
                     self.representation, self.classes = self._generate_representations_from_compounds()
 
-            if is_none(self.properties):
+            if self.properties is None:
                 raise InputError("The properties need to be set in advance.")
-            if is_none(self.classes):
+            if self.classes is None:
                 raise InputError("The classes need to be set in advance.")
 
             approved_x = self.representation[x]
@@ -2055,9 +2055,9 @@ def _check_inputs(self, x, y, dy, classes):
 
         else:
 
-            if is_none(y):
+            if y is None:
                 raise InputError("y cannot be of None type.")
-            if is_none(classes):
+            if classes is None:
                 raise InputError("ARMP estimator needs the classes to do atomic decomposition.")
 
             approved_x = check_local_representation(x)
@@ -2089,12 +2089,12 @@ def _check_predict_input(self, x, classes):
         # Check if x is made up of indices or data
         if is_positive_integer_or_zero_array(x):
 
-            if is_none(self.representation):
-                if is_none(self.compounds):
+            if self.representation is None:
+                if self.compounds is None:
                     raise InputError("No representations or QML compounds have been set yet.")
                 else:
                     self.representation, self.classes = self._generate_representations_from_compounds()
-            if is_none(self.properties):
+            if self.properties is None:
                 raise InputError("The properties need to be set in advance.")
 
             approved_x = self.representation[x]
@@ -2104,7 +2104,7 @@ def _check_predict_input(self, x, classes):
 
         else:
 
-            if is_none(classes):
+            if classes is None:
                 raise InputError("ARMP estimator needs the classes to do atomic decomposition.")
 
             approved_x = check_local_representation(x)
 
@@ -33,7 +33,7 @@
 from .farad_kernels import fget_atomic_kernels_arad
 from .farad_kernels import fget_atomic_symmetric_kernels_arad
 
-from qml.data.alchemy import PTP
+from qml.utils.alchemy import PTP
 
 def getAngle(sp,norms):
     epsilon = 10.* np.finfo(float).eps
 
@@ -23,4 +23,3 @@
 
 from .xyzdataprovider import XYZDataProvider
 from .compound import Compound
-from .alchemy import ELEMENT_NAME, NUCLEAR_CHARGE
@@ -1,6 +1,6 @@
 # MIT License
 #
-# Copyright (c) 2016-2017 Anders Steen Christensen, Felix Faber, Lars Andersen Bratholm
+# Copyright (c) 2016-2018 Anders Steen Christensen, Felix Faber, Lars Andersen Bratholm
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -25,7 +25,7 @@
 import numpy as np
 import collections
 
-from .alchemy import NUCLEAR_CHARGE
+from ..utils import NUCLEAR_CHARGE
 
 from ..representations import generate_coulomb_matrix
 from ..representations import generate_atomic_coulomb_matrix
 
@@ -44,4 +44,3 @@ def get_properties(self, idx=None):
     def read_database(self, db_filename):
 
         self.compounds = connect(db_filename)
-
@@ -40,4 +40,3 @@ def add_structures(self, xyz_filenames):
             print(i, xyz_filename, self.properties[i])
             compound = read(xyz_filename)
             self.compounds.write(compound)
- 
@@ -31,7 +31,7 @@
 
 from .fchl_kernel_functions import get_kernel_parameters
 
-from qml.data.alchemy import get_alchemy
+from qml.utils.alchemy import get_alchemy
 
 
 # def get_local_kernels_ef(A, B, verbose=False, df=0.01, ef_scaling=0.01,\
 
@@ -37,7 +37,7 @@
 
 from .fchl_kernel_functions import get_kernel_parameters
 
-from qml.data.alchemy import get_alchemy
+from qml.utils.alchemy import get_alchemy
 
 
 def get_gaussian_process_kernels(A, B, verbose=False, dx=0.005, \
 
@@ -52,7 +52,7 @@
 
 from .fchl_kernel_functions import get_kernel_parameters
 
-from qml.data.alchemy import get_alchemy
+from qml.utils.alchemy import get_alchemy
 
 
 def get_local_kernels(A, B, \
 
@@ -25,8 +25,8 @@
 import numpy as np
 import copy
 
-from qml.data.alchemy import get_alchemy
-from qml.data.alchemy import ELEMENT_NAME
+from qml.utils.alchemy import get_alchemy
+from qml.utils import ELEMENT_NAME
 
 def generate_representation(coordinates, nuclear_charges,
         max_size=23, neighbors=23, cut_distance = 5.0, cell=None):
 
@@ -36,7 +36,7 @@
 from .ffchl_module import fget_atomic_local_kernels_fchl
 
 from .fchl_kernel_functions import get_kernel_parameters
-from qml.data.alchemy import get_alchemy
+from qml.utils.alchemy import get_alchemy
 
 
 def get_local_kernels(A, B, verbose=False,\
 
@@ -24,7 +24,7 @@
 
 import numpy as np
 
-from .fkernels import fgaussian_kernel
+from .fkernels import fgaussian_kernel, fgaussian_kernel_symmetric
 from .fkernels import flaplacian_kernel
 from .fkernels import fgaussian_kernel_symmetric
 from .fkernels import flaplacian_kernel_symmetric
@@ -34,6 +34,7 @@
 
 from .fkernels import fget_local_kernels_gaussian
 from .fkernels import fget_local_kernels_laplacian
+from .fkernels import fget_vector_kernels_gaussian, fget_vector_kernels_gaussian_symmetric
 
 def laplacian_kernel(A, B, sigma):
     """ Calculates the Laplacian kernel matrix K, where :math:`K_{ij}`:
@@ -304,7 +305,7 @@ def get_local_kernels_gaussian(A, B, na, nb, sigmas):
 
     nma = len(na)
     nmb = len(nb)
-     
+
     sigmas = np.asarray(sigmas)
     nsigmas = len(sigmas)
 
 
@@ -110,5 +110,3 @@ def _save(self, path, save_kernel=False):
         if save_kernel:
             np.save(path + "/K.npy")
 
-        
-
@@ -0,0 +1,27 @@
+# MIT License
+#
+# Copyright (c) 2018 Lars Andersen Bratholm
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from .data import Data
+from . import representations
+from . import kernels
+from . import preprocessing
+from . import models
@@ -0,0 +1,139 @@
+
+from __future__ import print_function
+
+import glob
+import numpy as np
+from ..utils import NUCLEAR_CHARGE
+import copy
+
+
+class Data(object):
+    """
+    Temporary data class which should be replaced at some point by the ASE-interface.
+    This could in principle also be replaced by a dictionary.
+
+    Stores the coordinates, nuclear charges and atom counts parsed from xyz
+    files, plus optional energies. Several methods (``take``, ``__getitem__``,
+    ``__len__``, ``shape``) exist only so that sklearn's cross-validation
+    utilities can treat the object like an indexable collection.
+    """
+
+    def __init__(self, filenames=None, property_type="energy"):
+        """
+        :param filenames: list/tuple of filenames or a string to be read by glob. e.g. 'dir/*.xyz'
+        :type filenames: list, tuple or string
+        :param property_type: What kind of property will be predicted ('energy')
+        :type property_type: string
+        """
+
+        self.property_type = property_type
+
+        self._set_ncompounds(0)
+        self.coordinates = None
+        self.nuclear_charges = None
+        self.natoms = None
+        self.energies = None
+
+        if isinstance(filenames, str):
+            # Sort the glob result so the compound order is reproducible
+            filenames = sorted(glob.glob(filenames))
+        elif isinstance(filenames, tuple):
+            # Tuples used to be ignored silently; treat them like lists
+            filenames = list(filenames)
+
+        if isinstance(filenames, list):
+            self._parse_xyz_files(filenames)
+        # Overwritten in various parts of a standard prediction pipeline
+        # so don't use these within the class
+        #self._has_transformed_labels
+        #self._representations
+        #self._kernel
+        #self._indices
+        #self._representation_type
+        #self._representation_short_name
+        #self._representation_cutoff
+        #self._representation_alchemy
+
+    def _set_ncompounds(self, n):
+        """
+        Set the number of compounds and the matching ``shape`` attribute.
+        """
+        self.ncompounds = n
+        # Hack for sklearn CV
+        self.shape = (n,)
+
+    def take(self, i, axis=None):
+        """
+        Hack for sklearn CV. Returns a shallow copy of the object with
+        ``_indices`` set to ``i``; ``axis`` is accepted but ignored.
+        """
+        other = copy.copy(self)
+        other._indices = i
+        return other
+
+    # Hack for sklearn CV
+    def __getitem__(self, i):
+        return i
+
+    # Hack for sklearn CV but also convenience
+    def __len__(self):
+        # A subset created by `take` reports the size of its index set
+        if hasattr(self, '_indices'):
+            return len(self._indices)
+        return self.ncompounds
+
+    # Hack for sklearn CV but also convenience
+    def __eq__(self, other):
+        """
+        Overrides the == operator.
+
+        Two objects are equal when they are of the same type and every
+        attribute of one is the *same object* (identity, not value) as the
+        attribute of the same name on the other.
+        """
+
+        if type(self) != type(other):
+            return False
+
+        self_vars = vars(self)
+        other_vars = vars(other)
+
+        if len(self_vars) != len(other_vars):
+            return False
+
+        for key, val in self_vars.items():
+            # Same number of attributes does not imply the same names;
+            # a missing key means the objects differ (previously a KeyError).
+            if key not in other_vars:
+                return False
+            if val is not other_vars[key]:
+                return False
+
+        return True
+
+    # Hack for sklearn CV but also convenience
+    def __ne__(self, other):
+        """
+        Overrides the != operator (unnecessary in Python 3)
+        """
+        return not self.__eq__(other)
+
+    def set_energies(self, energies):
+        """
+        Store the given energies on the object.
+        """
+        self.energies = energies
+
+    def _parse_xyz_files(self, filenames):
+        """
+        Parse a list of xyz files.
+
+        The first line of each file is read as the atom count, the second
+        line is skipped and the following ``natoms`` lines are read as
+        ``element x y z``.
+
+        :param filenames: paths of the xyz files
+        :type filenames: list
+        :raises ValueError: if a file has fewer atom lines than announced or
+            an atom line has fewer than four columns
+        """
+
+        self._set_ncompounds(len(filenames))
+        self.coordinates = np.empty(self.ncompounds, dtype=object)
+        self.nuclear_charges = np.empty(self.ncompounds, dtype=object)
+        self.natoms = np.empty(self.ncompounds, dtype=int)
+
+        for i, filename in enumerate(filenames):
+            with open(filename, "r") as f:
+                lines = f.readlines()
+
+            natoms = int(lines[0])
+            atom_lines = lines[2:natoms + 2]
+
+            # The arrays below are allocated with np.empty, so every entry
+            # must be filled; a truncated file would leave garbage behind.
+            if len(atom_lines) != natoms:
+                raise ValueError("Expected %d atom lines in '%s' but found %d"
+                                 % (natoms, filename, len(atom_lines)))
+
+            charges = np.empty(natoms, dtype=int)
+            coordinates = np.empty((natoms, 3), dtype=float)
+
+            for j, line in enumerate(atom_lines):
+                tokens = line.split()
+
+                if len(tokens) < 4:
+                    raise ValueError("Malformed atom line %d in '%s': %r"
+                                     % (j + 3, filename, line))
+
+                charges[j] = NUCLEAR_CHARGE[tokens[0]]
+                coordinates[j] = np.asarray(tokens[1:4], dtype=float)
+
+            self.natoms[i] = natoms
+            self.nuclear_charges[i] = charges
+            self.coordinates[i] = coordinates
+
+        # Try to convert dtype to int/float in cases where you have the
+        # same molecule, just different conformers. Both conversions are
+        # done before either attribute is replaced so the two arrays never
+        # end up in different layouts.
+        try:
+            charges = np.asarray(list(self.nuclear_charges), dtype=int)
+            coordinates = np.asarray(list(self.coordinates), dtype=float)
+        except ValueError:
+            # Molecules of different size: keep the object arrays
+            return
+
+        self.nuclear_charges = charges
+        self.coordinates = coordinates
@@ -0,0 +1,161 @@
+# MIT License
+#
+# Copyright (c) 2018 Lars A. Bratholm
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import division, absolute_import, print_function
+
+import numpy as np
+from sklearn.base import BaseEstimator
+from sklearn.metrics import mean_absolute_error
+
+from ..utils import is_numeric_array
+from .data import Data
+from ..math import cho_solve
+
+class _BaseModel(BaseEstimator):
+    """
+    Base class for all regression models.
+
+    Subclasses implement ``fit`` and ``predict`` and are expected to set the
+    attribute ``scoring``, which ``score`` reads to select the metric.
+    """
+
+    _estimator_type = "regressor"
+
+    def fit(self, X, y=None):
+        raise NotImplementedError
+
+    def predict(self, X):
+        # Must be raised, not returned, so that a missing override is an error
+        raise NotImplementedError
+
+    def score(self, X, y=None):
+        """
+        Make predictions on `X` and return a score
+
+        :param X: Data object
+        :type X: object
+        :param y: Energies
+        :type y: array
+        :return: score
+        :rtype: float
+        """
+
+        # Make predictions
+        y_pred = self.predict(X)
+
+        # Get the true values
+        if is_numeric_array(y):
+            pass
+
+        elif isinstance(X, Data):
+            try:
+                y = X.energies[X._indices]
+            except (AttributeError, TypeError, IndexError):
+                print("No kernel energies found in data object in module %s" % self.__class__.__name__)
+                raise SystemExit
+
+        else:
+            print("Expected variable 'X' to be Data object. Got %s" % str(X))
+            raise SystemExit
+
+        # Return the score
+        if self.scoring in ('mae', 'neg_mae', 'neg_log_mae'):
+            mae = mean_absolute_error(y, y_pred)
+            if self.scoring == 'mae':
+                return mae
+            if self.scoring == 'neg_mae':
+                return - mae
+            return - np.log(mae)
+
+        if self.scoring in ('rmsd', 'neg_rmsd'):
+            # Imported here because the module only imports mean_absolute_error
+            from sklearn.metrics import mean_squared_error
+            rmsd = np.sqrt(mean_squared_error(y, y_pred))
+            return rmsd if self.scoring == 'rmsd' else - rmsd
+
+        # An unsupported metric used to fall through and return None
+        print("Unknown scoring '%s' in module %s" % (str(self.scoring), self.__class__.__name__))
+        raise SystemExit
+
+class KernelRidgeRegression(_BaseModel):
+    """
+    Standard Kernel Ridge Regression using a cholesky solver
+    """
+
+    def __init__(self, l2_reg=1e-10, scoring='neg_mae'):
+        """
+        :param l2_reg: l2 regularization
+        :type l2_reg: float
+        :param scoring: Metric used for scoring ('mae', 'neg_mae', 'rmsd', 'neg_rmsd', 'neg_log_mae')
+        :type scoring: string
+        """
+        self.l2_reg = l2_reg
+        self.scoring = scoring
+
+        # Regression coefficients; None until `fit` has been called
+        self.alpha = None
+
+    def fit(self, X, y=None):
+        """
+        Fit the Kernel Ridge Regression model using a cholesky solver
+
+        :param X: Data object or kernel
+        :type X: object or array
+        :param y: Energies
+        :type y: array
+        :return: the fitted model
+        :rtype: object
+        """
+
+        if isinstance(X, Data):
+            try:
+                K, y = X._kernel, X.energies[X._indices]
+            except (AttributeError, TypeError, IndexError):
+                print("No kernel matrix and/or energies found in data object in module %s" % self.__class__.__name__)
+                raise SystemExit
+        elif is_numeric_array(X) and X.ndim == 2 and X.shape[0] == X.shape[1] and y is not None:
+            K = X
+        else:
+            print("Expected variable 'X' to be kernel matrix or Data object. Got %s" % str(X))
+            raise SystemExit
+
+        # The regulariser is added to the diagonal in place to avoid copying
+        # the whole kernel. The original diagonal is restored afterwards so
+        # that fitting again on the same kernel does not accumulate l2_reg.
+        diagonal = np.diag_indices_from(K)
+        original_diagonal = K[diagonal].copy()
+        K[diagonal] += self.l2_reg
+
+        try:
+            self.alpha = cho_solve(K, y)
+        finally:
+            K[diagonal] = original_diagonal
+
+        return self
+
+    def predict(self, X):
+        """
+        Predict with the fitted Kernel Ridge Regression model
+
+        :param X: Data object or kernel between the query and training samples.
+            A kernel may be given with the training samples along either
+            axis; it is transposed when they are along the first one.
+        :type X: object or array
+        :return: Predicted values
+        :rtype: array
+        """
+
+        # Check if model has been fit
+        if self.alpha is None:
+            print("Error: The %s model has not been trained yet" % self.__class__.__name__)
+            raise SystemExit
+
+        if isinstance(X, Data):
+            try:
+                K = X._kernel
+            except AttributeError:
+                print("No kernel matrix found in data object in module %s" % self.__class__.__name__)
+                raise SystemExit
+        elif is_numeric_array(X) and X.ndim == 2 and X.shape[1] == self.alpha.size:
+            K = X
+        elif is_numeric_array(X) and X.ndim == 2 and X.shape[0] == self.alpha.size:
+            K = X.T
+        else:
+            print("Expected variable 'X' to be kernel matrix or Data object. Got %s" % str(X))
+            raise SystemExit
+
+        return np.dot(K, self.alpha)
@@ -0,0 +1,237 @@
+# MIT License
+#
+# Copyright (c) 2018 Lars Andersen Bratholm
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import print_function
+
+import copy
+
+import numpy as np
+from sklearn.base import BaseEstimator
+from sklearn.linear_model import LinearRegression
+
+from .data import Data
+from ..utils import is_numeric_array, get_unique, is_positive_integer_or_zero_array
+
+class AtomScaler(BaseEstimator):
+    """
+    Subtracts any constant offset or linear dependency on the number of atoms of each element from the property
+    """
+
+    def __init__(self, data=None, elements='auto'):
+        """
+        :param data: Data object (optional)
+        :type data: Data object
+        :param elements: Elements to support. If `elements='auto'` try to determine this automatically.
+        :type elements: array
+        """
+        # The object is stored by reference; copies are made when it is used
+        self._set_data(data)
+        self.elements = elements
+
+        # Initialize model
+        self.model = LinearRegression()
+
+    def _preprocess_input(self, X):
+        """
+        Convenience function that processes X in a way such that
+        X can both be a data object, or an array of indices.
+
+        :param X: Data object or integer array of indices
+        :type X: Data object or array
+        :return: Shallow copy of the data object with `_indices` and
+            `_has_transformed_labels` set
+        :rtype: Data object
+        """
+
+        if isinstance(X, Data):
+
+            self._check_data(X)
+
+            data = copy.copy(X)
+
+            # Part of the sklearn CV hack.
+            if not hasattr(data, '_indices'):
+                data._indices = np.arange(len(data))
+
+            if hasattr(data, '_has_transformed_labels'):
+                print("Error: Target data has already been transformed by %s" % self.__class__.__name__)
+                raise SystemExit
+
+            # The mask covers every compound. len(data) would be the size of
+            # the index subset, which is too short for e.g. a CV fold that
+            # does not start at index 0.
+            transformed_labels = np.zeros(len(data.energies), dtype=bool)
+            transformed_labels[data._indices] = True
+            data._has_transformed_labels = transformed_labels
+
+        elif self.data is not None and is_positive_integer_or_zero_array(X) \
+                and max(X) < self.data.natoms.size:
+            # A copy here might avoid some unintended behaviour
+            # if multiple models is used sequentially.
+            data = copy.copy(self.data)
+            data._indices = np.asarray(X, dtype=int).ravel()
+
+            if hasattr(data, '_has_transformed_labels'):
+                if data._has_transformed_labels[data._indices].any():
+                    print("Error: Target data has already been transformed by %s" % self.__class__.__name__)
+                    raise SystemExit
+                data._has_transformed_labels[data._indices] = True
+            else:
+                transformed_labels = np.zeros(len(data.energies), dtype=bool)
+                transformed_labels[data._indices] = True
+                data._has_transformed_labels = transformed_labels
+
+        else:
+            print("Expected X to be array of indices or Data object. Got %s" % str(X))
+            raise SystemExit
+
+        return data
+
+    def _check_data(self, X):
+        """
+        Exit with an error message if the Data object has no parsed
+        molecules or no energies.
+        """
+        if X.natoms is None:
+            print("Error: Empty Data object passed to the %s transformer" % self.__class__.__name__)
+            raise SystemExit
+
+        if X.energies is None:
+            # The original message had no placeholder, so formatting it
+            # raised TypeError instead of reporting the problem.
+            print("Error: Expected Data object passed to the %s transformer to have non-empty attribute 'energies'" % self.__class__.__name__)
+            raise SystemExit
+
+    def _set_data(self, data):
+        """
+        Store `data`, validating it first unless it is None or empty.
+        """
+        # Truthiness of a Data object is its length, so an empty object is
+        # stored without validation, as is None.
+        if data:
+            self._check_data(data)
+        self.data = data
+
+    def _resolve_input(self, X, y):
+        """
+        Split the supported input types into (data, nuclear_charges, y).
+        `data` is None when nuclear charges and values were passed directly.
+        """
+        if not isinstance(X, Data) and y is not None:
+            return None, X, y
+
+        data = self._preprocess_input(X)
+        nuclear_charges = data.nuclear_charges[data._indices]
+        return data, nuclear_charges, data.energies[data._indices]
+
+    def _package_output(self, data, delta_y):
+        """
+        Return `delta_y` directly, or written into a copy of the energies of
+        `data` when a data object is in use.
+        """
+        if data is None:
+            return delta_y
+
+        # Force copy
+        data.energies = data.energies.copy()
+        data.energies[data._indices] = delta_y
+        return data
+
+    def fit_transform(self, X, y=None):
+        """
+        Fit and transform the data with a linear model.
+        Supports three different types of input.
+        1) X is a list of nuclear charges and y is values to transform.
+        2) X is an array of indices of which to transform.
+        3) X is a data object
+
+        :param X: List with nuclear charges or Data object.
+        :type X: list
+        :param y: Values to transform
+        :type y: array or None
+        :return: Array of transformed values or Data object, depending on input
+        :rtype: array or Data object
+        """
+
+        data, nuclear_charges, y = self._resolve_input(X, y)
+
+        # Compare against the string only when elements is a string; an
+        # array compared with == would give an elementwise result.
+        if isinstance(self.elements, str) and self.elements == 'auto':
+            self.elements = get_unique(nuclear_charges)
+        else:
+            self._check_elements(nuclear_charges)
+
+        features = self._featurizer(nuclear_charges)
+
+        delta_y = y - self.model.fit(features, y).predict(features)
+
+        return self._package_output(data, delta_y)
+
+    def _check_elements(self, nuclear_charges):
+        """
+        Check that the elements in the given nuclear_charges was
+        included in the fit. Only prints a warning if not.
+        """
+
+        elements_transform = get_unique(nuclear_charges)
+        if not np.isin(elements_transform, self.elements).all():
+            print("Warning: Trying to transform molecules with elements",
+                  "not included during fit in the %s method." % self.__class__.__name__,
+                  "%s used in training but trying to transform %s" % (str(self.elements), str(elements_transform)))
+
+    def _featurizer(self, X):
+        """
+        Get the counts of each element as features.
+
+        :param X: Nuclear charges of each molecule
+        :type X: list of arrays
+        :return: Integer array of shape (len(X), len(self.elements))
+        :rtype: array
+        """
+
+        n = len(X)
+        m = len(self.elements)
+        element_to_index = {v: i for i, v in enumerate(self.elements)}
+        features = np.zeros((n, m), dtype=int)
+
+        for i, x in enumerate(X):
+            for key, value in zip(*np.unique(x, return_counts=True)):
+                # Elements that are not supported are left out of the features
+                if key not in element_to_index:
+                    continue
+                features[i, element_to_index[key]] = value
+
+        return features
+
+    def transform(self, X, y=None):
+        """
+        Transform the data with the fitted linear model.
+        Supports three different types of input.
+        1) X is a list of nuclear charges and y is values to transform.
+        2) X is an array of indices of which to transform.
+        3) X is a data object
+
+        :param X: List with nuclear charges or Data object.
+        :type X: list
+        :param y: Values to transform
+        :type y: array or None
+        :return: Array of transformed values or Data object, depending on input
+        :rtype: array or Data object
+        """
+
+        data, nuclear_charges, y = self._resolve_input(X, y)
+
+        self._check_elements(nuclear_charges)
+
+        features = self._featurizer(nuclear_charges)
+
+        delta_y = y - self.model.predict(features)
+
+        return self._package_output(data, delta_y)
+
@@ -32,7 +32,7 @@
 from .frepresentations import fgenerate_eigenvalue_coulomb_matrix
 from .frepresentations import fgenerate_bob
 
-from qml.data.alchemy import NUCLEAR_CHARGE
+from qml.utils import NUCLEAR_CHARGE
 
 from .slatm import get_boa
 from .slatm import get_sbop
 
@@ -0,0 +1,25 @@
+# MIT License
+#
+# Copyright (c) 2017-2018 Silvia Amabilino, Lars Andersen Bratholm
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from . import alchemy
+from .utils import *
+from .alchemy import ELEMENT_NAME, NUCLEAR_CHARGE
@@ -41,9 +41,6 @@ def is_positive_integer_or_zero(x):
 def is_string(x):
     return isinstance(x, str)
 
-def is_none(x):
-    return isinstance(x, type(None))
-
 def is_dict(x):
     return isinstance(x, dict)
 
@@ -88,10 +85,28 @@ def _is_integer_array(x):
 def is_positive_integer_array(x):
     return (_is_integer_array(x) and _is_positive_array(x))
 
-
 def is_positive_integer_or_zero_array(x):
     return (_is_integer_array(x) and _is_positive_or_zero_array(x))
 
+def get_unique(x):
+    """
+    Collect the distinct items found across all the inner sequences of `x`
+    and return them as a list (in no particular order).
+    """
+    seen = set()
+    for inner in x:
+        for item in inner:
+            seen.add(item)
+    return list(seen)
+
+def get_pairs(x):
+    """
+    Build every pair [v, w] where w is taken from position of v onwards,
+    i.e. each element is paired with itself and with all later elements.
+    E.g. x = [1,2,3] will return
+    [[1, 1], [1, 2], [1, 3], [2, 2], [2, 3], [3, 3]]
+    """
+    return [[first, second]
+            for start, first in enumerate(x)
+            for second in x[start:]]
+
+
 # ------------- ** Checking inputs ** --------------------------
 
 def check_global_representation(x):
@@ -157,14 +172,14 @@ def check_sizes(x, y=None, dy=None, classes=None):
     :return: None
     """
 
-    if is_none(dy) and is_none(classes):
+    if dy is None and classes is None:
 
         if x.shape[0] != y.shape[0]:
             raise InputError("The descriptor and the properties should have the same first number of elements in the "
                              "first dimension. Got %s and %s" % (x.shape[0], y.shape[0]))
 
-    elif is_none(y) and is_none(dy):
-        if is_none(classes):
+    elif y is None and dy is None:
+        if classes is None:
             raise InputError("Only x is not none.")
         else:
             if x.shape[0] != classes.shape[0]:
@@ -173,7 +188,7 @@ def check_sizes(x, y=None, dy=None, classes=None):
                 if x.shape[1] != classes.shape[1]:
                     raise InputError("The number of atoms in the descriptor and in the classes is different: %s and %s." % (x.shape[1], classes.shape[1]))
 
-    elif is_none(dy) and not is_none(classes):
+    elif dy is None and classes is not None:
 
         if x.shape[0] != y.shape[0] or x.shape[0] != classes.shape[0]:
             raise InputError("All x, y and classes should have the first number of elements in the first dimension. Got "
@@ -202,7 +217,7 @@ def check_dy(dy):
     :return: numpy array of floats of shape (n_samples, n_atoms, 3)
     """
 
-    if is_none(dy):
+    if dy is None:
         approved_dy = dy
     else:
         if not is_array_like(dy):
@@ -227,7 +242,7 @@ def check_classes(classes):
     :return: numpy array of ints of shape (n_samples, n_atoms)
     """
 
-    if is_none(classes):
+    if classes is None:
         approved_classes = classes
     else:
         if not is_array_like(classes):
@@ -244,13 +259,6 @@ def check_classes(classes):
 
     return approved_classes
 
-
-
-
-
-
-
-
 #
 #def _is_numeric_array(x):
 #    try:
 
@@ -146,7 +146,9 @@ def setup_qml():
             'qml.kernels',
             'qml.math',
             'qml.representations',
-            'qml.models',
+            'qml.qmlearn',
+            'qml.utils',
+            'qml.models'
             ],
 
         # metadata
 
@@ -27,7 +27,7 @@
 
 import numpy as np
 from qml.aglaia.aglaia import ARMP
-from qml.aglaia.utils import InputError
+from qml.utils import InputError
 import glob
 import os
 
 
@@ -27,7 +27,7 @@
 
 import numpy as np
 from qml.aglaia.aglaia import MRMP
-from qml.aglaia.utils import InputError
+from qml.utils import InputError
 import glob
 import os
 import shutil
 
@@ -29,7 +29,7 @@
 
 # TODO relative imports
 from qml.aglaia.aglaia import MRMP
-from qml.aglaia.utils import InputError
+from qml.utils import InputError
 
 
 # ------------ ** All functions to test the inputs to the classes ** ---------------
Original file line number	Diff line number	Diff line change
`@@ -23,4 +23,3 @@`
`23`	`23`
`24`	`24`	`from .xyzdataprovider import XYZDataProvider`
`25`	`25`	`from .compound import Compound`
`26`		`-from .alchemy import ELEMENT_NAME, NUCLEAR_CHARGE`
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`# MIT License`
`2`	`2`	`#`
`3`		`-# Copyright (c) 2016-2017 Anders Steen Christensen, Felix Faber, Lars Andersen Bratholm`
	`3`	`+# Copyright (c) 2016-2018 Anders Steen Christensen, Felix Faber, Lars Andersen Bratholm`
`4`	`4`	`#`
`5`	`5`	`# Permission is hereby granted, free of charge, to any person obtaining a copy`
`6`	`6`	`# of this software and associated documentation files (the "Software"), to deal`
`@@ -25,7 +25,7 @@`
`25`	`25`	`import numpy as np`
`26`	`26`	`import collections`
`27`	`27`
`28`		`-from .alchemy import NUCLEAR_CHARGE`
	`28`	`+from ..utils import NUCLEAR_CHARGE`
`29`	`29`
`30`	`30`	`from ..representations import generate_coulomb_matrix`
`31`	`31`	`from ..representations import generate_atomic_coulomb_matrix`
Original file line number	Diff line number	Diff line change
`@@ -44,4 +44,3 @@ def get_properties(self, idx=None):`
`44`	`44`	`def read_database(self, db_filename):`
`45`	`45`
`46`	`46`	`self.compounds = connect(db_filename)`
`47`		`-`