This repository was archived by the owner on Dec 8, 2024. It is now read-only.

Commit 98d8299

Silvia authored and committed
Tolerance fix (#104)

* Corrected small bug in predict function
* Started updating so that the model can be trained after it has been reloaded
* Minor modifications
* Updated model so one can predict from xyz, and disabled shuffling in training because it leads to a problem with predictions
* Fix for the shuffling problem
* Added some tests to make sure the predictions work
* Fixed a tensorboard problem
* Saving the model no longer causes an error if the directory already exists
* Fixed a bug that made a test fail
* Modified the name of a parameter
* Made modifications to make the symmetry functions more numerically stable
* Added a hack that makes ARMP work with fortran ACSF when there are padded representations. Currently works *ONLY* when there is one molecule for the whole data set.
* Corrected bug in score function for padded molecules
* Changes that make the model work quickly even when there is padding
* Fixed discrepancies between fortran and TF ACSF
* Corrected bug in setting of ACSF parameters
* Attempt at fixing issue #10
* Another attempt at fixing #10
* Removed a pointless line
* Set-up
* Added the graceful killer
* Modifications which prevent installation from breaking on BC4
* Modification to add neural networks to qmlearn
* Fix for issue #8
* Random comment
* Started including the atomic model
* Made the atomic neural network work
* Fixed a bug with the indices
* Now training and predictions don't use the default graph, to avoid problems
* Uncommented examples
* Removed unique_elements in data class. This can be stored in the NN class, but I might reverse the change later
* Made tensorflow an optional dependency. The reason for this approach is that pip would just auto-install tensorflow, and you might want the GPU version or your own compiled one.
* Made is_numeric non-private and removed legacy code
* Added 1d array util function
* Removed QML check and moved functions from utils to tf_utils
* Support for linear models (no hidden layers)
* Fixed import bug in tf_utils
* Added text to explain that you are scoring on the training set
* Restructure, but elements are still not working. Sorted elements
* Moved documentation from init to class
* Constant features will now be removed at fit/predict time
* Moved get_batch_size back into utils, since it doesn't depend on tf
* Made the NeuralNetwork class compliant with sklearn. There cannot be any transforms of the input data
* Fixed tests that didn't pass
* Fixed mistake in checks of set_classes() in ARMP
* Started fixing ARMP bugs for QM7
* Fixed bug in padding and added examples that give low errors
* Attempted fix to make representations single precision
* Hot fix for AtomScaler
* Minor bug fixes
* More bug fixes to make sure tests run
* Fixed some tests that had failures
* Reverted the fchl tests to original
* Fixed path in ACSF test
* Re-added changes to tests
* Modifications after code review
* Version with the ACSF basis functions starting at 0.8 Å
* Updated ACSF representations so that the minimum distance at which to start the binning can be set by the user
* Modified the name of the new parameter (minimum distance of the binning in ACSF)
* Added a function to the AtomScaler that enables reverting back
* Relaxed tolerance in tests
1 parent 50cc6a7 commit 98d8299

File tree

2 files changed: +45 −6 lines changed


qml/qmlearn/preprocessing.py (+39)
```diff
@@ -206,6 +206,21 @@ def _transform(self, data, features, y):
         else:
             return delta_y
 
+    def _revert_transform(self, data, features, y):
+        """
+        Reverts the work of the transform method.
+        """
+
+        full_y = y + self.model.predict(features)
+
+        if data:
+            # Force copy
+            data.energies = data.energies.copy()
+            data.energies[data._indices] = full_y
+            return data
+        else:
+            return full_y
+
     def _check_elements(self, nuclear_charges):
         """
         Check that the elements in the given nuclear_charges was
@@ -261,3 +276,27 @@ def transform(self, X, y=None):
         features = self._featurizer(nuclear_charges)
 
         return self._transform(data, features, y)
+
+    def revert_transform(self, X, y=None):
+        """
+        Transforms data back to what it originally would have been if it hadn't been transformed with the fitted linear
+        model. Supports three different types of input.
+        1) X is a list of nuclear charges and y is values to transform.
+        2) X is an array of indices of which to transform.
+        3) X is a data object
+
+        :param X: List with nuclear charges or Data object.
+        :type X: list
+        :param y: Values to revert to before transform
+        :type y: array or None
+        :return: Array of untransformed values or Data object, depending on input
+        :rtype: array or Data object
+        """
+
+        data, nuclear_charges, y = self._parse_input(X, y)
+
+        self._check_elements(nuclear_charges)
+
+        features = self._featurizer(nuclear_charges)
+
+        return self._revert_transform(data, features, y)
```
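The new `_revert_transform` is the inverse of `_transform`: the transform subtracts the fitted linear model's prediction from the target values, and the revert adds it back. A minimal round-trip sketch of that idea, using scikit-learn's `LinearRegression` as a stand-in for the fitted baseline model (the feature matrix and energy values below are made up for illustration):

```python
import numpy as np
from sklearn.linear_model import LinearRegression

# Hypothetical features (e.g. per-element atom counts) and molecular energies
features = np.array([[2.0, 1.0], [1.0, 2.0], [3.0, 0.0]])
y = np.array([-76.0, -77.5, -2.3])

# Stand-in for the fitted baseline model stored on the scaler
model = LinearRegression().fit(features, y)

# transform: subtract the linear baseline, leaving residual energies
delta_y = y - model.predict(features)

# revert_transform: add the baseline back to recover the original values
full_y = delta_y + model.predict(features)

assert np.allclose(full_y, y)
```

Since the revert simply adds back the same model prediction, the round trip is exact up to floating-point error.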

test/test_armp.py (+6 −6)
```diff
@@ -228,7 +228,7 @@ def test_predict_fromxyz():
     pred1 = estimator.predict(idx)
     pred2 = estimator.predict_from_xyz(xyz, zs)
 
-    assert np.all(np.isclose(pred1, pred2, rtol=1.e-6))
+    assert np.all(np.isclose(pred1, pred2, rtol=1.e-5))
 
     estimator.save_nn(save_dir="temp")
 
@@ -243,11 +243,11 @@ def test_predict_fromxyz():
     pred3 = new_estimator.predict(idx)
     pred4 = new_estimator.predict_from_xyz(xyz, zs)
 
-    assert np.all(np.isclose(pred3, pred4, rtol=1.e-6))
-    assert np.all(np.isclose(pred1, pred3, rtol=1.e-6))
-
     shutil.rmtree("temp")
 
+    assert np.all(np.isclose(pred3, pred4, rtol=1.e-5))
+    assert np.all(np.isclose(pred1, pred3, rtol=1.e-5))
+
 def test_retraining():
     xyz = np.array([[[0, 1, 0], [0, 1, 1], [1, 0, 1]],
                     [[1, 2, 2], [3, 1, 2], [1, 3, 4]],
@@ -291,8 +291,8 @@ def test_retraining():
 
     pred4 = new_estimator.predict(idx)
 
-    assert np.all(np.isclose(pred1, pred3, rtol=1.e-6))
-    assert np.all(np.isclose(pred2, pred4, rtol=1.e-6))
+    assert np.all(np.isclose(pred1, pred3, rtol=1.e-5))
+    assert np.all(np.isclose(pred2, pred4, rtol=1.e-5))
 
     shutil.rmtree("temp")
```

0 commit comments