tvayer
diff --git a/‎.gitignore
+4 b/‎.gitignore
+4
diff --git a/‎Distance based/.ipynb_checkpoints/Mahalanobis-checkpoint.ipynb
+329 b/‎Distance based/.ipynb_checkpoints/Mahalanobis-checkpoint.ipynb
+329
diff --git a/‎Kmeans/.ipynb_checkpoints/Ouliter_using Kmeans-checkpoint.ipynb ‎Distance based/.ipynb_checkpoints/Ouliter_using Kmeans-checkpoint.ipynb b/‎Kmeans/.ipynb_checkpoints/Ouliter_using Kmeans-checkpoint.ipynb ‎Distance based/.ipynb_checkpoints/Ouliter_using Kmeans-checkpoint.ipynb
diff --git a/‎Distance based/.ipynb_checkpoints/Ouliter_using Kmeans_2-checkpoint.ipynb
+1,137 b/‎Distance based/.ipynb_checkpoints/Ouliter_using Kmeans_2-checkpoint.ipynb
+1,137
diff --git a/‎Distance based/Mahalanobis.ipynb
+304 b/‎Distance based/Mahalanobis.ipynb
+304
diff --git a/‎Kmeans/Ouliter_using Kmeans.ipynb ‎Distance based/Ouliter_using Kmeans.ipynb b/‎Kmeans/Ouliter_using Kmeans.ipynb ‎Distance based/Ouliter_using Kmeans.ipynb
diff --git a/‎Distance based/Ouliter_using Kmeans_2.ipynb
+1,145 b/‎Distance based/Ouliter_using Kmeans_2.ipynb
+1,145
diff --git a/‎RNN/.ipynb_checkpoints/Untitled-checkpoint.ipynb
-6 b/‎RNN/.ipynb_checkpoints/Untitled-checkpoint.ipynb
-6
diff --git a/‎RNN/Autoencoder.ipynb
+230-49 b/‎RNN/Autoencoder.ipynb
+230-49
diff --git a/‎RNN/Untitled.ipynb
-34 b/‎RNN/Untitled.ipynb
-34
diff --git a/‎SVC/.ipynb_checkpoints/SVC_clustering-checkpoint.ipynb
+290 b/‎SVC/.ipynb_checkpoints/SVC_clustering-checkpoint.ipynb
+290
@@ -6,3 +6,7 @@ CroppedYaleNoisy/
 autoencoder/autoencoder_h2o\.R
 
 *.h5
+
+*.gz
+
+*.pkl
@@ -0,0 +1,290 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# SVC clustering"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Based on https://github.com/josiahw/SimpleSVClustering/blob/master/SimpleSVC.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy\n",
+    "import numpy.linalg\n",
+    "import sklearn.datasets\n",
+    "from matplotlib import pyplot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def polyKernel(a,b,pwr):\n",
+    "    \"\"\"Polynomial kernel: the dot product of a and b raised to the power pwr.\"\"\"\n",
+    "    inner = numpy.dot(a, b)\n",
+    "    return inner ** pwr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def rbfKernel(a,b,gamma):\n",
+    "    \"\"\"\n",
+    "    Exponential kernel on the Euclidean distance between a and b, scaled by gamma.\n",
+    "    NOTE: the distance is used as-is (not squared).\n",
+    "    \"\"\"\n",
+    "    distance = numpy.linalg.norm(a - b)\n",
+    "    return numpy.exp(-gamma * distance)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class SimpleSVClustering:\n",
+    "    \"\"\"\n",
+    "    Support vector clustering.\n",
+    "    fit() solves for the kernel coefficients and the radius threshold;\n",
+    "    predict() puts two points in the same cluster when the radius sampled\n",
+    "    along the straight line between them never exceeds that threshold.\n",
+    "    \"\"\"\n",
+    "    #weight accumulator updated during coordinate descent (not read by _predict)\n",
+    "    w = None\n",
+    "    #kernel coefficients; after fit() only the support vector entries are kept\n",
+    "    a = None\n",
+    "    #radius threshold: mean _predict() value over the support vectors\n",
+    "    b = None\n",
+    "    #sum over all support vector pairs of a[i]*a[j]*kernel(sv[i], sv[j])\n",
+    "    bOffset = None\n",
+    "    #upper bound for each coefficient in a\n",
+    "    C = None\n",
+    "    #rows of the fitted data whose coefficient is at least C/100\n",
+    "    sv = None\n",
+    "    kernel = None\n",
+    "    kargs = ()\n",
+    "    tolerance = None\n",
+    "    verbose = False\n",
+    "\n",
+    "    def __init__(self,\n",
+    "                 C,\n",
+    "                 tolerance = 0.001,\n",
+    "                 kernel = numpy.dot,\n",
+    "                 kargs = ()\n",
+    "                 ):\n",
+    "        \"\"\"\n",
+    "        The parameters are:\n",
+    "         - C: SVC cost (upper bound for each coefficient)\n",
+    "         - tolerance: descent stops once the summed absolute coefficient change of one sweep is at most this\n",
+    "         - kernel: the kernel function to use as k(a, b, *kargs)\n",
+    "         - kargs: extra parameters for the kernel\n",
+    "        \"\"\"\n",
+    "        self.C = C\n",
+    "        self.kernel = kernel\n",
+    "        self.tolerance = tolerance\n",
+    "        self.kargs = kargs\n",
+    "\n",
+    "    def _checkClass(self, a, b, n_checks = 5):\n",
+    "        \"\"\"\n",
+    "        This does a straight line interpolation between a and b, using n_checks number of segments,\n",
+    "        and evaluates the radius at every interior sample point.\n",
+    "        It returns True if a and b are connected by a high probability region\n",
+    "        (no sample exceeds the threshold self.b), False otherwise.\n",
+    "        NOTE: authors originally suggested 20 segments but that is SLOOOOOW, so we use 5. In practice it is pretty good.\n",
+    "        \"\"\"\n",
+    "        for i in numpy.arange(1.0/n_checks,1.0,1.0/n_checks):\n",
+    "            if self._predict(i*a + (1-i)*b) > self.b:\n",
+    "                return False\n",
+    "        return True\n",
+    "\n",
+    "    def _getAllClasses(self, X):\n",
+    "        \"\"\"\n",
+    "        Assign class labels to each vector based on connected graph components.\n",
+    "        Returns a float array with one cluster index per row of X.\n",
+    "        TODO: The outputs of this should really be saved in order to embed new points into the clusters.\n",
+    "        \"\"\"\n",
+    "\n",
+    "        #1: build the connected clusters\n",
+    "        unvisited = list(range(len(X)))\n",
+    "        clusters = []\n",
+    "        while len(unvisited):\n",
+    "            #create a new cluster with the first unvisited node\n",
+    "            c = [unvisited[0]]\n",
+    "            unvisited.pop(0)\n",
+    "            i = 0\n",
+    "            while i < len(c) and len(unvisited):\n",
+    "                #for all nodes in the cluster, add all connected unvisited nodes and remove them from the unvisited list\n",
+    "                unvisitedNew = []\n",
+    "                for j in unvisited:\n",
+    "                    (c if self._checkClass(X[c[i],:],X[j,:]) else unvisitedNew).append(j)\n",
+    "                unvisited = unvisitedNew\n",
+    "                i += 1\n",
+    "            clusters.append(c)\n",
+    "\n",
+    "        #2: group components by classification\n",
+    "        classifications = numpy.zeros(len(X))-1\n",
+    "        for i in range(len(clusters)):\n",
+    "            for c in clusters[i]:\n",
+    "                classifications[c] = i\n",
+    "        return classifications\n",
+    "\n",
+    "\n",
+    "    def fit(self, X):\n",
+    "        \"\"\"\n",
+    "        Fit to data X (one sample per row); clustering takes no labels.\n",
+    "        Sets self.a, self.sv, self.bOffset and self.b. Returns nothing.\n",
+    "        \"\"\"\n",
+    "        #everything below reads the argument X, never a notebook-level variable\n",
+    "        n_samples = len(X)\n",
+    "\n",
+    "        #Construct the Q (kernel) matrix for solving. It is filled symmetrically,\n",
+    "        #so the kernel is only evaluated for j >= i.\n",
+    "        Q = numpy.zeros((n_samples, n_samples))\n",
+    "        for i in range(n_samples):\n",
+    "            for j in range(i, n_samples):\n",
+    "                Q[i,j] = Q[j,i] = self.kernel(*((X[i,:], X[j,:]) + self.kargs))\n",
+    "\n",
+    "        #Solve for a and w simultaneously by coordinate descent.\n",
+    "        #This means no quadratic solver is needed!\n",
+    "        #The support vectors correspond to non-zero values in a.\n",
+    "        self.w = numpy.zeros(X.shape[1])\n",
+    "        self.a = numpy.zeros(X.shape[0])\n",
+    "        delta = 10000000000.0\n",
+    "        while delta > self.tolerance:\n",
+    "            delta = 0.\n",
+    "            for i in range(n_samples):\n",
+    "                g = numpy.dot(Q[i,:], self.a) - Q[i,i]\n",
+    "                #step a[i] by -g/Q[i,i], clipped to the box [0, C]\n",
+    "                adelta = self.a[i] - min(max(self.a[i] - g/Q[i,i], 0.0), self.C)\n",
+    "                self.w += adelta * X[i,:]\n",
+    "                delta += abs(adelta)\n",
+    "                self.a[i] -= adelta\n",
+    "            if self.verbose:\n",
+    "                print(\"Descent step magnitude:\", delta)\n",
+    "\n",
+    "        #get the data for support vectors: samples whose coefficient is at least C/100\n",
+    "        is_sv = self.a >= self.C/100.\n",
+    "        Qshrunk = Q[is_sv,:][:,is_sv]\n",
+    "        self.sv = X[is_sv, :]\n",
+    "        self.a = self.a[is_sv]\n",
+    "\n",
+    "        #calculate the contribution of all SVs: scale entry (i, j) by a[i]*a[j]\n",
+    "        Qshrunk *= numpy.outer(self.a, self.a)\n",
+    "\n",
+    "        #this is needed for radius calculation apparently\n",
+    "        self.bOffset = numpy.sum(Qshrunk)\n",
+    "        if self.verbose:\n",
+    "            print(\"Number of support vectors:\", len(self.a))\n",
+    "\n",
+    "        #solve for b (mean radius of the support vectors) to get the final classifier\n",
+    "        self.b = numpy.mean(self._predict(self.sv))\n",
+    "\n",
+    "        if self.verbose:\n",
+    "            print(\"Bias value:\", self.b)\n",
+    "\n",
+    "    def _predict(self, X):\n",
+    "        \"\"\"\n",
+    "        For SVClustering, we need to calculate radius rather than bias.\n",
+    "        Accepts a single point (1-D) or one point per row (2-D) and returns a\n",
+    "        1-D array with one radius per point.\n",
+    "        \"\"\"\n",
+    "        if (len(X.shape) < 2):\n",
+    "            X = X.reshape((1,-1))\n",
+    "        clss = numpy.zeros(len(X))\n",
+    "        for i in range(len(X)):\n",
+    "            #k(x, x) - 2 * sum_j a[j] * k(sv[j], x); bOffset is added before the square root\n",
+    "            clss[i] += self.kernel(* ((X[i,:],X[i,:]) + self.kargs))\n",
+    "            for j in range(len(self.sv)):\n",
+    "                clss[i] -= 2 * self.a[j] * self.kernel(* ((self.sv[j,:],X[i,:]) + self.kargs))\n",
+    "        return (clss+self.bOffset)**0.5\n",
+    "\n",
+    "    def predict(self, X):\n",
+    "        \"\"\"\n",
+    "        Predict classes for data X.\n",
+    "        NOTE: this should really be done with either the fitting data or a superset of the fitting data.\n",
+    "        \"\"\"\n",
+    "\n",
+    "        return self._getAllClasses(X)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#two-moons demo data: 400 points with a fixed seed, centred on the origin\n",
+    "data,labels = sklearn.datasets.make_moons(400,noise=0.01,random_state=0)\n",
+    "data -= numpy.mean(data,axis=0)\n",
+    "\n",
+    "#parameters can be sensitive, these ones work for two moons\n",
+    "C = 0.1\n",
+    "clss = SimpleSVClustering(C,1e-10,rbfKernel,(3.5,))\n",
+    "clss.fit(data)\n",
+    "\n",
+    "#check assigned classes for the two moons as a classification error.\n",
+    "#Cluster ids are arbitrary (the cluster containing data[0] always gets id 0),\n",
+    "#so score both possible id-to-moon assignments and report the better one.\n",
+    "#Points placed in any cluster other than 0 or 1 count as errors.\n",
+    "t = clss.predict(data)\n",
+    "mismatch = numpy.mean(labels != t)\n",
+    "mismatch_swapped = numpy.mean(labels != 1 - t)\n",
+    "print(\"Error\", min(mismatch, mismatch_swapped))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#generate a heatmap of the radius on a 100x100 grid covering [-2, 2) in both axes\n",
+    "#and display classified clusters.\n",
+    "a = numpy.zeros((100,100))\n",
+    "for i in range(100):\n",
+    "    for j in range(100):\n",
+    "        #_predict returns a length-1 array for a single point; store its only element\n",
+    "        a[j,i] = clss._predict(numpy.array([i*4/100.-2,j*4/100.-2]))[0]\n",
+    "pyplot.imshow(a, cmap='hot', interpolation='nearest')\n",
+    "#map data coordinates to heatmap pixel coordinates (pixel = 25*x + 50) in a copy,\n",
+    "#so data stays untouched and re-running this cell does not rescale the points again\n",
+    "data_px = data * 25. + 50.\n",
+    "pyplot.scatter(data_px[t==0,0],data_px[t==0,1],c='r')\n",
+    "pyplot.scatter(data_px[t==1,0],data_px[t==1,1],c='b')\n",
+    "\n",
+    "pyplot.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [default]",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
-Original file line number
+Diff line change
 autoencoder/autoencoder_h2o\.R
 *.h5
++
 +*.gz
++
 +*.pkl