1
+ import numpy as np
2
+ import os
3
+ import pickle
4
+ import csv
5
+ import datetime as dt
6
+
7
def preprocess_data(char_set, input_case=None, log_buffer=dt.timedelta(seconds=1)):
    """Pair keystroke chunks from keyfreq logs with the motion-sensor data
    recorded by the left- and right-hand devices while they were typed.

    char_set: set of characters for which input has to be considered
    input_case: ['room'], ['library'], 'all'/['all'], or None (== all)
    log_buffer: timedelta giving the buffer allowed between two successive
        keyfreq readings before the current typing chunk is closed

    Returns the 7-tuple (keystrokes, lh_accel, lh_gyro, lh_gravity,
    rh_accel, rh_gyro, rh_gravity); each element holds one entry per
    processed log file.
    """
    # BUG FIX: the original default was the *list* ["all"] but the check
    # compared it against the *string* "all", so the default never expanded
    # to both folders. Also avoids a mutable default argument.
    if input_case is None or input_case == "all" or input_case == ["all"]:
        input_case = ["room", "library"]

    final_log_keystrokes = []
    final_lefthand_gyro = []
    final_lefthand_accel = []
    final_lefthand_gravity = []
    final_righthand_gyro = []
    final_righthand_accel = []
    final_righthand_gravity = []
    keyfreq_folder = '../Data/keyfreq/'
    righthand_folder = '../Data/righthand/'
    lefthand_folder = '../Data/lefthand/'

    for folder in input_case:
        log_files = [os.path.join(keyfreq_folder, folder, f)
                     for f in sorted(os.listdir(keyfreq_folder + folder))]
        lefthand_files = [os.path.join(lefthand_folder, folder, f)
                          for f in sorted(os.listdir(lefthand_folder + folder))]
        righthand_files = [os.path.join(righthand_folder, folder, f)
                           for f in sorted(os.listdir(righthand_folder + folder))]
        print("total number of files in %s: %d" % (folder, len(log_files)))

        # Going over the keylogged files first.
        for file_num, log_file in enumerate(log_files):
            curr_log_keystrokes = []
            curr_lefthand_gyro, curr_lefthand_accel, curr_lefthand_gravity = [], [], []
            curr_righthand_gyro, curr_righthand_accel, curr_righthand_gravity = [], [], []

            with open(log_file) as curr_log_f:
                # The sensor session files are matched against the first
                # keyfreq timestamp of this log file.
                line0_time = _keyfreq_time(curr_log_f.readline().split()[0])
                # BUG FIX: the original later passed the stale loop variables
                # lh_file/rh_file to find_starting_line/get_chunk_data; when
                # no file matched, that silently used the *last* candidate.
                righthand_file = _session_file(righthand_files, line0_time)
                lefthand_file = _session_file(lefthand_files, line0_time)
                if lefthand_file is None or righthand_file is None:
                    print("no matching sensor file for %s; skipping" % log_file)
                    continue
                # Rewind so the chunking loop below also sees line 0.
                curr_log_f.seek(0)

                lh_accel0, lh_gyro0, lh_gravity0 = find_starting_line(lefthand_file)
                rh_accel0, rh_gyro0, rh_gravity0 = find_starting_line(righthand_file)

                for line in curr_log_f:
                    line_split = line.split()
                    if line_split[-1] not in char_set:
                        continue
                    curr_log_start_time = _keyfreq_time(line_split[0])
                    # Collapse 'seconds:micros' into 'secondsmicros'.
                    ts_parts = line_split[0].split(':')
                    line_split[0] = ts_parts[0] + ts_parts[1]
                    # NOTE(review): each new chunk *replaces* the previous
                    # keystroke list while the sensor lists accumulate one
                    # entry per chunk, so only the last chunk's keystrokes
                    # survive per file. Preserved as-is — confirm with the
                    # consumers of the returned structure.
                    curr_log_keystrokes = [line_split]
                    prev_end_time = curr_log_start_time
                    curr_log_end_time = None

                    # Consume lines from the *same* file iterator until a gap
                    # longer than log_buffer before a non-char_set key closes
                    # the chunk.
                    for end_line in curr_log_f:
                        end_line_split = end_line.split()
                        curr_end_time = _keyfreq_time(end_line_split[0])
                        if (curr_end_time - prev_end_time) > log_buffer \
                                and end_line_split[-1] not in char_set:
                            curr_log_end_time = prev_end_time
                            break
                        prev_end_time = curr_end_time
                        ts_parts = end_line_split[0].split(':')
                        end_line_split[0] = ts_parts[0] + ts_parts[1]
                        # BUG FIX: the original appended line_split (the
                        # chunk's first line) over and over instead of the
                        # line it had just parsed.
                        curr_log_keystrokes.append(end_line_split)
                    # BUG FIX: if the log ends while still inside a chunk the
                    # original left curr_log_end_time as None and the
                    # arithmetic below raised TypeError.
                    if curr_log_end_time is None:
                        curr_log_end_time = prev_end_time

                    # Pad the chunk by half the buffer on each side.
                    chunk_start = curr_log_start_time - log_buffer / 2
                    chunk_end = curr_log_end_time + log_buffer / 2
                    curr_lefthand_gyro.append(get_chunk_data(lefthand_file, chunk_start, chunk_end, lh_gyro0))
                    curr_lefthand_accel.append(get_chunk_data(lefthand_file, chunk_start, chunk_end, lh_accel0))
                    curr_lefthand_gravity.append(get_chunk_data(lefthand_file, chunk_start, chunk_end, lh_gravity0))
                    curr_righthand_gyro.append(get_chunk_data(righthand_file, chunk_start, chunk_end, rh_gyro0))
                    curr_righthand_accel.append(get_chunk_data(righthand_file, chunk_start, chunk_end, rh_accel0))
                    curr_righthand_gravity.append(get_chunk_data(righthand_file, chunk_start, chunk_end, rh_gravity0))

            final_log_keystrokes.append(curr_log_keystrokes)
            final_lefthand_gyro.append(curr_lefthand_gyro)
            final_lefthand_accel.append(curr_lefthand_accel)
            final_lefthand_gravity.append(curr_lefthand_gravity)
            final_righthand_gyro.append(curr_righthand_gyro)
            final_righthand_accel.append(curr_righthand_accel)
            final_righthand_gravity.append(curr_righthand_gravity)
            print("Successfully completed %d files" % (file_num + 1))

    return (final_log_keystrokes,
            final_lefthand_accel, final_lefthand_gyro, final_lefthand_gravity,
            final_righthand_accel, final_righthand_gyro, final_righthand_gravity)


def _keyfreq_time(field):
    """Parse a keyfreq 'seconds:microseconds' timestamp field to a datetime."""
    secs, micros = field.split(':')[:2]
    return dt.datetime.fromtimestamp(float(secs)) + dt.timedelta(microseconds=float(micros))


def _session_file(files, t0):
    """Return the first (sorted) file whose name-embedded time is after t0.

    File names are assumed to end in 'YYYYmmdd-HHMMSS.<ext>' (15 timestamp
    characters followed by a 4-character extension). Returns None when no
    file starts after t0.
    """
    for f in files:
        if dt.datetime.strptime(f[-19:-4], '%Y%m%d-%H%M%S') > t0:
            return f
    return None
103
+
104
def find_starting_line(filename):
    """Locate the starting row of each sensor stream in a sensor CSV file.

    filename: path to a left/right-hand sensor CSV file whose rows' first
        field names the sensor they belong to.

    Returns (accel_start, gyro_start, gravity_start): the 0-based indices of
    the first rows whose first field contains "accel", "gyro" and "grav"
    respectively, or None for any sensor that never appears.
    """
    accel_start, gyro_start, gravity_start = None, None, None
    # BUG FIX: 'r' instead of 'rb' — the csv module requires text-mode rows
    # under Python 3, and text mode also works under Python 2 here.
    with open(filename, 'r') as sensor_f:
        for i, row in enumerate(csv.reader(sensor_f)):
            if not row:
                # Robustness: skip blank lines instead of raising IndexError.
                continue
            label = row[0].lower()
            if accel_start is None and "accel" in label:
                accel_start = i
            if gyro_start is None and "gyro" in label:
                gyro_start = i
            if gravity_start is None and "grav" in label:
                gravity_start = i
            if accel_start is not None and gyro_start is not None \
                    and gravity_start is not None:
                # All three found — no need to scan the rest of the file.
                break
    return accel_start, gyro_start, gravity_start
122
+
123
def get_chunk_data(filename, start_time, end_time, offset):
    """Extract the sensor rows recorded strictly between two datetimes.

    filename: sensor CSV file whose rows hold the timestamp split across
        columns 1 and 2 in the forms ' %b %d %Y' and '%H:%M:%S.%f'
        (note the leading space on the date field).
    start_time, end_time: datetime bounds, both exclusive.
    offset: 0-based index of the row where this sensor's stream starts
        (as returned by find_starting_line).

    Returns a list of rows, each ['<epoch_secs><microseconds>', values...].
    """
    result_set = []
    # BUG FIX: 'r' instead of 'rb' — the csv module requires text-mode rows
    # under Python 3, and text mode also works under Python 2 here.
    with open(filename, 'r') as f:
        # Stream the file instead of materialising every row up front
        # (the original built list(reader) on each of the six calls per
        # chunk); rows before `offset` belong to other sensor streams.
        for i, row in enumerate(csv.reader(f)):
            if i < offset:
                continue
            curr_time = dt.datetime.strptime(row[1] + " " + row[2],
                                             ' %b %d %Y %H:%M:%S.%f')
            if start_time < curr_time < end_time:
                # NOTE(review): '%s' (epoch seconds) is a platform-specific
                # strftime extension (glibc/macOS), and the microseconds are
                # appended without zero padding — kept to match the keyfreq
                # timestamp format used elsewhere in this file.
                curr_set = [dt.datetime.strftime(curr_time, '%s')
                            + str(curr_time.microsecond)]
                curr_set.extend(row[3:])
                result_set.append(curr_set)
            if curr_time > end_time:
                # Rows are assumed time-ordered, so nothing later can match.
                break
    return result_set
0 commit comments