Skip to content

Commit 741c49b

Browse files
committed
Initial commit with keystroke data and processing files
1 parent 7581925 commit 741c49b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+1855279
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 0
6+
}

Code/data_processing.py

+137
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import numpy as np
2+
import os
3+
import pickle
4+
import csv
5+
import datetime as dt
6+
7+
def preprocess_data(char_set,input_case=["all"],log_buffer=dt.timedelta(seconds=1)):
    """
    Cut keystroke logs into typing bursts and collect the matching sensor data.

    char_set: set of characters for which input has to be considered
    input_case: list of data sub-folders to process, e.g. ['room'] or
        ['library']. The string 'all', or a list containing 'all' (the
        default), selects both 'room' and 'library'. A single folder name
        given as a plain string is also accepted.
    log_buffer: time duration of type timedelta giving the buffer allowed
        between two successive keyfreq readings

    Returns a 7-tuple of lists, each with one entry per keystroke log file:
        (keystrokes, lefthand accel, lefthand gyro, lefthand gravity,
         righthand accel, righthand gyro, righthand gravity)
    Every sensor entry is a list holding one get_chunk_data() result per
    typing burst found in that log file.
    """
    # The default list is only read, never mutated, so sharing it is safe.
    if isinstance(input_case, str):
        input_case = [input_case]
    if input_case == "all" or "all" in input_case:
        input_case = ["room", "library"]

    final_log_keystrokes = []
    final_lefthand_gyro = []
    final_lefthand_accel = []
    final_lefthand_gravity = []
    final_righthand_gyro = []
    final_righthand_accel = []
    final_righthand_gravity = []
    keyfreq_folder = '../Data/keyfreq/'
    righthand_folder = '../Data/righthand/'
    lefthand_folder = '../Data/lefthand/'

    for folder in input_case:
        log_files = _sorted_paths(keyfreq_folder, folder)
        righthand_files = _sorted_paths(righthand_folder, folder)
        lefthand_files = _sorted_paths(lefthand_folder, folder)
        # Single pre-formatted string so the call prints the same text on
        # Python 2 and Python 3 (the double space matches the old output).
        print("total number of files in  %s : %d" % (folder, len(log_files)))

        # going over keylogged files first
        for file_num, log_file in enumerate(log_files):
            (keystrokes,
             lh_accel, lh_gyro, lh_gravity,
             rh_accel, rh_gyro, rh_gravity) = _process_log_file(
                log_file, lefthand_files, righthand_files, char_set, log_buffer)
            final_log_keystrokes.append(keystrokes)
            final_lefthand_gyro.append(lh_gyro)
            final_lefthand_accel.append(lh_accel)
            final_lefthand_gravity.append(lh_gravity)
            final_righthand_gyro.append(rh_gyro)
            final_righthand_accel.append(rh_accel)
            final_righthand_gravity.append(rh_gravity)
            print("Successfully completed %s files" % (file_num + 1))

    return final_log_keystrokes,final_lefthand_accel,final_lefthand_gyro,final_lefthand_gravity,\
        final_righthand_accel,final_righthand_gyro,final_righthand_gravity


def _sorted_paths(base_folder, folder):
    """Return the full paths of the entries of base_folder+folder, sorted by name."""
    names = sorted(os.listdir(base_folder + folder))
    return [os.path.join(base_folder, folder, name) for name in names]


def _parse_log_time(stamp):
    """Convert a '<seconds>:<microseconds>' log stamp to a local datetime."""
    parts = stamp.split(':')
    moment = dt.datetime.fromtimestamp(float(parts[0]))
    return moment + dt.timedelta(microseconds=float(parts[1]))


def _first_recording_after(files, moment):
    """Return the first path whose embedded timestamp is later than moment, else None."""
    for path in files:
        # The 15 characters before the 4-character extension are read as
        # 'YYYYmmdd-HHMMSS'.
        recorded = dt.datetime.strptime(path[-19:-4], '%Y%m%d-%H%M%S')
        if recorded > moment:
            return path
    return None


def _process_log_file(log_file, lefthand_files, righthand_files, char_set, log_buffer):
    """Extract the keystroke rows and per-burst sensor chunks for one log file."""
    keystrokes = []
    lh_gyro, lh_accel, lh_gravity = [], [], []
    rh_gyro, rh_accel, rh_gravity = [], [], []

    with open(log_file) as log_f:
        # The first non-blank line gives the time the log starts at.
        first_fields = []
        for line in log_f:
            first_fields = line.split()
            if first_fields:
                break
        if not first_fields:
            # Empty log: nothing to align.
            return (keystrokes, lh_accel, lh_gyro, lh_gravity,
                    rh_accel, rh_gyro, rh_gravity)
        log_start = _parse_log_time(first_fields[0])

        lefthand_file = _first_recording_after(lefthand_files, log_start)
        righthand_file = _first_recording_after(righthand_files, log_start)
        if lefthand_file is None or righthand_file is None:
            # Previously the last file of the list was used silently here;
            # unrelated sensor data is worse than no data, so skip instead.
            print("No sensor recording found after the start of %s, skipping it" % log_file)
            return (keystrokes, lh_accel, lh_gyro, lh_gravity,
                    rh_accel, rh_gyro, rh_gravity)

        log_f.seek(0)
        lh_accel0, lh_gyro0, lh_gravity0 = find_starting_line(lefthand_file)
        rh_accel0, rh_gyro0, rh_gravity0 = find_starting_line(righthand_file)

        for line in log_f:
            fields = line.split()
            # A burst starts at the first key that belongs to char_set.
            if not fields or fields[-1] not in char_set:
                continue
            stamp = fields[0].split(':')
            burst_start = _parse_log_time(fields[0])
            fields[0] = stamp[0] + stamp[1]
            # NOTE(review): the keystroke list is restarted for every burst,
            # so only the last burst of a file is returned while the sensor
            # lists keep every burst. Kept as-is to preserve the return
            # shape - confirm whether all bursts should be retained.
            keystrokes = [fields]

            # The inner loop deliberately consumes the same file iterator,
            # so the outer loop resumes after the end of this burst.
            last_time = burst_start
            for end_line in log_f:
                end_fields = end_line.split()
                if not end_fields:
                    continue
                end_time = _parse_log_time(end_fields[0])
                if (end_time - last_time) > log_buffer and end_fields[-1] not in char_set:
                    break
                last_time = end_time
                end_stamp = end_fields[0].split(':')
                end_fields[0] = end_stamp[0] + end_stamp[1]
                keystrokes.append(end_fields)
            # Also valid when the log ends in the middle of a burst.
            burst_end = last_time

            window_start = burst_start - log_buffer / 2
            window_end = burst_end + log_buffer / 2
            lh_gyro.append(get_chunk_data(lefthand_file, window_start, window_end, lh_gyro0))
            lh_accel.append(get_chunk_data(lefthand_file, window_start, window_end, lh_accel0))
            lh_gravity.append(get_chunk_data(lefthand_file, window_start, window_end, lh_gravity0))
            rh_gyro.append(get_chunk_data(righthand_file, window_start, window_end, rh_gyro0))
            rh_accel.append(get_chunk_data(righthand_file, window_start, window_end, rh_accel0))
            rh_gravity.append(get_chunk_data(righthand_file, window_start, window_end, rh_gravity0))

    return (keystrokes, lh_accel, lh_gyro, lh_gravity,
            rh_accel, rh_gyro, rh_gravity)
103+
104+
def find_starting_line(filename):
    """
    Locate the first row of each sensor section in a recording CSV.

    filename: path of the CSV file to scan

    Returns (accel_start, gyro_start, gravity_start): the 0-based index of
    the first row whose first column contains "accel", "gyro" and "grav"
    respectively (case-insensitive). An entry is None when no row matches.
    Blank rows are counted in the index but never matched.
    """
    import sys

    starts = {"accel": None, "gyro": None, "grav": None}
    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csv_file = open(filename, 'rb')
    else:
        csv_file = open(filename, 'r', newline='')
    with csv_file:
        for index, row in enumerate(csv.reader(csv_file)):
            if not row:
                continue
            label = row[0].lower()
            for keyword in starts:
                if starts[keyword] is None and keyword in label:
                    starts[keyword] = index
            # Nothing left to look for: skip the rest of the file.
            if None not in starts.values():
                break
    return starts["accel"], starts["gyro"], starts["grav"]
122+
123+
def get_chunk_data(filename, start_time, end_time, offset):
    """
    Collect the sensor rows recorded strictly between two points in time.

    filename: path of the recording CSV file
    start_time, end_time: datetime bounds (both exclusive) of the window
    offset: 0-based index of the first row to look at, as returned by
        find_starting_line(); None (section not found) yields no rows

    Returns a list of rows, each being
        [<epoch seconds followed by the 6-digit microseconds>, value, ...]
    where the values are columns 3 onwards of the CSV row, as strings.

    Columns 1 and 2 of a row are parsed as date and time with the format
    ' %b %d %Y %H:%M:%S.%f'. Reading stops at the first row later than
    end_time, so rows are assumed to be in chronological order from offset
    onwards. Rows with fewer than three columns are skipped.
    """
    import sys
    import time

    result_set = []
    if offset is None:
        return result_set

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csv_file = open(filename, 'rb')
    else:
        csv_file = open(filename, 'r', newline='')
    with csv_file:
        # Stream the rows instead of loading the whole recording per call.
        for index, row in enumerate(csv.reader(csv_file)):
            if index < offset or len(row) < 3:
                continue
            curr_time = dt.datetime.strptime(row[1] + " " + row[2], ' %b %d %Y %H:%M:%S.%f')
            if start_time < curr_time < end_time:
                # mktime() replaces the glibc-only '%s' directive, and the
                # microseconds are zero-padded so that e.g. 5000 us becomes
                # "005000" rather than "5000".
                seconds = int(time.mktime(curr_time.timetuple()))
                curr_set = ['%d%06d' % (seconds, curr_time.microsecond)]
                curr_set.extend(row[3:])
                result_set.append(curr_set)
            if curr_time > end_time:
                break
    return result_set

Code/data_processing.pyc

4.99 KB
Binary file not shown.

Code/process_data.ipynb

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"import csv\n",
12+
"import os\n",
13+
"\n",
14+
"%load_ext autoreload\n",
15+
"%autoreload 2\n",
16+
"\n",
17+
"os.path.join('../Data/keyfreq/library/',os.listdir('../Data/keyfreq/library/'))\n",
18+
"with open('../Data/righthand/library/export_20171114-163545.csv','rb') as f:\n",
19+
" csv_r=csv.reader(f)\n",
20+
" lines=list(csv_r)\n",
21+
" print lines[0][3:]"
22+
]
23+
},
24+
{
25+
"cell_type": "code",
26+
"execution_count": 8,
27+
"metadata": {
28+
"collapsed": false,
29+
"scrolled": true
30+
},
31+
"outputs": [
32+
{
33+
"name": "stdout",
34+
"output_type": "stream",
35+
"text": [
36+
"total number of files in library : 8\n",
37+
"Successfully completed 1 files\n",
38+
"Successfully completed 2 files\n",
39+
"Successfully completed 3 files\n",
40+
"Successfully completed 4 files\n",
41+
"Successfully completed 5 files\n",
42+
"Successfully completed 6 files\n",
43+
"Successfully completed 7 files\n",
44+
"Successfully completed 8 files\n"
45+
]
46+
}
47+
],
48+
"source": [
49+
"import data_processing as dp\n",
50+
"import datetime as dt\n",
51+
"\n",
52+
"char_set={'q','w','e','r','t','y','u','i','o','p','a','s','d','f','g','h','j','k','l','z','x','c','v','b','n','m'}\n",
53+
"input_case=['library']\n",
54+
"log_buffer=dt.timedelta(seconds=1)\n",
55+
"\n",
56+
"l_d,lh_a,lh_gy,lh_gr,rh_a,rh_gy,rh_gr=dp.preprocess_data(char_set,input_case,log_buffer)"
57+
]
58+
},
59+
{
60+
"cell_type": "code",
61+
"execution_count": 7,
62+
"metadata": {
63+
"collapsed": false
64+
},
65+
"outputs": [
66+
{
67+
"name": "stdout",
68+
"output_type": "stream",
69+
"text": [
70+
"log_data shape: (3,)\n",
71+
"lh_accel shape: (4,)\n",
72+
"lh_gyro shape: (6832, 4)\n",
73+
"lh_gravity shape: (6833, 4)\n",
74+
"rh_accel shape: (0,)\n",
75+
"rh_gyro shape: (0,)\n",
76+
"rh_gravity shape: (0,)\n"
77+
]
78+
}
79+
],
80+
"source": [
81+
"import numpy as np\n",
82+
"log_data=np.asarray(l_d[0])\n",
83+
"lh_accel=np.asarray(lh_a[0][0])\n",
84+
"lh_gyro=np.asarray(lh_gy[0])\n",
85+
"lh_gravity=np.asarray(lh_gr[0])\n",
86+
"rh_accel=np.asarray(rh_a[0])\n",
87+
"rh_gyro=np.asarray(rh_gy[0])\n",
88+
"rh_gravity=np.asarray(rh_gr[0])\n",
89+
"# print log_data\n",
90+
"print \"log_data shape:\",log_data.shape\n",
91+
"print \"lh_accel shape:\",lh_accel.shape\n",
92+
"print \"lh_gyro shape:\",lh_gyro.shape\n",
93+
"print \"lh_gravity shape:\",lh_gravity.shape\n",
94+
"print \"rh_accel shape:\",rh_accel.shape\n",
95+
"print \"rh_gyro shape:\",rh_gyro.shape\n",
96+
"print \"rh_gravity shape:\",rh_gravity.shape"
97+
]
98+
}
99+
],
100+
"metadata": {
101+
"kernelspec": {
102+
"display_name": "Python [Root]",
103+
"language": "python",
104+
"name": "Python [Root]"
105+
},
106+
"language_info": {
107+
"codemirror_mode": {
108+
"name": "ipython",
109+
"version": 2
110+
},
111+
"file_extension": ".py",
112+
"mimetype": "text/x-python",
113+
"name": "python",
114+
"nbconvert_exporter": "python",
115+
"pygments_lexer": "ipython2",
116+
"version": "2.7.12"
117+
}
118+
},
119+
"nbformat": 4,
120+
"nbformat_minor": 0
121+
}

Code/test.csv

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
asdvnds,dsfddskj
2+
sddgfdg,asddsfsdf

0 commit comments

Comments
 (0)