-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchartPlot.py
87 lines (70 loc) · 2.09 KB
/
chartPlot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from elasticsearch import Elasticsearch
import time
import pprint
import sys
import subprocess
import os
# Input JSON file handed to split_file_into_chunks(); in this file it is only
# referenced from commented-out calls.
JSON_FILE="/Users/sam/cs410/TEAM_BHP_DATA_TOPARSE/srajend2.json"
# Name of the index that main() and load_bulk_data_in_chunks() delete and
# re-create.
INDEX_NAME="srajend2_index"
# Module-level Elasticsearch client shared by the functions below. It is built
# with no arguments, so it uses the client library's default connection
# settings (presumably the same localhost:9200 node curljson() posts to --
# TODO confirm).
es=Elasticsearch()
def main():
    """Reset the benchmark index, dump index statistics, then time the bulk loads.

    The chunk files read by load_bulk_data_in_chunks() are expected to exist
    already; the call that would generate them is left disabled below.
    """
    # Begin from a freshly created, empty index.
    delete_index(INDEX_NAME)
    create_index(INDEX_NAME)

    index_stats = es.indices.stats()
    pprint.pprint(index_stats)

    #split_file_into_chunks(JSON_FILE)
    load_bulk_data_in_chunks()
def delete_index(_index):
    """Delete the index named ``_index`` and print the client's response.

    Status codes 400 and 404 are passed through the client's ``ignore``
    option.
    """
    # ``es`` is only read here, so no ``global`` declaration is required.
    response = es.indices.delete(index=_index, ignore=[400, 404])
    print(response)
def create_index(_index):
    """Create the index named ``_index`` and print the client's response.

    Status code 400 is passed through the client's ``ignore`` option.
    """
    # ``es`` is only read here, so no ``global`` declaration is required.
    response = es.indices.create(index=_index, ignore=400)
    print(response)
def listFiles(dirpath):
    """Yield a single list with the names of the files directly in ``dirpath``.

    Only the top level of the directory is inspected; sub-directories are not
    descended into and their names are not included.

    If ``dirpath`` cannot be walked (for example, it does not exist), nothing
    is yielded, so a ``for`` loop over the result simply runs zero times.
    """
    # os.walk() yields nothing for a directory it cannot read. Supplying a
    # default keeps StopIteration from escaping next() inside this generator,
    # which Python 3.7+ turns into a RuntimeError (PEP 479).
    top_level = next(os.walk(dirpath), None)
    if top_level is None:
        return
    # Each os.walk() entry is (dirpath, dirnames, filenames).
    yield top_level[2]
def curljson(fil):
    """POST the file ``fil`` to the local ``_bulk`` endpoint with curl and time it.

    ``fil`` is handed to curl as ``@<fil>``, so a relative name is resolved
    against the current working directory.

    Returns the wall-clock seconds (float) spent launching curl and waiting
    for it to finish. Failures are reported by printing "ERROR"; they never
    raise, so a timing run keeps going.
    """
    # res = es.bulk(index="srajend2_index", body = fil, refresh = True, request_timeout=100)
    start_time = time.time()
    cmd = ['curl', '-s', '-XPOST', 'localhost:9200/_bulk',
           '--data-binary', '@' + str(fil)]
    print(cmd)
    try:
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                stdin=subprocess.PIPE)
        # communicate() drains both pipes (the output is discarded) and waits
        # for curl to exit.
        proc.communicate()
    except (OSError, subprocess.CalledProcessError):
        # Popen raises OSError when curl cannot be started; it never raises
        # CalledProcessError, which is kept only for backward compatibility.
        print("ERROR")
    else:
        # curl signals connection and transfer problems through its exit code.
        if proc.returncode != 0:
            print("ERROR")
    return time.time() - start_time
def load_bulk_data_in_chunks():
    """Time bulk loading for each chunk size 2, 4, 8, ..., 256.

    For a chunk size N the files are read from the directory "/tmp/JUNKS_N".
    The index INDEX_NAME is deleted and re-created before that directory's
    files are posted one by one with curljson(), and the accumulated elapsed
    seconds are printed as a "TABLE, <size> , <seconds>" line.

    The process working directory is switched to the chunk directory while
    posting (curljson() receives bare file names) and is always restored
    afterwards, even if a post raises.
    """
    # Powers of two from 2 up to and including 256.
    chunk_sizes = [2 ** exponent for exponent in range(1, 9)]
    for chunk in chunk_sizes:
        print("********* CHUNK  %s  *******************" % (chunk,))
        chunk_dir = "/tmp/JUNKS_" + str(chunk)
        original_dir = os.getcwd()
        elapsed = 0
        # listFiles() yields one list of file names for the directory.
        for filenames in listFiles(chunk_dir):
            delete_index(INDEX_NAME)
            create_index(INDEX_NAME)
            os.chdir(chunk_dir)
            try:
                for filename in filenames:
                    elapsed += curljson(filename)
            finally:
                # Never leave the process stranded in the chunk directory.
                os.chdir(original_dir)
        print("TABLE, %s , %s" % (chunk, elapsed))
def split_file_into_chunks(json_file):
    """Run ``splitJunks.sh`` on ``json_file`` through /bin/sh.

    The script name is relative, so it is looked up in the current working
    directory. Prints the command, then either "ERROR!" when the shell cannot
    be started or the script exits with a non-zero status, or the "DONE"
    message when it succeeds. Nothing is raised and nothing is returned.
    """
    cmd = ['/bin/sh', 'splitJunks.sh', json_file]
    print(cmd)
    #process = subprocess.Popen(cmd,stdin=subprocess.PIPE, stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    try:
        subprocess.check_call(cmd)
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: non-zero exit status; OSError: /bin/sh could
        # not be launched at all.
        print("ERROR!")
    else:
        # Only claim success when the script actually succeeded.
        print("*** DONE saved in /tmp/JUNKS* *** ")
if __name__ == "__main__":
    # Run the benchmark only when executed as a script, not when imported.
    main()
    # To (re)generate the chunk files first, enable:
    # split_file_into_chunks(JSON_FILE)