-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathawslib.py
147 lines (127 loc) · 5.57 KB
/
awslib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from random import choice
from string import ascii_uppercase
from datetime import datetime
from boto3.s3.transfer import S3Transfer
def upload_to_s3_rand(session, file_to_upload, s3bucket,
prefix=None, postfix=None, rand_length=12):
"""
Uploads a file to an S3 bucket giving it a randomized name and returns
that name.
Args:
session (boto3.session):
file_to_upload (str): hopefully obvious
s3bucket (str): bucket to upload to
prefix (str): a string that will be prepended to the random string.
also serves as a 'path' on S3
postfix (str): will be appended to the random string
rand_length (int): length of the random string to be genrated
Returns:
name of the S3 object, None if it fails
"""
rand = ''.join(choice(ascii_uppercase) for i in range(rand_length))
s3key = '{0}{1}{2}'.format(prefix or '', rand, postfix or '')
s3c = session.client('s3')
s3transfer = S3Transfer(s3c)
s3transfer.upload_file(file_to_upload, s3bucket, s3key)
return '{}/{}'.format(s3bucket, s3key)
def get_amazon_linux_ami(latest=True):
"""
A dummy function for now, when developped it should return one and only
one id of an Amazon Linux AMI to be used. Arguments can be passed on to
to identify the exact image. 'latest' should return the newest version
of Amazon Linux, HVM, EBS-Backed, 64-bit (the most used image),
depending on region.
Check this page on how to find the Linux AMI:
http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/finding-an-ami.html
Here's how to filter images from the command line:
aws --profile bf_edcdev_luputb ec2 describe-images --owners self amazon \
--filters "Name=root-device-type,Values=ebs" \
"Name=virtualization-type,Values=hvm" \
"Name=architecture,Values=x86_64"
"""
return 'ami-0eac257d'
def get_emr_release_label():
"""
dummy function
At some point it will return a release label depnding on passed parameters.
'latest' should be a valid parameter, probably the default
"""
return 'emr-4.3.0'
def get_cluster_ids(session, state=None, states=[],
created_after=datetime.min, created_before=datetime.max,
tags_any={}, tags_all={}):
"""Returns a list of clusters that satisfy parameters
Args:
state (str): Shortcut for cluster states. Can be either 'on' or 'off'.
'on' means ['STARTING','BOOTSTRAPPING','RUNNING','WAITING']
'off' means ['TERMINATING','TERMINATED','TERMINATED_WITH_ERRORS']
If defined the states will be appended to the list provided
in the 'states' parameter if that's also being received.
states (list): Can be any combination of 'STARTING','BOOTSTRAPPING',
'RUNNING','WAITING','TERMINATING','TERMINATED' and
'TERMINATED_WITH_ERRORS'.
created_after (datetime): self-explanatory
created_before (datetime): self-eplanatory
tags_any (dict): If the cluster is tagged with any of these, it
will be added to the list.
tags_all (dict): all tags must exist on the cluster for it to
make the cut.
Returns:
list of cluster IDs
"""
cluster_ids = []
if state == 'on':
states.extend(['STARTING','BOOTSTRAPPING','RUNNING','WAITING'])
elif state == 'off':
states.extend(['TERMINATING','TERMINATED','TERMINATED_WITH_ERRORS'])
else:
pass
emr = session.client('emr')
paginator = emr.get_paginator('list_clusters')
for page in paginator.paginate(CreatedAfter=created_after,
CreatedBefore=created_before,
ClusterStates=states):
for cluster in page['Clusters']:
cluster_ids.append(cluster['Id'])
# If no other condition was specified, there is no point in going through
# the whole list looking for nothing. return the list as we have it.
# WILL NEED TO BE ADJUSTED IF MORE ARGUMENTS ARE ADDED
# (there's probably a better way, but not going to bother yet)
if len(tags_any) == 0 and len(tags_all) == 0:
return cluster_ids
# There are additional filters to apply. Go through the list, describe
# each cluster and test againist filters. First failure, remove it
# from the list and move on.
# We have to iterate over a copy of the list, otherwise the remove()
# will cause the loop to jump over clusters
for cluster_id in list(cluster_ids):
keep_cluster = False
cluster = emr.describe_cluster(ClusterId=cluster_id)['Cluster']
# check tags
c_tags = cluster['Tags']
# first, tags_any. first tag found, continue to next filter
# if no tag found, remove from list and move to next cluster
if len(tags_any) > 0:
for c_tag in c_tags:
if c_tag['Key'] in tags_any.keys() and \
c_tag['Value'] == tags_any[c_tag['Key']]:
keep_cluster = True
break
if not keep_cluster:
cluster_ids.remove(cluster_id)
continue
# next, tags_all
keep_cluster = True
for key in tags_all.keys():
tag_found = False
for c_tag in c_tags:
if c_tag['Key'] == key and c_tag['Value'] == tags_all[key]:
tag_found = True
break
if not tag_found:
keep_cluster = False
break
if not keep_cluster:
cluster_ids.remove(cluster_id)
continue
return cluster_ids