-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathcharm.py
executable file
·1246 lines (1051 loc) · 48.2 KB
/
charm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
"""Charm code for MongoDB service."""
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.
import json
import logging
import re
import subprocess
import time
from typing import Dict, List, Optional, Set
from charms.grafana_agent.v0.cos_agent import COSAgentProvider
from charms.mongodb.v0.helpers import (
KEY_FILE,
TLS_EXT_CA_FILE,
TLS_EXT_PEM_FILE,
TLS_INT_CA_FILE,
TLS_INT_PEM_FILE,
build_unit_status,
copy_licenses_to_unit,
generate_keyfile,
generate_password,
get_create_user_cmd,
)
from charms.mongodb.v0.mongodb import (
MongoDBConfiguration,
MongoDBConnection,
NotReadyError,
PyMongoError,
)
from charms.mongodb.v0.mongodb_backups import S3_RELATION, MongoDBBackups
from charms.mongodb.v0.mongodb_provider import MongoDBProvider
from charms.mongodb.v0.mongodb_tls import MongoDBTLS
from charms.mongodb.v0.mongodb_vm_legacy_provider import MongoDBLegacyProvider
from charms.mongodb.v0.users import (
CHARM_USERS,
BackupUser,
MongoDBUser,
MonitorUser,
OperatorUser,
)
from charms.operator_libs_linux.v1 import snap
from ops import JujuVersion
from ops.charm import (
ActionEvent,
CharmBase,
InstallEvent,
LeaderElectedEvent,
RelationDepartedEvent,
RelationEvent,
RelationJoinedEvent,
SecretChangedEvent,
SecretRemoveEvent,
StartEvent,
StorageDetachingEvent,
UpdateStatusEvent,
)
from ops.main import main
from ops.model import (
ActiveStatus,
BlockedStatus,
MaintenanceStatus,
Relation,
SecretNotFoundError,
Unit,
WaitingStatus,
)
from tenacity import Retrying, before_log, retry, stop_after_attempt, wait_fixed
from config import Config
from exceptions import (
AdminUserCreationError,
ApplicationHostNotFoundError,
SecretNotAddedError,
)
from machine_helpers import (
push_file_to_unit,
remove_file_from_unit,
update_mongod_service,
)
# Module-level logger, shared by the whole charm.
logger = logging.getLogger(__name__)
# Shorthand aliases for the secret/peer-data scopes declared in config.
APP_SCOPE = Config.Relations.APP_SCOPE
UNIT_SCOPE = Config.Relations.UNIT_SCOPE
Scopes = Config.Relations.Scopes
class MongodbOperatorCharm(CharmBase):
"""Charm the service."""
def __init__(self, *args):
    """Set up charm state and observe lifecycle, relation, action and secret events."""
    super().__init__(*args)
    self._port = Config.MONGODB_PORT

    # core lifecycle hooks
    self.framework.observe(self.on.install, self._on_install)
    self.framework.observe(self.on.start, self._on_start)
    self.framework.observe(self.on.update_status, self._on_update_status)

    # peer-relation hooks drive replica-set membership changes
    self.framework.observe(
        self.on[Config.Relations.PEERS].relation_joined, self._on_relation_joined
    )
    self.framework.observe(
        self.on[Config.Relations.PEERS].relation_changed, self._on_relation_handler
    )
    self.framework.observe(
        self.on[Config.Relations.PEERS].relation_departed, self._on_relation_departed
    )
    # if a new leader has been elected update hosts of MongoDB
    self.framework.observe(self.on.leader_elected, self._on_leader_elected)
    self.framework.observe(self.on.mongodb_storage_detaching, self._on_storage_detaching)

    # actions
    self.framework.observe(self.on.get_primary_action, self._on_get_primary_action)
    self.framework.observe(self.on.get_password_action, self._on_get_password)
    self.framework.observe(self.on.set_password_action, self._on_set_password)

    # secrets
    self.framework.observe(self.on.secret_remove, self._on_secret_remove)
    self.framework.observe(self.on.secret_changed, self._on_secret_changed)

    # handle provider side of relations
    self.client_relations = MongoDBProvider(self, substrate=Config.SUBSTRATE)
    self.legacy_client_relations = MongoDBLegacyProvider(self)
    self.tls = MongoDBTLS(self, Config.Relations.PEERS, substrate=Config.SUBSTRATE)
    self.backups = MongoDBBackups(self)

    # relation events for Prometheus metrics are handled in the MetricsEndpointProvider
    self._grafana_agent = COSAgentProvider(
        self,
        metrics_endpoints=Config.Monitoring.METRICS_ENDPOINTS,
        metrics_rules_dir=Config.Monitoring.METRICS_RULES_DIR,
        logs_rules_dir=Config.Monitoring.LOGS_RULES_DIR,
        log_slots=Config.Monitoring.LOG_SLOTS,
    )
    # in-process cache of juju secrets, keyed by scope
    self.secrets = {APP_SCOPE: {}, UNIT_SCOPE: {}}
# BEGIN: properties
@property
def _primary(self) -> Optional[str]:
    """Retrieve the name of the unit hosting the primary replica.

    Returns:
        The unit name of the primary, or None when the primary cannot be
        determined (MongoDB unreachable, or the primary IP matches no known
        unit).

    Note: the original annotation claimed ``-> str`` although ``None`` is
    returned on every failure path; the annotation is corrected here.
    """
    try:
        with MongoDBConnection(self.mongodb_config) as mongo:
            primary_ip = mongo.primary()
    except PyMongoError as e:
        logger.error("Unable to access primary due to: %s", e)
        return None
    # check if current unit matches primary ip
    if primary_ip == self._unit_ip(self.unit):
        return self.unit.name
    # check if a peer unit matches primary ip
    for unit in self._peers.units:
        if primary_ip == self._unit_ip(unit):
            return unit.name
    # primary IP does not belong to any known unit
    return None
@property
def _unit_ips(self) -> List[str]:
    """Retrieve IP addresses associated with MongoDB application.

    Returns:
        a list of IP address associated with MongoDB application.
    """
    relation = self._peers
    addresses: List[str] = []
    if relation:
        addresses = [self._unit_ip(peer) for peer in relation.units]
    logger.debug("peer addresses: %s", addresses)
    own_address = self._unit_ip(self.unit)
    logger.debug("unit address: %s", own_address)
    # this unit's own address always comes last
    addresses.append(own_address)
    return addresses
@property
def _replica_set_hosts(self):
    """Fetch current list of hosts in the replica set.

    Returns:
        A list of hosts addresses (strings).
    """
    # stored in the app databag as a JSON-encoded list; default to empty
    raw_hosts = self.app_peer_data.get("replica_set_hosts", "[]")
    return json.loads(raw_hosts)
@property
def mongodb_config(self) -> MongoDBConfiguration:
    """Generates a MongoDBConfiguration object for this deployment of MongoDB."""
    # the operator user connects to every unit in the replica set
    all_hosts = set(self._unit_ips)
    return self._get_mongodb_config_for_user(OperatorUser, all_hosts)
@property
def monitor_config(self) -> MongoDBConfiguration:
    """Generates a MongoDBConfiguration object for monitoring."""
    monitor_hosts = MonitorUser.get_hosts()
    return self._get_mongodb_config_for_user(MonitorUser, monitor_hosts)
@property
def backup_config(self) -> MongoDBConfiguration:
    """Generates a MongoDBConfiguration object for backup."""
    # NOTE: side-effecting property — lazily creates the backup user's
    # password secret if it does not exist yet.
    self._check_or_set_user_password(BackupUser)
    return self._get_mongodb_config_for_user(BackupUser, BackupUser.get_hosts())
@property
def unit_peer_data(self) -> Dict:
    """Peer relation data object for this unit."""
    relation = self._peers
    # before the peer relation exists there is no databag to read
    if not relation:
        return {}
    return relation.data[self.unit]
@property
def app_peer_data(self) -> Dict:
    """Peer relation data object for the application."""
    relation = self._peers
    # before the peer relation exists there is no databag to read
    if not relation:
        return {}
    return relation.data[self.app]
@property
def _peers(self) -> Optional[Relation]:
    """Fetch the peer relation.

    Returns:
        An `ops.model.Relation` object representing the peer relation,
        or None if the relation is not yet established.
    """
    return self.model.get_relation(Config.Relations.PEERS)
@property
def db_initialised(self) -> bool:
    """Check if MongoDB is initialised."""
    # the setter stores the flag as a string key in the app databag;
    # mere presence of the key marks initialisation
    app_data = self.app_peer_data
    return "db_initialised" in app_data
@db_initialised.setter
def db_initialised(self, value):
    """Set the db_initialised flag in the app peer databag.

    Raises:
        ValueError: if value is not a boolean.
    """
    # guard clause first; also fixes the "Proivded" typo in the message
    if not isinstance(value, bool):
        raise ValueError(
            f"'db_initialised' must be a boolean value. Provided: {value} is of type {type(value)}"
        )
    # relation databags only hold strings
    self.app_peer_data["db_initialised"] = str(value)
@property
def _juju_has_secrets(self) -> bool:
    """Whether the running Juju controller supports the secrets API."""
    return JujuVersion.from_environ().has_secrets
# END: properties
# BEGIN: charm event handlers
def _on_install(self, event: InstallEvent) -> None:
    """Handle the install event (fired on startup)."""
    self.unit.status = MaintenanceStatus("installing MongoDB")
    try:
        self._install_snap_packages(packages=Config.SNAP_PACKAGES)
    except snap.SnapError:
        self.unit.status = BlockedStatus("couldn't install MongoDB")
        return

    # if a new unit is joining a cluster with a legacy relation it should start without auth
    legacy_users = self.client_relations._get_users_from_relations(
        None, rel=Config.Relations.OBSOLETE_RELATIONS_NAME
    )
    auth = not legacy_users

    # clear the default config file - user provided config files will be added in the config
    # changed hook
    try:
        with open(Config.MONGOD_CONF_FILE_PATH, "r+") as f:
            f.truncate(0)
    except IOError:
        self.unit.status = BlockedStatus("Could not install MongoDB")
        return

    # Construct the mongod startup commandline args for systemd and reload the daemon.
    update_mongod_service(
        auth=auth, machine_ip=self._unit_ip(self.unit), config=self.mongodb_config
    )
    # add licenses
    copy_licenses_to_unit()
def _on_start(self, event: StartEvent) -> None:
    """Enables MongoDB service and initialises replica set.

    Args:
        event: The triggering start event.
    """
    # mongod requires keyFile and TLS certificates on the file system
    self._instatiate_keyfile(event)
    self.push_tls_certificate_to_workload()

    # start the mongod service via snap
    try:
        logger.debug("starting MongoDB.")
        self.unit.status = MaintenanceStatus("starting MongoDB")
        snap_cache = snap.SnapCache()
        mongodb_snap = snap_cache["charmed-mongodb"]
        mongodb_snap.start(services=["mongod"], enable=True)
        self.unit.status = ActiveStatus()
    except snap.SnapError as e:
        logger.error("An exception occurred when starting mongod agent, error: %s.", str(e))
        self.unit.status = BlockedStatus("couldn't start MongoDB")
        return

    # expose the MongoDB port to clients
    try:
        self._open_port_tcp(self._port)
    except subprocess.CalledProcessError:
        self.unit.status = BlockedStatus("failed to open TCP port for MongoDB")
        return

    # check if this unit's deployment of MongoDB is ready; defer until mongod responds
    with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo:
        if not direct_mongo.is_ready:
            logger.debug("mongodb service is not ready yet.")
            self.unit.status = WaitingStatus("waiting for MongoDB to start")
            event.defer()
            return

    # mongod is now active
    self.unit.status = ActiveStatus()

    # the exporter daemon depends on mongod being up
    try:
        self._connect_mongodb_exporter()
    except snap.SnapError as e:
        logger.error(
            "An exception occurred when starting mongodb exporter, error: %s.", str(e)
        )
        self.unit.status = BlockedStatus("couldn't start mongodb exporter")
        return

    # only leader should initialise the replica set
    if not self.unit.is_leader():
        return
    self._initialise_replica_set(event)
def _on_relation_joined(self, event: RelationJoinedEvent) -> None:
    """Add peer to replica set.

    Args:
        event: The triggering relation joined event.
    """
    # only the leader reconfigures the replica set
    if not self.unit.is_leader():
        return

    self._on_relation_handler(event)

    # app relations should be made aware of the new set of hosts
    try:
        self.client_relations.update_app_relation_data()
    except PyMongoError as e:
        logger.error("Deferring on updating app relation data since: error: %r", e)
        event.defer()
def _on_relation_handler(self, event: RelationEvent) -> None:
    """Adds the unit as a replica to the MongoDB replica set.

    Args:
        event: The triggering relation joined/changed event.
    """
    # changing the monitor password will lead to non-leader units receiving a relation changed
    # event. We must update the monitor and pbm URI if the password changes so that COS/pbm
    # can continue to work
    self._connect_mongodb_exporter()
    self._connect_pbm_agent()

    # only leader should configure replica set and app-changed-events can trigger the relation
    # changed hook resulting in no JUJU_REMOTE_UNIT if this is the case we should return
    # further reconfiguration can be successful only if a replica set is initialised.
    if not (self.unit.is_leader() and event.unit) or not self.db_initialised:
        return

    with MongoDBConnection(self.mongodb_config) as mongo:
        try:
            replset_members = mongo.get_replset_members()
            # compare set of mongod replica set members and juju hosts to avoid the unnecessary
            # reconfiguration.
            if replset_members == self.mongodb_config.hosts:
                return

            # add any juju host that is not yet a replica-set member
            for member in self.mongodb_config.hosts - replset_members:
                logger.debug("Adding %s to replica set", member)
                # verify the new member's mongod is reachable before reconfiguring,
                # otherwise defer and retry on the re-emitted event
                with MongoDBConnection(
                    self.mongodb_config, member, direct=True
                ) as direct_mongo:
                    if not direct_mongo.is_ready:
                        self.unit.status = WaitingStatus("waiting to reconfigure replica set")
                        logger.debug("Deferring reconfigure: %s is not ready yet.", member)
                        event.defer()
                        return
                mongo.add_replset_member(member)
                self.unit.status = ActiveStatus()
        except NotReadyError:
            # another member is mid-sync; reconfiguration must wait
            self.unit.status = WaitingStatus("waiting to reconfigure replica set")
            logger.error("Deferring reconfigure: another member doing sync right now")
            event.defer()
        except PyMongoError as e:
            self.unit.status = WaitingStatus("waiting to reconfigure replica set")
            logger.error("Deferring reconfigure: error=%r", e)
            event.defer()
def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
    """Generates necessary keyfile and updates replica hosts."""
    # the first elected leader creates the shared keyfile and passwords
    if not self.get_secret(APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME):
        self._generate_secrets()

    self._update_hosts(event)

    # app relations should be made aware of the new set of hosts
    try:
        self.client_relations.update_app_relation_data()
    except PyMongoError as e:
        logger.error("Deferring on updating app relation data since: error: %r", e)
        event.defer()
def _on_relation_departed(self, event: RelationDepartedEvent) -> None:
    """Remove peer from replica set if it wasn't able to remove itself.

    Args:
        event: The triggering relation departed event.
    """
    # allow leader to update relation data and hosts if it isn't leaving
    departing_is_self = event.departing_unit == self.unit
    if not self.unit.is_leader() or departing_is_self:
        return

    self._update_hosts(event)

    # app relations should be made aware of the new set of hosts
    try:
        self.client_relations.update_app_relation_data()
    except PyMongoError as e:
        logger.error("Deferring on updating app relation data since: error: %r", e)
        event.defer()
def _on_storage_detaching(self, event: StorageDetachingEvent) -> None:
    """Before storage detaches, allow removing unit to remove itself from the set.

    If the removing unit is primary also allow it to step down and elect another unit as
    primary while it still has access to its storage.
    """
    # if we are removing the last replica it will not be able to step down as primary and we
    # cannot reconfigure the replica set to have 0 members. To prevent retrying for 10 minutes
    # set this flag to True. please note that planned_units will always be >=1. When planned
    # units is 1 that means there are no other peers expected.
    single_node_replica_set = self.app.planned_units() == 1 and len(self._peers.units) == 0
    if single_node_replica_set:
        return

    try:
        # retries over a period of 10 minutes in an attempt to resolve race conditions it is
        # not possible to defer in storage detached.
        logger.debug("Removing %s from replica set", self._unit_ip(self.unit))
        for attempt in Retrying(
            stop=stop_after_attempt(10),
            wait=wait_fixed(1),
            reraise=True,
        ):
            with attempt:
                # remove_replset_member retries for 60 seconds
                with MongoDBConnection(self.mongodb_config) as mongo:
                    mongo.remove_replset_member(self._unit_ip(self.unit))
    except NotReadyError:
        # best-effort: log and give up rather than block storage detach
        logger.info(
            "Failed to remove %s from replica set, another member is syncing", self.unit.name
        )
    except PyMongoError as e:
        logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e)
def _on_update_status(self, event: UpdateStatusEvent):
    """Periodically reconcile the replica set and report unit status.

    Surfaces MongoDB replica-set status, falling back to PBM (backup) status
    only when MongoDB itself is healthy and an S3 relation exists.
    """
    # cannot have both legacy and new relations since they have different auth requirements.
    # CONSISTENCY FIX: use the shared constant (as _on_install does) instead of the
    # hard-coded "obsolete" literal, so both lookups cannot drift apart.
    if self.client_relations._get_users_from_relations(
        None, rel=Config.Relations.OBSOLETE_RELATIONS_NAME
    ) and self.client_relations._get_users_from_relations(None):
        self.unit.status = BlockedStatus("cannot have both legacy and new relations")
        return

    # no need to report on replica set status until initialised
    if not self.db_initialised:
        return

    # Cannot check more advanced MongoDB statuses if mongod hasn't started.
    with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo:
        if not direct_mongo.is_ready:
            self.unit.status = WaitingStatus("Waiting for MongoDB to start")
            return

    # leader should periodically handle configuring the replica set. Incidents such as network
    # cuts can lead to new IP addresses and therefore will require a reconfigure. Especially
    # in the case that the leader a change in IP address it will not receive a relation event.
    if self.unit.is_leader():
        self._handle_reconfigure(event)

    # update the units status based on it's replica set config and backup status. An error in
    # the status of MongoDB takes precedence over pbm status.
    mongodb_status = build_unit_status(self.mongodb_config, self._unit_ip(self.unit))
    pbm_status = self.backups._get_pbm_status()
    if (
        not isinstance(mongodb_status, ActiveStatus)
        or not self.model.get_relation(
            S3_RELATION
        )  # if s3 relation doesn't exist only report MongoDB status
        or isinstance(pbm_status, ActiveStatus)  # pbm is ready then report the MongoDB status
    ):
        self.unit.status = mongodb_status
    else:
        self.unit.status = pbm_status
def _on_get_primary_action(self, event: ActionEvent):
    """Report the unit hosting the replica-set primary as an action result."""
    # _primary may be None when the primary cannot be determined
    event.set_results({"replica-set-primary": self._primary})
def _on_get_password(self, event: ActionEvent) -> None:
    """Returns the password for the user as an action response."""
    username = self._get_user_or_fail_event(
        event, default_username=OperatorUser.get_username()
    )
    # validation failure already failed the event
    if not username:
        return
    password_key = MongoDBUser.get_password_key_name_for_user(username)
    password = self.get_secret(APP_SCOPE, password_key)
    event.set_results({Config.Actions.PASSWORD_PARAM_NAME: password})
def _on_set_password(self, event: ActionEvent) -> None:
    """Set the password for the admin user.

    Fails the action unless run on the leader, and while no backup/restore is
    in flight. On success returns the new password and its secret id.
    """
    # changing the backup password while a backup/restore is in progress can be disastrous
    pbm_status = self.backups._get_pbm_status()
    if isinstance(pbm_status, MaintenanceStatus):
        event.fail("Cannot change password while a backup/restore is in progress.")
        return

    # only leader can write the new password into peer relation.
    if not self.unit.is_leader():
        event.fail("The action can be run only on leader unit.")
        return

    username = self._get_user_or_fail_event(
        event, default_username=OperatorUser.get_username()
    )
    if not username:
        return

    # generate a password when none was supplied in the action params
    new_password = event.params.get(Config.Actions.PASSWORD_PARAM_NAME, generate_password())
    if len(new_password) > Config.Secrets.MAX_PASSWORD_LENGTH:
        event.fail(
            f"Password cannot be longer than {Config.Secrets.MAX_PASSWORD_LENGTH} characters."
        )
        return

    # update the password in MongoDB first; only store it on success
    with MongoDBConnection(self.mongodb_config) as mongo:
        try:
            mongo.set_user_password(username, new_password)
        except NotReadyError:
            event.fail(
                "Failed changing the password: Not all members healthy or finished initial sync."
            )
            return
        except PyMongoError as e:
            event.fail(f"Failed changing the password: {e}")
            return

    secret_id = self.set_secret(
        APP_SCOPE, MongoDBUser.get_password_key_name_for_user(username), new_password
    )

    # restart the daemons that embed the changed credential in their URIs
    if username == BackupUser.get_username():
        self._connect_pbm_agent()
    if username == MonitorUser.get_username():
        self._connect_mongodb_exporter()

    event.set_results(
        {Config.Actions.PASSWORD_PARAM_NAME: new_password, "secret-id": secret_id}
    )
def _on_secret_remove(self, event: SecretRemoveEvent):
    """Handle secret-remove events (intentionally log-only for now)."""
    # We are keeping this function empty on purpose until the issue with secrets
    # is not fixed. The issue is: https://bugs.launchpad.net/juju/+bug/2023364
    # CONSISTENCY FIX: use the module-level logger with lazy %-formatting
    # instead of the root `logging` module with an eager f-string.
    logger.error(
        "_on_secret_remove: Secret %s seems to have no observers, could be removed", event._id
    )
def _on_secret_changed(self, event: SecretChangedEvent):
    """Refresh the local secrets cache when a tracked juju secret changes.

    Determines whether the changed secret belongs to the app or unit scope,
    refreshes the cache for that scope, and restarts the exporter/pbm daemons
    whose URIs embed the (possibly rotated) credentials.
    """
    # CONSISTENCY FIX: use the module-level `logger` (lazy %-args) instead of
    # the root `logging` module, matching the rest of this file.
    if self._compare_secret_ids(
        event.secret.id, self.app_peer_data.get(Config.Secrets.SECRET_INTERNAL_LABEL)
    ):
        scope = APP_SCOPE
    elif self._compare_secret_ids(
        event.secret.id, self.unit_peer_data.get(Config.Secrets.SECRET_INTERNAL_LABEL)
    ):
        scope = UNIT_SCOPE
    else:
        logger.debug("Secret %s changed, but it's unknown", event.secret.id)
        return
    logger.debug("Secret %s for scope %s changed, refreshing", event.secret.id, scope)
    self._update_juju_secrets_cache(scope)

    # changed secrets means that the URIs used for PBM and mongodb_exporter are now out of date
    self._connect_mongodb_exporter()
    self._connect_pbm_agent()
# END: charm event handlers
# BEGIN: users management
@retry(
    stop=stop_after_attempt(3),
    wait=wait_fixed(5),
    reraise=True,
    before=before_log(logger, logging.DEBUG),
)
def _init_operator_user(self) -> None:
    """Creates initial admin user for MongoDB.

    Initial admin user can be created only through localhost connection.
    see https://www.mongodb.com/docs/manual/core/localhost-exception/
    unfortunately, pymongo unable to create connection that considered
    as local connection by MongoDB, even if socket connection used.
    As a result, where are only hackish ways to create initial user.

    It is needed to install mongodb-clients inside charm container to make
    this function work correctly.

    Raises:
        AdminUserCreationError: if the create-user command exits non-zero.
    """
    if self._is_user_created(OperatorUser) or not self.unit.is_leader():
        return
    out = subprocess.run(
        get_create_user_cmd(self.mongodb_config),
        input=self.mongodb_config.password.encode(),
    )
    # BUG FIX: the check was inverted — it raised on returncode == 0 (success)
    # and then logged "user created" after a failure. A non-zero exit code is
    # the failure case.
    if out.returncode != 0:
        raise AdminUserCreationError
    logger.debug("%s user created", OperatorUser.get_username())
    self._set_user_created(OperatorUser)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_fixed(5),
    reraise=True,
    before=before_log(logger, logging.DEBUG),
)
def _init_monitor_user(self):
    """Creates the monitor user on the MongoDB database."""
    # idempotent: skip when a previous run already flagged the user as created
    if self._is_user_created(MonitorUser):
        return
    with MongoDBConnection(self.mongodb_config) as mongo:
        logger.debug("creating the monitor user roles...")
        mongo.create_role(
            role_name=MonitorUser.get_mongodb_role(), privileges=MonitorUser.get_privileges()
        )
        logger.debug("creating the monitor user...")
        mongo.create_user(self.monitor_config)
        self._set_user_created(MonitorUser)

    # leader should reconnect to exporter after creating the monitor user - since the snap
    # will have an authorisation error until the the user has been created and the daemon
    # has been restarted
    self._connect_mongodb_exporter()
@retry(
    stop=stop_after_attempt(3),
    wait=wait_fixed(5),
    reraise=True,
    before=before_log(logger, logging.DEBUG),
)
def _init_backup_user(self):
    """Creates the backup user on the MongoDB database."""
    # idempotent: nothing to do when the user already exists
    if self._is_user_created(BackupUser):
        return
    with MongoDBConnection(self.mongodb_config) as mongo:
        # first we must create the necessary roles for the PBM tool
        logger.debug("creating the backup user roles...")
        mongo.create_role(
            role_name=BackupUser.get_mongodb_role(),
            privileges=BackupUser.get_privileges(),
        )
        logger.debug("creating the backup user...")
        mongo.create_user(self.backup_config)
        self._set_user_created(BackupUser)
# END: users management
# BEGIN: helper functions
def _is_user_created(self, user: MongoDBUser) -> bool:
    """Return True when the given charm user is flagged as created in app data."""
    creation_flag = f"{user.get_username()}-user-created"
    return creation_flag in self.app_peer_data

def _set_user_created(self, user: MongoDBUser) -> None:
    """Record in app peer data that the given charm user has been created."""
    creation_flag = f"{user.get_username()}-user-created"
    self.app_peer_data[creation_flag] = "True"
def _get_mongodb_config_for_user(
    self, user: MongoDBUser, hosts: Set[str]
) -> MongoDBConfiguration:
    """Build a MongoDBConfiguration for the given charm user and host set."""
    # TLS is considered enabled per scope when a CA file is present
    external_ca, _ = self.tls.get_tls_files(UNIT_SCOPE)
    internal_ca, _ = self.tls.get_tls_files(APP_SCOPE)
    return MongoDBConfiguration(
        replset=self.app.name,
        database=user.get_database_name(),
        username=user.get_username(),
        # password is read from the app-scoped secret store
        password=self.get_secret(APP_SCOPE, user.get_password_key_name()),
        hosts=hosts,
        roles=user.get_roles(),
        tls_external=external_ca is not None,
        tls_internal=internal_ca is not None,
    )
def _get_user_or_fail_event(self, event: ActionEvent, default_username: str) -> Optional[str]:
    """Return the validated username from the action params.

    Fails the event and returns None when the username is not a charm-managed user.
    """
    username = event.params.get(Config.Actions.USERNAME_PARAM_NAME, default_username)
    if username in CHARM_USERS:
        return username
    event.fail(
        f"The action can be run only for users used by the charm:"
        f" {', '.join(CHARM_USERS)} not {username}"
    )
    return None
def _check_or_set_user_password(self, user: MongoDBUser) -> None:
    """Create and store a password for the user if one does not already exist."""
    password_key = user.get_password_key_name()
    if self.get_secret(APP_SCOPE, password_key):
        return
    self.set_secret(APP_SCOPE, password_key, generate_password())
def _generate_secrets(self) -> None:
    """Generate secrets and put them into peer relation.

    The same keyFile and admin password are needed on all members, hence they are
    generated once and shared between members via the app data.
    """
    self._check_or_set_user_password(OperatorUser)
    self._check_or_set_user_password(MonitorUser)

    # the keyfile authenticates replica-set members to each other
    if not self.get_secret(APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME):
        self.set_secret(APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME, generate_keyfile())
def _update_hosts(self, event: LeaderElectedEvent) -> None:
    """Update replica set hosts and remove any unremoved replicas from the config."""
    # nothing to reconcile before the replica set exists
    if not self.db_initialised:
        return
    self.process_unremoved_units(event)
    current_ips = self._unit_ips
    self.app_peer_data["replica_set_hosts"] = json.dumps(current_ips)
def process_unremoved_units(self, event: LeaderElectedEvent) -> None:
    """Removes replica set members that are no longer running as a juju hosts."""
    with MongoDBConnection(self.mongodb_config) as mongo:
        try:
            replset_members = mongo.get_replset_members()
            # any member not in the current juju host set is stale
            for member in replset_members - self.mongodb_config.hosts:
                logger.debug("Removing %s from replica set", member)
                mongo.remove_replset_member(member)
        except NotReadyError:
            # a syncing member blocks reconfiguration; retry on re-emit
            logger.info("Deferring process_unremoved_units: another member is syncing")
            event.defer()
        except PyMongoError as e:
            logger.error("Deferring process_unremoved_units: error=%r", e)
            event.defer()
def _handle_reconfigure(self, event: UpdateStatusEvent):
    """Reconfigures the replica set if necessary.

    Removes any mongod hosts that are no longer present in the replica set or adds hosts that
    should exist in the replica set. This function is meant to be called periodically by the
    leader in the update status hook to perform any necessary cluster healing.
    """
    if not self.unit.is_leader():
        logger.debug("only the leader can perform reconfigurations to the replica set.")
        return

    # remove any IPs that are no longer juju hosts & update app data.
    self._update_hosts(event)
    # Add in any new IPs to the replica set. Relation handlers require a reference to
    # a unit.
    # NOTE: monkey-patches the event so _on_relation_handler's `event.unit`
    # check passes for this non-relation event.
    event.unit = self.unit
    self._on_relation_handler(event)

    # app relations should be made aware of the new set of hosts
    try:
        self.client_relations.update_app_relation_data()
    except PyMongoError as e:
        logger.error("Deferring on updating app relation data since: error: %r", e)
        event.defer()
        return
def _open_port_tcp(self, port: int) -> None:
    """Open the given port.

    Args:
        port: The port to open.

    Raises:
        subprocess.CalledProcessError: if the `open-port` hook tool fails.
    """
    logger.debug("opening tcp port")
    try:
        subprocess.check_call(["open-port", f"{port}/TCP"])
    except subprocess.CalledProcessError as e:
        logger.exception("failed opening port: %s", str(e))
        raise
def _install_snap_packages(self, packages: List[str]) -> None:
    """Installs package(s) to container.

    Args:
        packages: list of (name, channel, revision) tuples to install.

    Raises:
        snap.SnapError: if any snap fails to install or hold.
    """
    # PERF: SnapCache is loop-invariant — build it once rather than once per package
    snap_cache = snap.SnapCache()
    for snap_name, snap_channel, snap_revision in packages:
        try:
            snap_package = snap_cache[snap_name]
            snap_package.ensure(
                snap.SnapState.Latest, channel=snap_channel, revision=snap_revision
            )
            # snaps will auto refresh so it is necessary to hold the current revision
            snap_package.hold()
        except snap.SnapError as e:
            logger.error(
                "An exception occurred when installing %s. Reason: %s", snap_name, str(e)
            )
            raise
def _instatiate_keyfile(self, event: StartEvent) -> None:
    """Write the replica-set keyFile to disk, deferring until the leader has generated it."""
    # NOTE(review): method name has a typo ("instatiate"); it is part of the
    # interface used by _on_start, so it is left unchanged here.
    # wait for keyFile to be created by leader unit
    if not self.get_secret(APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME):
        logger.debug("waiting for leader unit to generate keyfile contents")
        event.defer()
        return

    # put keyfile on the machine with appropriate permissions
    push_file_to_unit(
        parent_dir=Config.MONGOD_CONF_DIR,
        file_name=KEY_FILE,
        file_contents=self.get_secret(APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME),
    )
def push_tls_certificate_to_workload(self) -> None:
    """Uploads certificate to the workload container."""
    external_ca, external_pem = self.tls.get_tls_files(UNIT_SCOPE)
    internal_ca, internal_pem = self.tls.get_tls_files(APP_SCOPE)

    # write whichever TLS artefacts are present, in the same order as before:
    # external CA, external PEM, internal CA, internal PEM
    artefacts = (
        (TLS_EXT_CA_FILE, external_ca),
        (TLS_EXT_PEM_FILE, external_pem),
        (TLS_INT_CA_FILE, internal_ca),
        (TLS_INT_PEM_FILE, internal_pem),
    )
    for file_name, contents in artefacts:
        if contents is not None:
            push_file_to_unit(
                parent_dir=Config.MONGOD_CONF_DIR,
                file_name=file_name,
                file_contents=contents,
            )
@staticmethod
def delete_tls_certificate_from_workload() -> None:
    """Deletes certificate from VM."""
    logger.info("Deleting TLS certificate from VM")
    # NOTE(review): this uses Config.TLS.* names while push_tls_certificate_to_workload
    # uses the TLS_*_FILE constants imported from helpers — presumably the same
    # filenames; verify they cannot drift apart.
    for file in [
        Config.TLS.EXT_CA_FILE,
        Config.TLS.EXT_PEM_FILE,
        Config.TLS.INT_CA_FILE,
        Config.TLS.INT_PEM_FILE,
    ]:
        remove_file_from_unit(Config.MONGOD_CONF_DIR, file)
def _connect_mongodb_exporter(self) -> None:
    """Exposes the endpoint to mongodb_exporter."""
    if not self.db_initialised:
        return
    # must wait for leader to set URI before connecting
    if not self.get_secret(APP_SCOPE, MonitorUser.get_password_key_name()):
        return
    # point the exporter at the monitor user's URI and restart it
    charmed_mongodb = snap.SnapCache()["charmed-mongodb"]
    charmed_mongodb.set({Config.Monitoring.URI_PARAM_NAME: self.monitor_config.uri})
    charmed_mongodb.restart(services=[Config.Monitoring.SERVICE_NAME])
def _connect_pbm_agent(self) -> None:
    """Updates URI for pbm-agent."""
    if not self.db_initialised:
        return
    # must wait for leader to set URI before any attempts to update are made
    if not self.get_secret(APP_SCOPE, BackupUser.get_password_key_name()):
        return
    snap_cache = snap.SnapCache()
    pbm_snap = snap_cache["charmed-mongodb"]
    # stop-set-start (rather than restart) so the new URI is picked up cleanly
    pbm_snap.stop(services=[Config.Backup.SERVICE_NAME])
    pbm_snap.set({Config.Backup.URI_PARAM_NAME: self.backup_config.uri})
    try:
        # Added to avoid systemd error:
        # 'snap.charmed-mongodb.pbm-agent.service: Start request repeated too quickly'
        time.sleep(1)
        pbm_snap.start(services=[Config.Backup.SERVICE_NAME], enable=True)
    except snap.SnapError as e:
        # log diagnostics before propagating the failure
        logger.error(f"Failed to restart {Config.Backup.SERVICE_NAME}: {str(e)}")
        self._get_service_status(Config.Backup.SERVICE_NAME)
        raise e
def _get_service_status(self, service_name) -> None:
    """Log systemd status and journal output for a charmed-mongodb service (diagnostics)."""
    logger.error(f"Getting status of {service_name} service:")
    systemd_unit = f"snap.charmed-mongodb.{service_name}.service"
    for diagnostic_cmd in (
        f"systemctl status {systemd_unit}",
        f"journalctl -xeu {systemd_unit}",
    ):
        self._run_diagnostic_command(diagnostic_cmd)
def _run_diagnostic_command(self, cmd) -> None:
    """Run a shell command and log its output; failures are logged, not raised."""
    logger.error("Running diagnostic command: %s", cmd)
    # NOTE(review): shell=True is tolerable here only because cmd is built
    # internally, never from user input.
    try:
        output = subprocess.check_output(cmd, shell=True, text=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"Exception occurred running '{cmd}'\n {e}")
    else:
        logger.error(output)
def _initialise_replica_set(self, event: StartEvent) -> None:
    """Initialise the replica set and create the charm users (leader only, runs once)."""
    if self.db_initialised:
        # The replica set should be initialised only once. Check should be
        # external (e.g., check initialisation inside peer relation). We
        # shouldn't rely on MongoDB response because the data directory
        # can be corrupted.
        return

    with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo:
        try:
            logger.info("Replica Set initialization")
            direct_mongo.init_replset()
            self._peers.data[self.app]["replica_set_hosts"] = json.dumps(
                [self._unit_ip(self.unit)]
            )
            logger.info("User initialization")
            self._init_operator_user()
            self._init_backup_user()
            self._init_monitor_user()
            logger.info("Manage relations")
            self.client_relations.oversee_users(None, None)
        except subprocess.CalledProcessError as e:
            # BUG FIX: CalledProcessError exposes `returncode`, not `exit_code`;
            # the old attribute access raised AttributeError inside this handler.
            logger.error(
                "Deferring on_start: exit code: %i, stderr: %s", e.returncode, e.stderr
            )
            event.defer()
            self.unit.status = WaitingStatus("waiting to initialise replica set")
            return
        except PyMongoError as e:
            logger.error("Deferring on_start since: error=%r", e)
            event.defer()
            self.unit.status = WaitingStatus("waiting to initialise replica set")
            return

    # replica set initialised properly and ready to go
    self.db_initialised = True
    self.unit.status = ActiveStatus()
def _unit_ip(self, unit: Unit) -> str:
    """Returns the ip address of a given unit.

    Raises:
        ApplicationHostNotFoundError: if the unit is neither this unit nor a peer.
    """
    # check if host is current host
    if unit == self.unit:
        return str(self.model.get_binding(Config.Relations.PEERS).network.bind_address)
    # check if host is a peer
    elif unit in self._peers.data:
        return str(self._peers.data[unit].get("private-address"))
    # raise exception if host not found
    else:
        raise ApplicationHostNotFoundError
def get_secret(self, scope: str, key: str) -> Optional[str]:
    """Get secret from the secret storage."""
    # on controllers with secrets support, read from the juju secret store
    if self._juju_has_secrets:
        return self._juju_secret_get(scope, key)
    # otherwise fall back to the peer-relation databags
    if scope == APP_SCOPE:
        return self.app_peer_data.get(key, None)
    if scope == UNIT_SCOPE:
        return self.unit_peer_data.get(key, None)
    raise RuntimeError("Unknown secret scope.")
def set_secret(self, scope: str, key: str, value: Optional[str]) -> Optional[str]: