Cephadm
#!/usr/bin/python3
DEFAULT_IMAGE='docker.io/ceph/ceph:v15'
DEFAULT_IMAGE_IS_MASTER=False
LATEST_STABLE_RELEASE = 'octopus'
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
UNIT_DIR = '/etc/systemd/system'
LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700
CONTAINER_PREFERENCE = ['podman', 'docker'] # prefer podman to docker
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None # in seconds
DEFAULT_RETRY = 10
SHELL_DEFAULT_CONF = '/etc/ceph/ceph.conf'
SHELL_DEFAULT_KEYRING = '/etc/ceph/ceph.client.admin.keyring'
"""
You can invoke cephadm in two ways:
2. By piping the script to the python3 binary. In this latter case, you should
prepend one or more lines to the beginning of the script.
For arguments,
injected_argv = [...]
e.g.,
injected_argv = ['ls']
injected_stdin = '...'
"""
import argparse
import datetime
import fcntl
import ipaddress
import json
import logging
from logging.config import dictConfig
import os
import platform
import pwd
import random
import re
import select
import shutil
import socket
import string
import subprocess
import sys
import tempfile
import time
import errno
import struct
try:
    from typing import (Dict, List, Tuple, Optional, Union, Any, NoReturn,
                        Callable, IO)
except ImportError:
    pass

# imports needed by the code below (read_config, the @wraps decorators,
# glob()); the py2 fallbacks mirror the version checks made in read_config()
try:
    from configparser import ConfigParser  # py3
except ImportError:
    from ConfigParser import SafeConfigParser  # type: ignore  # py2
try:
    from io import StringIO  # py3
except ImportError:
    from StringIO import StringIO  # type: ignore  # py2
from functools import wraps
from glob import glob

if sys.version_info > (3, 0):
    unicode = str  # py2/py3 compat for the unicode() calls below
import uuid
container_path = ''
cached_stdin = None
DATEFMT = '%Y-%m-%dT%H:%M:%S.%f'
class termcolor:
yellow = '\033[93m'
red = '\033[31m'
end = '\033[0m'
class Error(Exception):
pass
class TimeoutExpired(Error):
pass
##################################
class Ceph(object):
daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
'crash')
##################################
class Monitoring(object):
"""Define the configs for the monitoring containers"""
port_map = {
"prometheus": [9095], # Avoid default 9090, due to conflict with cockpit
UI
"node-exporter": [9100],
"grafana": [3000],
"alertmanager": [9093, 9094],
}
components = {
"prometheus": {
"image": "docker.io/prom/prometheus:v2.18.1",
"cpus": '2',
"memory": '4GB',
"args": [
"--config.file=/etc/prometheus/prometheus.yml",
"--storage.tsdb.path=/prometheus",
"--web.listen-address=:{}".format(port_map['prometheus'][0]),
],
"config-json-files": [
"prometheus.yml",
],
},
"node-exporter": {
"image": "docker.io/prom/node-exporter:v0.18.1",
"cpus": "1",
"memory": "1GB",
"args": [
"--no-collector.timex",
],
},
"grafana": {
"image": "docker.io/ceph/ceph-grafana:6.6.2",
"cpus": "2",
"memory": "4GB",
"args": [],
"config-json-files": [
"grafana.ini",
"provisioning/datasources/ceph-dashboard.yml",
"certs/cert_file",
"certs/cert_key",
],
},
"alertmanager": {
"image": "docker.io/prom/alertmanager:v0.20.0",
"cpus": "2",
"memory": "2GB",
"args": [],
"config-json-files": [
"alertmanager.yml",
],
"config-json-args": [
"peers",
],
},
} # type: ignore
##################################
class NFSGanesha(object):
"""Defines a NFS-Ganesha container"""
daemon_type = 'nfs'
entrypoint = '/usr/bin/ganesha.nfsd'
daemon_args = ['-F', '-L', 'STDERR']
required_files = ['ganesha.conf']
port_map = {
"nfs" : 2049,
}
def __init__(self,
fsid,
daemon_id,
config_json,
image=DEFAULT_IMAGE):
# type: (str, Union[int, str], Dict, str) -> None
self.fsid = fsid
self.daemon_id = daemon_id
self.image = image
# config-json options
self.pool = dict_get(config_json, 'pool', require=True)
self.namespace = dict_get(config_json, 'namespace')
self.userid = dict_get(config_json, 'userid')
self.extra_args = dict_get(config_json, 'extra_args', [])
self.files = dict_get(config_json, 'files', {})
@classmethod
def init(cls, fsid, daemon_id):
# type: (str, Union[int, str]) -> NFSGanesha
return cls(fsid, daemon_id, get_parm(args.config_json), args.image)
@staticmethod
def get_container_mounts(data_dir):
# type: (str) -> Dict[str, str]
mounts = dict()
mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
return mounts
@staticmethod
def get_container_envs():
# type: () -> List[str]
envs = [
'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
]
return envs
@staticmethod
def get_version(container_id):
# type: (str) -> Optional[str]
version = None
out, err, code = call(
[container_path, 'exec', container_id,
NFSGanesha.entrypoint, '-v'])
if code == 0:
match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
if match:
version = match.group(1)
return version
def validate(self):
# type: () -> None
if not is_fsid(self.fsid):
raise Error('not an fsid: %s' % self.fsid)
if not self.daemon_id:
raise Error('invalid daemon_id: %s' % self.daemon_id)
if not self.image:
raise Error('invalid image: %s' % self.image)
def get_daemon_name(self):
# type: () -> str
return '%s.%s' % (self.daemon_type, self.daemon_id)
def get_daemon_args(self):
# type: () -> List[str]
return self.daemon_args + self.extra_args
    def get_rados_grace_container(self, action):
        # type: (str) -> CephContainer
        """Container for a ganesha action on the grace db"""
        entrypoint = '/usr/bin/ganesha-rados-grace'

        assert self.pool
        args = ['--pool', self.pool]
        if self.namespace:
            args += ['--ns', self.namespace]
        if self.userid:
            args += ['--userid', self.userid]
        args += [action, self.get_daemon_name()]
##################################
class CephIscsi(object):
"""Defines a Ceph-Iscsi container"""
daemon_type = 'iscsi'
entrypoint = '/usr/bin/rbd-target-api'
required_files = ['iscsi-gateway.cfg']
def __init__(self,
fsid,
daemon_id,
config_json,
image=DEFAULT_IMAGE):
# type: (str, Union[int, str], Dict, str) -> None
self.fsid = fsid
self.daemon_id = daemon_id
self.image = image
# config-json options
self.files = dict_get(config_json, 'files', {})
@classmethod
def init(cls, fsid, daemon_id):
# type: (str, Union[int, str]) -> CephIscsi
return cls(fsid, daemon_id, get_parm(args.config_json), args.image)
@staticmethod
def get_container_mounts(data_dir, log_dir):
# type: (str, str) -> Dict[str, str]
mounts = dict()
mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
mounts[log_dir] = '/var/log/rbd-target-api:z'
mounts['/dev'] = '/dev'
return mounts
@staticmethod
def get_container_binds():
# type: () -> List[List[str]]
binds = []
lib_modules = ['type=bind',
'source=/lib/modules',
'destination=/lib/modules',
'ro=true']
binds.append(lib_modules)
return binds
@staticmethod
def get_version(container_id):
# type: (str) -> Optional[str]
version = None
out, err, code = call(
[container_path, 'exec', container_id,
             '/usr/bin/python3', '-c',
             "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"])
if code == 0:
version = out.strip()
return version
def validate(self):
# type: () -> None
if not is_fsid(self.fsid):
raise Error('not an fsid: %s' % self.fsid)
if not self.daemon_id:
raise Error('invalid daemon_id: %s' % self.daemon_id)
if not self.image:
raise Error('invalid image: %s' % self.image)
def get_daemon_name(self):
# type: () -> str
return '%s.%s' % (self.daemon_type, self.daemon_id)
@staticmethod
def configfs_mount_umount(data_dir, mount=True):
# type: (str, bool) -> List[str]
mount_path = os.path.join(data_dir, 'configfs')
if mount:
cmd = "if ! grep -qs {0} /proc/mounts; then " \
"mount -t configfs none {0}; fi".format(mount_path)
else:
cmd = "if grep -qs {0} /proc/mounts; then " \
"umount {0}; fi".format(mount_path)
return cmd.split()
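    # The returned token list is re-joined with ' '.join() when written into
    # unit.run / unit.poststop (see deploy_daemon_units below); the data_dir
    # path here is a hypothetical example:
    #
    #   >>> ' '.join(CephIscsi.configfs_mount_umount('/var/lib/ceph/x/iscsi.a'))
    #   'if ! grep -qs /var/lib/ceph/x/iscsi.a/configfs /proc/mounts; then mount -t configfs none /var/lib/ceph/x/iscsi.a/configfs; fi'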
def get_tcmu_runner_container(self):
# type: () -> CephContainer
tcmu_container = get_container(self.fsid, self.daemon_type, self.daemon_id)
tcmu_container.entrypoint = "/usr/bin/tcmu-runner"
tcmu_container.cname = self.get_container_name(desc='tcmu')
# remove extra container args for tcmu container.
# extra args could cause issue with forking service type
tcmu_container.container_args = []
return tcmu_container
##################################
class CustomContainer(object):
"""Defines a custom container"""
daemon_type = 'container'
    def __init__(self, fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
self.uid = dict_get(config_json, 'uid', 65534) # nobody
self.gid = dict_get(config_json, 'gid', 65534) # nobody
self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
self.args = dict_get(config_json, 'args', [])
self.envs = dict_get(config_json, 'envs', [])
self.privileged = dict_get(config_json, 'privileged', False)
self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
self.ports = dict_get(config_json, 'ports', [])
self.dirs = dict_get(config_json, 'dirs', [])
self.files = dict_get(config_json, 'files', {})
@classmethod
def init(cls, fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
return cls(fsid, daemon_id, get_parm(args.config_json), args.image)
def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
"""
Create dirs/files below the container data directory.
"""
logger.info('Creating custom container configuration '
'dirs/files in {} ...'.format(data_dir))
if not os.path.isdir(data_dir):
raise OSError('data_dir is not a directory: %s' % data_dir)
    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
{
/foo/conf: /conf
foo/conf: /conf
}
becomes
{
/foo/conf: /conf
/var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
}
"""
mounts = {}
for source, destination in self.volume_mounts.items():
source = os.path.join(data_dir, source)
mounts[source] = destination
return mounts
##################################
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
"""
Helper function to get a key from a dictionary.
:param d: The dictionary to process.
:param key: The name of the key to get.
:param default: The default value in case the key does not
exist. Default is `None`.
:param require: Set to `True` if the key is required. An
exception will be raised if the key does not exist in
the given dictionary.
:return: Returns the value of the given key.
:raises: :exc:`self.Error` if the given key does not exist
and `require` is set to `True`.
"""
if require and key not in d.keys():
raise Error('{} missing from dict'.format(key))
return d.get(key, default)
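# e.g.:
#
#   >>> dict_get({'pool': 'nfs-ganesha'}, 'pool', require=True)
#   'nfs-ganesha'
#   >>> dict_get({'pool': 'nfs-ganesha'}, 'namespace')  # falls back to None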
##################################
def get_supported_daemons():
# type: () -> List[str]
supported_daemons = list(Ceph.daemons)
supported_daemons.extend(Monitoring.components)
supported_daemons.append(NFSGanesha.daemon_type)
supported_daemons.append(CephIscsi.daemon_type)
supported_daemons.append(CustomContainer.daemon_type)
assert len(supported_daemons) == len(set(supported_daemons))
return supported_daemons
##################################
def port_in_use(port_num):
# type: (int) -> bool
"""Detect whether a port is in use on the local machine - IPv4 and IPv6"""
logger.info('Verifying port %d ...' % port_num)
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
attempt_bind(s, '0.0.0.0', port_num)
s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
attempt_bind(s, '::', port_num)
except OSError:
return True
else:
return False
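# port_in_use() above calls an attempt_bind() helper that is not shown in this
# excerpt; a minimal sketch of it: try to bind and always close, surfacing
# EADDRINUSE as OSError so the caller reports the port as in use.
def attempt_bind(s, address, port):
    # type: (socket.socket, str, int) -> None
    try:
        s.bind((address, port))
    except (socket.error, OSError) as e:  # py2 and py3
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        if e.errno == errno.EADDRINUSE:
            raise OSError(msg)
        elif e.errno == errno.EADDRNOTAVAIL:
            pass
    finally:
        s.close()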
##################################
try:
TimeoutError
except NameError:
TimeoutError = OSError
class Timeout(TimeoutError):
"""
Raised when the lock could not be acquired in *timeout*
seconds.
"""
    def __init__(self, lock_file):
        # the path of the file lock (reported in the message below)
        self.lock_file = lock_file

    def __str__(self):
temp = "The file lock '{}' could not be acquired."\
.format(self.lock_file)
return temp
class _Acquire_ReturnProxy(object):
def __init__(self, lock):
self.lock = lock
return None
    def __enter__(self):
        return self.lock

    def __exit__(self, exc_type, exc_value, traceback):
        self.lock.release()
        return None
class FileLock(object):
def __init__(self, name, timeout=-1):
if not os.path.exists(LOCK_DIR):
os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')

        # the file descriptor for the lock file, while the lock is held
        self._lock_file_fd = None
        self.timeout = timeout

        # counter to support nested acquire()s from the same process
        self._lock_counter = 0
@property
def is_locked(self):
return self._lock_file_fd is not None
    def acquire(self, timeout=None, poll_intervall=0.05):
        """
        Acquires the file lock, or fails with a :exc:`Timeout` error after
        *timeout* seconds.
        """
        # use the default timeout if none was given
        if timeout is None:
            timeout = self.timeout

        # increment the counter right away; undone on error below
        self._lock_counter += 1

        lock_id = id(self)
lock_filename = self._lock_file
start_time = time.time()
try:
while True:
if not self.is_locked:
logger.debug('Acquiring lock %s on %s', lock_id,
lock_filename)
self._acquire()
if self.is_locked:
logger.debug('Lock %s acquired on %s', lock_id,
lock_filename)
break
elif timeout >= 0 and time.time() - start_time > timeout:
logger.warning('Timeout acquiring lock %s on %s', lock_id,
lock_filename)
raise Timeout(self._lock_file)
else:
logger.debug(
'Lock %s not acquired on %s, waiting %s seconds ...',
lock_id, lock_filename, poll_intervall
)
time.sleep(poll_intervall)
except: # noqa
# Something did go wrong, so decrement the counter.
self._lock_counter = max(0, self._lock_counter - 1)
raise
return _Acquire_ReturnProxy(lock = self)
    def release(self, force=False):
        if self.is_locked:
            self._lock_counter -= 1
            if self._lock_counter == 0 or force:
                lock_id = id(self)
                lock_filename = self._lock_file
                logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
        return None
def __enter__(self):
self.acquire()
return self
def __del__(self):
self.release(force=True)
return None
def _acquire(self):
open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
fd = os.open(self._lock_file, open_mode)
try:
fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
except (IOError, OSError):
os.close(fd)
else:
self._lock_file_fd = fd
return None
def _release(self):
# Do not remove the lockfile:
#
# https://github.com/benediktschmitt/py-filelock/issues/31
# https://stackoverflow.com/questions/17708885/flock-removing-locked-
file-without-race-condition
fd = self._lock_file_fd
self._lock_file_fd = None
fcntl.flock(fd, fcntl.LOCK_UN) # type: ignore
os.close(fd) # type: ignore
return None
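# Typical FileLock usage elsewhere in this script (see command_bootstrap):
#
#   l = FileLock(fsid)
#   l.acquire()   # blocks until the flock is held, or raises Timeout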
##################################
# Popen wrappers, lifted from ceph-volume
def call(command,  # type: List[str]
         desc=None,  # type: Optional[str]
         verbose=False,  # type: bool
         verbose_on_failure=True,  # type: bool
         timeout=DEFAULT_TIMEOUT,  # type: Optional[int]
         **kwargs):
    # type: (...) -> Tuple[str, str, int]
    """
    Wrap subprocess.Popen, reading stdout/stderr as the child runs and
    logging each complete line. Returns (stdout, stderr, returncode).
    """
    if not desc:
        desc = command[0]
    timeout = timeout or args.timeout
    logger.debug('Running command: %s' % ' '.join(command))
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True,
        **kwargs)

    out = ''
err = ''
reads = None
stop = False
out_buffer = '' # partial line (no newline yet)
err_buffer = '' # partial line (no newline yet)
start_time = time.time()
end_time = None
if timeout:
end_time = start_time + timeout
while not stop:
if end_time and (time.time() >= end_time):
stop = True
if process.poll() is None:
logger.info(desc + ':timeout after %s seconds' % timeout)
process.kill()
if reads and process.poll() is not None:
# we want to stop, but first read off anything remaining
# on stdout/stderr
stop = True
else:
reads, _, _ = select.select(
[process.stdout.fileno(), process.stderr.fileno()],
[], [], timeout
)
for fd in reads:
try:
message_b = os.read(fd, 1024)
if isinstance(message_b, bytes):
message = message_b.decode('utf-8')
if isinstance(message_b, str):
message = message_b
if stop and message:
                    # process has terminated, but have more to read still,
                    # so not stopping yet
# (os.read returns '' when it encounters EOF)
stop = False
if not message:
continue
if fd == process.stdout.fileno():
out += message
message = out_buffer + message
lines = message.split('\n')
out_buffer = lines.pop()
for line in lines:
if verbose:
logger.info(desc + ':stdout ' + line)
else:
logger.debug(desc + ':stdout ' + line)
elif fd == process.stderr.fileno():
err += message
message = err_buffer + message
lines = message.split('\n')
err_buffer = lines.pop()
for line in lines:
if verbose:
logger.info(desc + ':stderr ' + line)
else:
logger.debug(desc + ':stderr ' + line)
else:
assert False
except (IOError, OSError):
pass
if verbose:
logger.debug(desc + ':profile rt=%s, stop=%s, exit=%s, reads=%s'
% (time.time()-start_time, stop, process.poll(), reads))
returncode = process.wait()
if out_buffer != '':
if verbose:
logger.info(desc + ':stdout ' + out_buffer)
else:
logger.debug(desc + ':stdout ' + out_buffer)
if err_buffer != '':
if verbose:
logger.info(desc + ':stderr ' + err_buffer)
else:
logger.debug(desc + ':stderr ' + err_buffer)
    return out, err, returncode


def call_timeout(command, timeout):
    # type: (List[str], int) -> int
    ret = 1
if sys.version_info >= (3, 3):
ret = call_timeout_py3(command, timeout)
else:
# py2 subprocess has no timeout arg
ret = call_timeout_py2(command, timeout)
return ret
##################################
num += 1
time.sleep(1)
def read_config(fn):
# type: (Optional[str]) -> ConfigParser
# bend over backwards here because py2's ConfigParser doesn't like
# whitespace before config option names (e.g., '\n foo = bar\n').
# Yeesh!
if sys.version_info >= (3, 2):
cp = ConfigParser()
else:
cp = SafeConfigParser()
if fn:
with open(fn, 'r') as f:
raw_conf = f.read()
nice_conf = re.sub(r'\n(\s)+', r'\n', raw_conf)
s_io = StringIO(nice_conf)
if sys.version_info >= (3, 2):
cp.read_file(s_io)
else:
cp.readfp(s_io)
return cp
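# e.g., the whitespace normalization above:
#
#   >>> re.sub(r'\n(\s)+', r'\n', '[global]\n\tfsid = abc\n')
#   '[global]\nfsid = abc\n'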
def pathify(p):
# type: (str) -> str
p = os.path.expanduser(p)
return os.path.abspath(p)
def get_file_timestamp(fn):
# type: (str) -> Optional[str]
try:
mt = os.path.getmtime(fn)
return datetime.datetime.fromtimestamp(
mt, tz=datetime.timezone.utc
).strftime(DATEFMT)
except Exception as e:
return None
def try_convert_datetime(s):
# type: (str) -> Optional[str]
# This is super irritating because
# 1) podman and docker use different formats
# 2) python's strptime can't parse either one
#
# I've seen:
# docker 18.09.7: 2020-03-03T09:21:43.636153304Z
# podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00
# 2020-03-03 15:52:30.136257504 -0600 CST
# (In the podman case, there is a different string format for
# 'inspect' and 'inspect --format {{.Created}}'!!)
    # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
    if s and s[-1] == 'Z':
        s = s[:-1] + '-0000'
    # cut off the redundant 'CST' part that strptime can't parse, if
    # present.
v = s.split(' ')
s = ' '.join(v[0:3])
def get_podman_version():
# type: () -> Tuple[int, ...]
if 'podman' not in container_path:
raise ValueError('not using podman')
out, _, _ = call_throws([container_path, '--version'])
return _parse_podman_version(out)
def _parse_podman_version(out):
# type: (str) -> Tuple[int, ...]
    _, _, version_str = out.strip().split()
    # a sketch of the elided tail: assumes a plain numeric 'X.Y.Z' version
    return tuple(int(x) for x in version_str.split('.'))
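# e.g.:
#
#   >>> _parse_podman_version('podman version 1.9.3')
#   (1, 9, 3)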
def get_hostname():
# type: () -> str
return socket.gethostname()
def get_fqdn():
# type: () -> str
return socket.getfqdn() or socket.gethostname()
def get_arch():
# type: () -> str
return platform.uname().machine
def generate_service_id():
# type: () -> str
return get_hostname() + '.' + ''.join(random.choice(string.ascii_lowercase)
for _ in range(6))
def generate_password():
# type: () -> str
return ''.join(random.choice(string.ascii_lowercase + string.digits)
for i in range(10))
def normalize_container_id(i):
# type: (str) -> str
# docker adds the sha256: prefix, but AFAICS both
# docker (18.09.7 in bionic at least) and podman
# both always use sha256, so leave off the prefix
# for consistency.
prefix = 'sha256:'
if i.startswith(prefix):
i = i[len(prefix):]
return i
def make_fsid():
# type: () -> str
return str(uuid.uuid1())
def is_fsid(s):
# type: (str) -> bool
try:
uuid.UUID(s)
except ValueError:
return False
return True
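# e.g.:
#
#   >>> is_fsid('00000000-0000-0000-0000-000000000000')
#   True
#   >>> is_fsid('not-a-uuid')
#   False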
def infer_fsid(func):
"""
If we only find a single fsid in /var/lib/ceph/*, use that
"""
@wraps(func)
def _infer_fsid():
if args.fsid:
logger.debug('Using specified fsid: %s' % args.fsid)
return func()
fsids_set = set()
daemon_list = list_daemons(detail=False)
for daemon in daemon_list:
if not is_fsid(daemon['fsid']):
# 'unknown' fsid
continue
elif 'name' not in args or not args.name:
# args.name not specified
fsids_set.add(daemon['fsid'])
elif daemon['name'] == args.name:
# args.name is a match
fsids_set.add(daemon['fsid'])
fsids = sorted(fsids_set)
if not fsids:
# some commands do not always require an fsid
pass
elif len(fsids) == 1:
logger.info('Inferring fsid %s' % fsids[0])
args.fsid = fsids[0]
else:
raise Error('Cannot infer an fsid, one must be specified: %s' % fsids)
return func()
return _infer_fsid
def infer_config(func):
"""
If we find a MON daemon, use the config from that container
"""
@wraps(func)
def _infer_config():
if args.config:
logger.debug('Using specified config: %s' % args.config)
return func()
config = None
if args.fsid:
name = args.name
if not name:
daemon_list = list_daemons(detail=False)
for daemon in daemon_list:
if daemon['name'].startswith('mon.'):
name = daemon['name']
break
if name:
config = '/var/lib/ceph/{}/{}/config'.format(args.fsid, name)
if config:
logger.info('Inferring config %s' % config)
args.config = config
elif os.path.exists(SHELL_DEFAULT_CONF):
logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF)
args.config = SHELL_DEFAULT_CONF
return func()
return _infer_config
def _get_default_image():
if DEFAULT_IMAGE_IS_MASTER:
warn = '''This is a development version of cephadm.
For information regarding the latest stable release:
https://docs.ceph.com/docs/{}/cephadm/install
'''.format(LATEST_STABLE_RELEASE)
for line in warn.splitlines():
logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
return DEFAULT_IMAGE
def infer_image(func):
"""
Use the most recent ceph image
"""
@wraps(func)
def _infer_image():
if not args.image:
args.image = os.environ.get('CEPHADM_IMAGE')
if not args.image:
args.image = get_last_local_ceph_image()
if not args.image:
args.image = _get_default_image()
return func()
return _infer_image
def default_image(func):
@wraps(func)
def _default_image():
if not args.image:
if 'name' in args and args.name:
type_ = args.name.split('.', 1)[0]
if type_ in Monitoring.components:
args.image = Monitoring.components[type_]['image']
if not args.image:
args.image = os.environ.get('CEPHADM_IMAGE')
if not args.image:
args.image = _get_default_image()
return func()
return _default_image
def get_last_local_ceph_image():
"""
:return: The most recent local ceph image (already pulled)
"""
out, _, _ = call_throws(
[container_path, 'images',
'--filter', 'label=ceph=True',
'--filter', 'dangling=false',
'--format', '{{.Repository}} {{.Tag}}'])
for line in out.splitlines():
if len(line.split()) == 2:
repository, tag = line.split()
r = '{}:{}'.format(repository, tag)
logger.info('Using recent ceph image %s' % r)
return r
return None
def get_log_dir(fsid):
# type: (str) -> str
return os.path.join(args.log_dir, fsid)
def move_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)

        if uid is not None and gid is not None:
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)
def find_executable(executable, path=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Tries to find 'executable' in the directories listed in 'path'.

    (lifted from python's distutils.spawn)
    """
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
    # bpo-35755: Don't use os.defpath if the PATH environment variable is
    # set to an empty string
    if not path:
        return None

    paths = path.split(os.pathsep)
for p in paths:
f = os.path.join(p, executable)
if os.path.isfile(f):
# the file exists, we have a shot at spawn working
return f
return None
def find_program(filename):
# type: (str) -> str
name = find_executable(filename)
if name is None:
raise ValueError('%s not found' % filename)
return name
def check_unit(unit_name):
# type: (str) -> Tuple[bool, str, bool]
# NOTE: we ignore the exit code here because systemctl outputs
# various exit codes based on the state of the service, but the
# string result is more explicit (and sufficient).
enabled = False
installed = False
try:
out, err, code = call(['systemctl', 'is-enabled', unit_name],
verbose_on_failure=False)
if code == 0:
enabled = True
installed = True
elif "disabled" in out:
installed = True
except Exception as e:
logger.warning('unable to run systemctl: %s' % e)
enabled = False
installed = False
state = 'unknown'
try:
out, err, code = call(['systemctl', 'is-active', unit_name],
verbose_on_failure=False)
out = out.strip()
if out in ['active']:
state = 'running'
elif out in ['inactive']:
state = 'stopped'
elif out in ['failed', 'auto-restart']:
state = 'error'
else:
state = 'unknown'
except Exception as e:
logger.warning('unable to run systemctl: %s' % e)
state = 'unknown'
return (enabled, state, installed)
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if os.path.exists(config_file):
config = read_config(config_file)
if config.has_section('global') and config.has_option('global', 'fsid'):
return config.get('global', 'fsid')
return None
return r
if config:
config_path = os.path.join(data_dir, 'config')
with open(config_path, 'w') as f:
os.fchown(f.fileno(), uid, gid)
os.fchmod(f.fileno(), 0o600)
f.write(config)
if keyring:
keyring_path = os.path.join(data_dir, 'keyring')
with open(keyring_path, 'w') as f:
os.fchmod(f.fileno(), 0o600)
os.fchown(f.fileno(), uid, gid)
f.write(keyring)
if daemon_type in Monitoring.components.keys():
config_json: Dict[str, Any] = get_parm(args.config_json)
        required_files = Monitoring.components[daemon_type].get(
            'config-json-files', list())
# populate the config directory for the component from the config-json
for fname in required_files:
if 'files' in config_json: # type: ignore
content = dict_get_join(config_json['files'], fname)
                with open(os.path.join(data_dir_root, config_dir, fname), 'w') as f:
os.fchown(f.fileno(), uid, gid)
os.fchmod(f.fileno(), 0o600)
f.write(content)
def get_parm(option):
# type: (str) -> Dict[str, str]
if not option:
return dict()
global cached_stdin
if option == '-':
if cached_stdin is not None:
j = cached_stdin
else:
try:
j = injected_stdin # type: ignore
except NameError:
j = sys.stdin.read()
cached_stdin = j
else:
# inline json string
if option[0] == '{' and option[-1] == '}':
j = option
# json file
elif os.path.exists(option):
with open(option, 'r') as f:
j = f.read()
else:
raise Error("Config file {} not found".format(option))
try:
js = json.loads(j)
except ValueError as e:
raise Error("Invalid JSON in {}: {}".format(option, e))
else:
return js
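# get_parm() accepts '-' (read stdin), an inline JSON string, or a file path:
#
#   >>> get_parm('{"pool": "nfs-ganesha"}')
#   {'pool': 'nfs-ganesha'}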
def get_config_and_keyring():
# type: () -> Tuple[Optional[str], Optional[str]]
config = None
keyring = None
def get_container_binds(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str, None]) -> List[List[str]]
    binds = list()

    if daemon_type == CephIscsi.daemon_type:
binds.extend(CephIscsi.get_container_binds())
elif daemon_type == CustomContainer.daemon_type:
assert daemon_id
cc = CustomContainer.init(fsid, daemon_id)
data_dir = get_data_dir(fsid, daemon_type, daemon_id)
binds.extend(cc.get_container_binds(data_dir))
return binds
def get_container_mounts(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str, None]) -> Dict[str, str]
    mounts = dict()

    if daemon_type in Ceph.daemons:
if fsid:
            run_path = os.path.join('/var/run/ceph', fsid)
if os.path.exists(run_path):
mounts[run_path] = '/var/run/ceph:z'
log_dir = get_log_dir(fsid)
mounts[log_dir] = '/var/log/ceph:z'
crash_dir = '/var/lib/ceph/%s/crash' % fsid
if os.path.exists(crash_dir):
mounts[crash_dir] = '/var/lib/ceph/crash:z'
try:
        if args.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(args.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
else:
logger.error('{}{}{}'.format(termcolor.red,
'Ceph shared source folder does not exist.',
termcolor.end))
except AttributeError:
pass
if daemon_type == NFSGanesha.daemon_type:
assert daemon_id
data_dir = get_data_dir(fsid, daemon_type, daemon_id)
mounts.update(NFSGanesha.get_container_mounts(data_dir))
if daemon_type == CephIscsi.daemon_type:
assert daemon_id
data_dir = get_data_dir(fsid, daemon_type, daemon_id)
log_dir = get_log_dir(fsid)
mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))
if daemon_type == CustomContainer.daemon_type:
assert daemon_id
cc = CustomContainer.init(fsid, daemon_id)
data_dir = get_data_dir(fsid, daemon_type, daemon_id)
mounts.update(cc.get_container_mounts(data_dir))
return mounts
def get_container(fsid, daemon_type, daemon_id,
                  privileged=False,
                  ptrace=False,
                  container_args=None):
    # type: (str, str, Union[int, str], bool, bool, Optional[List[str]]) -> CephContainer
    entrypoint = ''
    name = ''
    ceph_args = []  # type: List[str]
    envs = []  # type: List[str]
    host_network = True

    if container_args is None:
container_args = []
if daemon_type in ['mon', 'osd']:
# mon and osd need privileged in order for libudev to query devices
privileged = True
if daemon_type == 'rgw':
entrypoint = '/usr/bin/radosgw'
name = 'client.rgw.%s' % daemon_id
elif daemon_type == 'rbd-mirror':
entrypoint = '/usr/bin/rbd-mirror'
name = 'client.rbd-mirror.%s' % daemon_id
elif daemon_type == 'crash':
entrypoint = '/usr/bin/ceph-crash'
name = 'client.crash.%s' % daemon_id
elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
entrypoint = '/usr/bin/ceph-' + daemon_type
name = '%s.%s' % (daemon_type, daemon_id)
elif daemon_type in Monitoring.components:
entrypoint = ''
elif daemon_type == NFSGanesha.daemon_type:
entrypoint = NFSGanesha.entrypoint
name = '%s.%s' % (daemon_type, daemon_id)
envs.extend(NFSGanesha.get_container_envs())
elif daemon_type == CephIscsi.daemon_type:
entrypoint = CephIscsi.entrypoint
name = '%s.%s' % (daemon_type, daemon_id)
# So the container can modprobe iscsi_target_mod and have write perms
# to configfs we need to make this a privileged container.
privileged = True
elif daemon_type == CustomContainer.daemon_type:
cc = CustomContainer.init(fsid, daemon_id)
entrypoint = cc.entrypoint
host_network = False
envs.extend(cc.get_container_envs())
container_args.extend(cc.get_container_args())
if daemon_type in Monitoring.components:
uid, gid = extract_uid_gid_monitoring(daemon_type)
monitoring_args = [
'--user',
str(uid),
# FIXME: disable cpu/memory limits for the time being (not supported
# by ubuntu 18.04 kernel!)
]
container_args.extend(monitoring_args)
elif daemon_type == 'crash':
ceph_args = ['-n', name]
elif daemon_type in Ceph.daemons:
ceph_args = ['-n', name, '-f']
return CephContainer(
image=args.image,
entrypoint=entrypoint,
args=ceph_args + get_daemon_args(fsid, daemon_type, daemon_id),
container_args=container_args,
volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
envs=envs,
privileged=privileged,
ptrace=ptrace,
init=args.container_init,
host_network=host_network,
)
def extract_uid_gid(img='', file_path='/var/lib/ceph'):
# type: (str, Union[str, List[str]]) -> Tuple[int, int]
if not img:
img = args.image
if isinstance(file_path, str):
paths = [file_path]
else:
paths = file_path
for fp in paths:
try:
out = CephContainer(
image=img,
entrypoint='stat',
args=['-c', '%u %g', fp]
).run()
uid, gid = out.split(' ')
return int(uid), int(gid)
except RuntimeError:
pass
raise RuntimeError('uid/gid not found')
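# e.g., inside the ceph image, 'stat -c "%u %g" /var/lib/ceph' typically
# prints '167 167' (the in-container ceph user/group), yielding (167, 167).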
def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False,
                  ports=None):
    # type: (...) -> None
    ports = ports or []
    if any([port_in_use(port) for port in ports]):
        raise Error("TCP Port(s) '{}' required for {} already in use".format(
            ",".join(map(str, ports)), daemon_type))
# --mkfs
create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid)
mon_dir = get_data_dir(fsid, 'mon', daemon_id)
log_dir = get_log_dir(fsid)
out = CephContainer(
image=args.image,
entrypoint='/usr/bin/ceph-mon',
args=['--mkfs',
'-i', str(daemon_id),
'--fsid', fsid,
'-c', '/tmp/config',
'--keyring', '/tmp/keyring',
] + get_daemon_args(fsid, 'mon', daemon_id),
volume_mounts={
log_dir: '/var/log/ceph:z',
mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
tmp_keyring.name: '/tmp/keyring:z',
tmp_config.name: '/tmp/config:z',
},
).run()
# write conf
with open(mon_dir + '/config', 'w') as f:
os.fchown(f.fileno(), uid, gid)
os.fchmod(f.fileno(), 0o600)
f.write(config)
else:
# dirs, conf, keyring
create_daemon_dirs(
fsid, daemon_type, daemon_id,
uid, gid,
config, keyring)
if not reconfig:
deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
osd_fsid=osd_fsid)
update_firewalld(daemon_type)
if daemon_type in Ceph.daemons:
install_path = find_program('install')
                f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(
                    install_path=install_path, fsid=fsid, uid=uid, gid=gid))
# pre-start cmd(s)
if daemon_type == 'osd':
# osds have a pre-start step
assert osd_fsid
simple_fn = os.path.join('/etc/ceph/osd',
'%s-%s.json.adopted-by-cephadm' % (daemon_id,
osd_fsid))
if os.path.exists(simple_fn):
f.write('# Simple OSDs need chown on startup:\n')
for n in ['block', 'block.db', 'block.wal']:
p = os.path.join(data_dir, n)
f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p,
uid=uid, gid=gid))
else:
prestart = CephContainer(
image=args.image,
entrypoint='/usr/sbin/ceph-volume',
args=[
'lvm', 'activate',
str(daemon_id), osd_fsid,
'--no-systemd'
],
privileged=True,
volume_mounts=get_container_mounts(fsid, daemon_type,
daemon_id),
bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type,
daemon_id),
)
                _write_container_cmd_to_bash(f, prestart, 'LVM OSDs use ceph-volume lvm activate')
elif daemon_type == NFSGanesha.daemon_type:
# add nfs to the rados grace db
nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
prestart = nfs_ganesha.get_rados_grace_container('add')
_write_container_cmd_to_bash(f, prestart, 'add daemon to rados grace')
elif daemon_type == CephIscsi.daemon_type:
f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True))
+ '\n')
ceph_iscsi = CephIscsi.init(fsid, daemon_id)
tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(f, tcmu_container, 'iscsi tcmu-runner container', background=True)
# post-stop command(s)
with open(data_dir + '/unit.poststop.new', 'w') as f:
if daemon_type == 'osd':
assert osd_fsid
poststop = CephContainer(
image=args.image,
entrypoint='/usr/sbin/ceph-volume',
args=[
'lvm', 'deactivate',
str(daemon_id), osd_fsid,
],
privileged=True,
volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
daemon_id),
)
_write_container_cmd_to_bash(f, poststop, 'deactivate osd')
elif daemon_type == NFSGanesha.daemon_type:
# remove nfs from the rados grace db
nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
poststop = nfs_ganesha.get_rados_grace_container('remove')
            _write_container_cmd_to_bash(f, poststop, 'remove daemon from rados grace')
elif daemon_type == CephIscsi.daemon_type:
# make sure we also stop the tcmu container
ceph_iscsi = CephIscsi.init(fsid, daemon_id)
tcmu_container = ceph_iscsi.get_tcmu_runner_container()
f.write('! '+ ' '.join(tcmu_container.stop_cmd()) + '\n')
f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir,
mount=False)) + '\n')
os.fchmod(f.fileno(), 0o600)
os.rename(data_dir + '/unit.poststop.new',
data_dir + '/unit.poststop')
# systemd
install_base_units(fsid)
unit = get_unit_file(fsid)
unit_file = 'ceph-%s@.service' % (fsid)
with open(args.unit_dir + '/' + unit_file + '.new', 'w') as f:
f.write(unit)
os.rename(args.unit_dir + '/' + unit_file + '.new',
args.unit_dir + '/' + unit_file)
call_throws(['systemctl', 'daemon-reload'])
class Firewalld(object):
def __init__(self):
# type: () -> None
self.available = self.check()
def check(self):
# type: () -> bool
self.cmd = find_executable('firewall-cmd')
if not self.cmd:
logger.debug('firewalld does not appear to be present')
return False
(enabled, state, _) = check_unit('firewalld.service')
if not enabled:
logger.debug('firewalld.service is not enabled')
return False
if state != "running":
logger.debug('firewalld.service is not running')
return False
logger.info("firewalld ready")
return True
    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        if not self.available:
            logger.debug('firewalld.service is not available; not enabling service')
            return

        if daemon_type == 'mon':
svc = 'ceph-mon'
elif daemon_type in ['mgr', 'mds', 'osd']:
svc = 'ceph'
elif daemon_type == NFSGanesha.daemon_type:
svc = 'nfs'
else:
return
def apply_rules(self):
# type: () -> None
if not self.available:
return
call_throws([self.cmd, '--reload'])
def update_firewalld(daemon_type):
# type: (str) -> None
firewall = Firewalld()
firewall.enable_service_for(daemon_type)
fw_ports = []
if daemon_type in Monitoring.port_map.keys():
fw_ports.extend(Monitoring.port_map[daemon_type]) # prometheus etc
firewall.open_ports(fw_ports)
firewall.apply_rules()
def install_base_units(fsid):
# type: (str) -> None
"""
Set up ceph.target and ceph-$fsid.target units.
"""
# global unit
existed = os.path.exists(args.unit_dir + '/ceph.target')
with open(args.unit_dir + '/ceph.target.new', 'w') as f:
f.write('[Unit]\n'
'Description=All Ceph clusters and services\n'
'\n'
'[Install]\n'
'WantedBy=multi-user.target\n')
os.rename(args.unit_dir + '/ceph.target.new',
args.unit_dir + '/ceph.target')
if not existed:
# we disable before enable in case a different ceph.target
# (from the traditional package) is present; while newer
# systemd is smart enough to disable the old
# (/lib/systemd/...) and enable the new (/etc/systemd/...),
# some older versions of systemd error out with EEXIST.
call_throws(['systemctl', 'disable', 'ceph.target'])
call_throws(['systemctl', 'enable', 'ceph.target'])
call_throws(['systemctl', 'start', 'ceph.target'])
# cluster unit
existed = os.path.exists(args.unit_dir + '/ceph-%s.target' % fsid)
with open(args.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
f.write('[Unit]\n'
'Description=Ceph cluster {fsid}\n'
'PartOf=ceph.target\n'
'Before=ceph.target\n'
'\n'
'[Install]\n'
'WantedBy=multi-user.target ceph.target\n'.format(
fsid=fsid)
)
os.rename(args.unit_dir + '/ceph-%s.target.new' % fsid,
args.unit_dir + '/ceph-%s.target' % fsid)
if not existed:
call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid])
call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid])
def get_unit_file(fsid):
# type: (str) -> str
extra_args = ''
if 'podman' in container_path:
extra_args = ('ExecStartPre=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
'ExecStopPost=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
'Type=forking\n'
'PIDFile=/%t/%n-pid\n')
    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target
[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStartPre=-{container_path} rm ceph-{fsid}-%i
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(
container_path=container_path,
fsid=fsid,
data_dir=args.data_dir,
extra_args=extra_args)
return u
##################################
class CephContainer:
def __init__(self,
image: str,
entrypoint: str,
args: List[str] = [],
volume_mounts: Dict[str, str] = {},
cname: str = '',
container_args: List[str] = [],
envs: Optional[List[str]] = None,
privileged: bool = False,
ptrace: bool = False,
bind_mounts: Optional[List[List[str]]] = None,
init: bool = False,
host_network: bool = True,
) -> None:
self.image = image
self.entrypoint = entrypoint
self.args = args
self.volume_mounts = volume_mounts
self.cname = cname
self.container_args = container_args
self.envs = envs
self.privileged = privileged
self.ptrace = ptrace
self.bind_mounts = bind_mounts if bind_mounts else []
self.init = init
self.host_network = host_network
    def run_cmd(self):
        # type: () -> List[str]
        # base 'run' invocation (a sketch of the elided prologue)
        cmd_args = [
            str(container_path),
            'run',
            '--rm',
            '--ipc=host',
        ]  # type: List[str]
        envs = []  # type: List[str]

        if self.host_network:
cmd_args.append('--net=host')
if self.entrypoint:
cmd_args.extend(['--entrypoint', self.entrypoint])
if self.privileged:
cmd_args.extend([
'--privileged',
# let OSD etc read block devs that haven't been chowned
'--group-add=disk'])
if self.ptrace and not self.privileged:
# if privileged, the SYS_PTRACE cap is already added
# in addition, --cap-add and --privileged are mutually
# exclusive since podman >= 2.0
cmd_args.append('--cap-add=SYS_PTRACE')
if self.init:
cmd_args.append('--init')
if self.cname:
cmd_args.extend(['--name', self.cname])
if self.envs:
for env in self.envs:
envs.extend(['-e', env])
vols = sum(
[['-v', '%s:%s' % (host_dir, container_dir)]
for host_dir, container_dir in self.volume_mounts.items()], [])
binds = sum([['--mount', '{}'.format(','.join(bind))]
for bind in self.bind_mounts], [])
    def shell_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        # same prologue as run_cmd() above (a sketch of the elided lines)
        cmd_args = [
            str(container_path),
            'run',
            '--rm',
            '--ipc=host',
        ]  # type: List[str]
        envs = []  # type: List[str]

        if self.host_network:
cmd_args.append('--net=host')
if self.privileged:
cmd_args.extend([
'--privileged',
# let OSD etc read block devs that haven't been chowned
'--group-add=disk',
])
if self.envs:
for env in self.envs:
envs.extend(['-e', env])
vols = sum(
[['-v', '%s:%s' % (host_dir, container_dir)]
for host_dir, container_dir in self.volume_mounts.items()], [])
binds = sum([['--mount', '{}'.format(','.join(bind))]
for bind in self.bind_mounts], [])
def stop_cmd(self):
        # type: () -> List[str]
ret = [
str(container_path),
'stop', self.cname,
]
return ret
##################################
@infer_image
def command_version():
# type: () -> int
out = CephContainer(args.image, 'ceph', ['--version']).run()
print(out.strip())
return 0
##################################
@infer_image
def command_pull():
# type: () -> int
_pull_image(args.image)
return command_inspect_image()
def _pull_image(image):
# type: (str) -> None
logger.info('Pulling container image %s...' % image)
ignorelist = [
"error creating read-write layer with ID",
"net/http: TLS handshake timeout",
"Digest did not match, expected",
]
@infer_image
def command_inspect_image():
# type: () -> int
out, err, ret = call_throws([
container_path, 'inspect',
'--format', '{{.ID}},{{json .RepoDigests}}',
args.image])
if ret:
return errno.ENOENT
info_from = get_image_info_from_inspect(out.strip(), args.image)
def unwrap_ipv6(address):
# type: (str) -> str
if address.startswith('[') and address.endswith(']'):
return address[1:-1]
return address
def wrap_ipv6(address):
    # type: (str) -> str
    # We cannot assume the address is unwrapped, or even IPv6 at all; a
    # hostname won't parse as IPv6 and should not be wrapped.
    if is_ipv6(address):
        return '[{}]'.format(address)
    return address
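# e.g.:
#
#   >>> unwrap_ipv6('[::1]')
#   '::1'
#   >>> wrap_ipv6('::1')
#   '[::1]'
#   >>> wrap_ipv6('myhost')  # not IPv6; logs a warning, returned unchanged
#   'myhost'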
def is_ipv6(address):
# type: (str) -> bool
address = unwrap_ipv6(address)
try:
return ipaddress.ip_address(unicode(address)).version == 6
except ValueError:
logger.warning("Address: {} isn't a valid IP address".format(address))
return False
@default_image
def command_bootstrap():
# type: () -> int
if not args.output_config:
args.output_config = os.path.join(args.output_dir, 'ceph.conf')
if not args.output_keyring:
args.output_keyring = os.path.join(args.output_dir,
'ceph.client.admin.keyring')
if not args.output_pub_ssh_key:
args.output_pub_ssh_key = os.path.join(args.output_dir, 'ceph.pub')
if not args.skip_prepare_host:
command_prepare_host()
else:
logger.info('Skip prepare_host')
# initial vars
fsid = args.fsid or make_fsid()
hostname = get_hostname()
if '.' in hostname and not args.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix '
                    '(e.g., "sudo hostname %s" or similar) or pass '
                    '--allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
mon_id = args.mon_id or hostname
mgr_id = args.mgr_id or generate_service_id()
logger.info('Cluster fsid: %s' % fsid)
ipv6 = False
l = FileLock(fsid)
l.acquire()
# ip
r = re.compile(r':(\d+)$')
base_ip = ''
if args.mon_ip:
ipv6 = is_ipv6(args.mon_ip)
if ipv6:
args.mon_ip = wrap_ipv6(args.mon_ip)
hasport = r.findall(args.mon_ip)
if hasport:
port = int(hasport[0])
if port == 6789:
addr_arg = '[v1:%s]' % args.mon_ip
elif port == 3300:
addr_arg = '[v2:%s]' % args.mon_ip
else:
logger.warning('Using msgr2 protocol for unrecognized port %d' %
port)
addr_arg = '[v2:%s]' % args.mon_ip
base_ip = args.mon_ip[0:-(len(str(port)))-1]
check_ip_port(base_ip, port)
else:
base_ip = args.mon_ip
addr_arg = '[v2:%s:3300,v1:%s:6789]' % (args.mon_ip, args.mon_ip)
check_ip_port(args.mon_ip, 3300)
check_ip_port(args.mon_ip, 6789)
elif args.mon_addrv:
addr_arg = args.mon_addrv
if addr_arg[0] != '[' or addr_arg[-1] != ']':
            raise Error('--mon-addrv value %s must use square brackets' %
                        addr_arg)
ipv6 = addr_arg.count('[') > 1
for addr in addr_arg[1:-1].split(','):
hasport = r.findall(addr)
if not hasport:
raise Error('--mon-addrv value %s must include port number' %
addr_arg)
port = int(hasport[0])
# strip off v1: or v2: prefix
addr = re.sub(r'^\w+:', '', addr)
base_ip = addr[0:-(len(str(port)))-1]
check_ip_port(base_ip, port)
else:
raise Error('must specify --mon-ip or --mon-addrv')
logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg))
mon_network = None
if not args.skip_mon_network:
# make sure IP is configured locally, and then figure out the
# CIDR network
for net, ips in list_networks().items():
if ipaddress.ip_address(unicode(unwrap_ipv6(base_ip))) in \
[ipaddress.ip_address(unicode(ip)) for ip in ips]:
mon_network = net
logger.info('Mon IP %s is in CIDR network %s' % (base_ip,
mon_network))
break
if not mon_network:
raise Error('Failed to infer CIDR network for mon ip %s; pass '
'--skip-mon-network to configure it later' % base_ip)
# config
cp = read_config(args.config)
if not cp.has_section('global'):
cp.add_section('global')
    cp.set('global', 'fsid', fsid)
cp.set('global', 'mon host', addr_arg)
cp.set('global', 'container_image', args.image)
cpf = StringIO()
cp.write(cpf)
config = cpf.getvalue()
if args.registry_json or args.registry_url:
command_registry_login()
if not args.skip_pull:
_pull_image(args.image)
keyring = ('[mon.]\n'
'\tkey = %s\n'
'\tcaps mon = allow *\n'
'[client.admin]\n'
'\tkey = %s\n'
'\tcaps mon = allow *\n'
'\tcaps mds = allow *\n'
'\tcaps mgr = allow *\n'
'\tcaps osd = allow *\n'
'[mgr.%s]\n'
'\tkey = %s\n'
'\tcaps mon = profile mgr\n'
'\tcaps mds = allow *\n'
'\tcaps osd = allow *\n'
% (mon_key, admin_key, mgr_id, mgr_key))
# pass monmap file to ceph user for use by ceph-mon --mkfs below
os.fchown(tmp_monmap.fileno(), uid, gid)
# create mon
logger.info('Creating mon...')
create_daemon_dirs(fsid, 'mon', mon_id, uid, gid)
mon_dir = get_data_dir(fsid, 'mon', mon_id)
log_dir = get_log_dir(fsid)
out = CephContainer(
image=args.image,
entrypoint='/usr/bin/ceph-mon',
args=['--mkfs',
'-i', mon_id,
'--fsid', fsid,
'-c', '/dev/null',
'--monmap', '/tmp/monmap',
'--keyring', '/tmp/keyring',
] + get_daemon_args(fsid, 'mon', mon_id),
volume_mounts={
log_dir: '/var/log/ceph:z',
mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
tmp_bootstrap_keyring.name: '/tmp/keyring:z',
tmp_monmap.name: '/tmp/monmap:z',
},
).run()
if mon_network:
logger.info('Setting mon public_network...')
cli(['config', 'set', 'mon', 'public_network', mon_network])
if ipv6:
logger.info('Enabling IPv6 (ms_bind_ipv6)')
cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
# create mgr
logger.info('Creating mgr...')
mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
mgr_c = get_container(fsid, 'mgr', mgr_id)
    # Note: the default port used by the Prometheus node exporter is opened in fw
deploy_daemon(fsid, 'mgr', mgr_id, mgr_c, uid, gid,
config=config, keyring=mgr_keyring, ports=[9283])
# output files
with open(args.output_keyring, 'w') as f:
os.fchmod(f.fileno(), 0o600)
f.write('[client.admin]\n'
'\tkey = ' + admin_key + '\n')
logger.info('Wrote keyring to %s' % args.output_keyring)
# ssh
if not args.skip_ssh:
cli(['config-key', 'set', 'mgr/cephadm/ssh_user', args.ssh_user])
if args.ssh_config:
logger.info('Using provided ssh config...')
mounts = {
pathify(args.ssh_config.name): '/tmp/cephadm-ssh-config:z',
}
cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'],
extra_mounts=mounts)
if not os.path.exists(ssh_dir):
makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)
if os.path.exists(auth_keys_file):
with open(auth_keys_file, 'r') as f:
f.seek(0, os.SEEK_END)
if f.tell() > 0:
f.seek(f.tell()-1, os.SEEK_SET) # go to last char
if f.read() != '\n':
add_newline = True
host = get_hostname()
logger.info('Adding host %s...' % host)
try:
cli(['orch', 'host', 'add', host])
except RuntimeError as e:
raise Error('Failed to add host <%s>: %s' % (host, e))
if not args.orphan_initial_daemons:
for t in ['mon', 'mgr', 'crash']:
logger.info('Deploying %s service with default placement...' % t)
cli(['orch', 'apply', t])
if not args.skip_monitoring_stack:
logger.info('Enabling mgr prometheus module...')
cli(['mgr', 'module', 'enable', 'prometheus'])
for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
logger.info('Deploying %s service with default placement...' % t)
cli(['orch', 'apply', t])
if args.container_init:
cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init',
args.container_init, '--force'])
if not args.skip_dashboard:
        # Configure SSL port (cephadm only allows to configure dashboard SSL port);
        # if the user does not want to use SSL he can change this setting once the
        # cluster is up
        cli(["config", "set", "mgr", "mgr/dashboard/ssl_server_port",
             str(args.ssl_dashboard_port)])
if args.apply_spec:
logger.info('Applying %s to cluster' % args.apply_spec)
with open(args.apply_spec) as f:
for line in f:
if 'hostname:' in line:
line = line.replace('\n', '')
split = line.split(': ')
if split[1] != host:
logger.info('Adding ssh key to %s' % split[1])
ssh_key = '/etc/ceph/ceph.pub'
if args.ssh_public_key:
ssh_key = args.ssh_public_key.name
out, err, code = call_throws(['ssh-copy-id', '-f', '-i',
ssh_key, '%s@%s' % (args.ssh_user, split[1])])
mounts = {}
mounts[pathify(args.apply_spec)] = '/tmp/spec.yml:z'
##################################
def command_registry_login():
if args.registry_json:
logger.info("Pulling custom registry login info from %s." %
args.registry_json)
d = get_parm(args.registry_json)
if d.get('url') and d.get('username') and d.get('password'):
args.registry_url = d.get('url')
args.registry_username = d.get('username')
args.registry_password = d.get('password')
registry_login(args.registry_url, args.registry_username,
args.registry_password)
else:
raise Error("json provided for custom registry login did not include
all necessary fields. "
"Please setup json file as\n"
"{\n"
" \"url\": \"REGISTRY_URL\",\n"
" \"username\": \"REGISTRY_USERNAME\",\n"
" \"password\": \"REGISTRY_PASSWORD\"\n"
"}\n")
elif args.registry_url and args.registry_username and args.registry_password:
registry_login(args.registry_url, args.registry_username,
args.registry_password)
else:
raise Error("Invalid custom registry arguments received. To login to a
custom registry include "
"--registry-url, --registry-username and --registry-
password "
"options or --registry-json option")
return 0
##################################
def extract_uid_gid_monitoring(daemon_type):
# type: (str) -> Tuple[int, int]
if daemon_type == 'prometheus':
uid, gid = extract_uid_gid(file_path='/etc/prometheus')
elif daemon_type == 'node-exporter':
uid, gid = 65534, 65534
elif daemon_type == 'grafana':
uid, gid = extract_uid_gid(file_path='/var/lib/grafana')
elif daemon_type == 'alertmanager':
uid, gid = extract_uid_gid(file_path=['/etc/alertmanager',
'/etc/prometheus'])
else:
raise Error("{} not implemented yet".format(daemon_type))
return uid, gid
@default_image
def command_deploy():
# type: () -> None
daemon_type, daemon_id = args.name.split('.', 1)
l = FileLock(args.fsid)
l.acquire()
redeploy = False
unit_name = get_unit_name(args.fsid, daemon_type, daemon_id)
(_, state, _) = check_unit(unit_name)
if state == 'running':
redeploy = True
if args.reconfig:
logger.info('%s daemon %s ...' % ('Reconfig', args.name))
elif redeploy:
logger.info('%s daemon %s ...' % ('Redeploy', args.name))
else:
logger.info('%s daemon %s ...' % ('Deploy', args.name))
if daemon_type in Ceph.daemons:
config, keyring = get_config_and_keyring()
uid, gid = extract_uid_gid()
make_var_run(args.fsid, uid, gid)
else:
raise Error('daemon type {} not implemented in command_deploy function'
.format(daemon_type))
##################################
@infer_image
def command_run():
# type: () -> int
(daemon_type, daemon_id) = args.name.split('.', 1)
c = get_container(args.fsid, daemon_type, daemon_id)
command = c.run_cmd()
return call_timeout(command, args.timeout)
##################################
@infer_fsid
@infer_config
@infer_image
def command_shell():
# type: () -> int
if args.fsid:
make_log_dir(args.fsid)
if args.name:
if '.' in args.name:
(daemon_type, daemon_id) = args.name.split('.', 1)
else:
daemon_type = args.name
daemon_id = None
else:
daemon_type = 'osd' # get the most mounts
daemon_id = None
c = CephContainer(
image=args.image,
entrypoint='doesnotmatter',
args=[],
container_args=container_args,
volume_mounts=mounts,
bind_mounts=binds,
envs=args.env,
privileged=True)
command = c.shell_cmd(command)
##################################
@infer_fsid
def command_enter():
# type: () -> int
if not args.fsid:
raise Error('must pass --fsid to specify cluster')
(daemon_type, daemon_id) = args.name.split('.', 1)
container_args = [] # type: List[str]
if args.command:
command = args.command
else:
command = ['sh']
container_args += [
'-it',
'-e', 'LANG=C',
'-e', "PS1=%s" % CUSTOM_PS1,
]
c = CephContainer(
image=args.image,
entrypoint='doesnotmatter',
container_args=container_args,
cname='ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id),
)
command = c.exec_cmd(command)
return call_timeout(command, args.timeout)
##################################
@infer_fsid
@infer_image
def command_ceph_volume():
# type: () -> None
if args.fsid:
make_log_dir(args.fsid)
l = FileLock(args.fsid)
l.acquire()
tmp_config = None
tmp_keyring = None
(config, keyring) = get_config_and_keyring()
if config:
# tmp config file
tmp_config = write_tmp(config, uid, gid)
mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
if keyring:
# tmp keyring file
tmp_keyring = write_tmp(keyring, uid, gid)
mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'
c = CephContainer(
image=args.image,
entrypoint='/usr/sbin/ceph-volume',
envs=args.env,
args=args.command,
privileged=True,
volume_mounts=mounts,
)
out, err, code = call_throws(c.run_cmd(), verbose=True)
if not code:
print(out)
##################################
@infer_fsid
def command_unit():
# type: () -> None
if not args.fsid:
raise Error('must pass --fsid to specify cluster')
call_throws([
'systemctl',
args.command,
unit_name])
##################################
@infer_fsid
def command_logs():
# type: () -> None
if not args.fsid:
raise Error('must pass --fsid to specify cluster')
cmd = [find_program('journalctl')]
cmd.extend(['-u', unit_name])
if args.command:
cmd.extend(args.command)
##################################
def list_networks():
# type: () -> Dict[str,List[str]]
res = _list_ipv4_networks()
res.update(_list_ipv6_networks())
return res
def _list_ipv4_networks():
out, _, _ = call_throws([find_executable('ip'), 'route', 'ls'])
return _parse_ipv4_route(out)
def _parse_ipv4_route(out):
r = {} # type: Dict[str,List[str]]
p = re.compile(r'^(\S+) (.*)scope link (.*)src (\S+)')
for line in out.splitlines():
m = p.findall(line)
if not m:
continue
net = m[0][0]
ip = m[0][3]
if net not in r:
r[net] = []
r[net].append(ip)
return r
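# e.g.:
#
#   >>> _parse_ipv4_route('10.0.0.0/24 dev eth0 proto kernel scope link src 10.0.0.2')
#   {'10.0.0.0/24': ['10.0.0.2']}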
def _list_ipv6_networks():
routes, _, _ = call_throws([find_executable('ip'), '-6', 'route', 'ls'])
ips, _, _ = call_throws([find_executable('ip'), '-6', 'addr', 'ls'])
return _parse_ipv6_route(routes, ips)
return r
def command_list_networks():
# type: () -> None
r = list_networks()
print(json.dumps(r, indent=4))
##################################
def command_ls():
# type: () -> None
ls = list_daemons(detail=not args.no_detail,
legacy_dir=args.legacy_dir)
print(json.dumps(ls, indent=4))
def list_daemons(detail=True, legacy_dir=None):
    # type: (bool, Optional[str]) -> List[Dict[str, str]]
    host_version = None
    ls = []

    data_dir = args.data_dir
if legacy_dir is not None:
data_dir = os.path.abspath(legacy_dir + data_dir)
# /var/lib/ceph
if os.path.exists(data_dir):
for i in os.listdir(data_dir):
if i in ['mon', 'osd', 'mds', 'mgr']:
daemon_type = i
for j in os.listdir(os.path.join(data_dir, i)):
if '-' not in j:
continue
(cluster, daemon_id) = j.split('-', 1)
fsid = get_legacy_daemon_fsid(
cluster, daemon_type, daemon_id,
legacy_dir=legacy_dir)
legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
i = {
'style': 'legacy',
'name': '%s.%s' % (daemon_type, daemon_id),
'fsid': fsid if fsid is not None else 'unknown',
'systemd_unit': legacy_unit_name,
}
if detail:
                        (i['enabled'], i['state'], _) = check_unit(legacy_unit_name)
if not host_version:
try:
out, err, code = call(['ceph', '-v'])
if not code and out.startswith('ceph version '):
host_version = out.split(' ')[2]
except Exception:
pass
i['host_version'] = host_version
ls.append(i)
elif is_fsid(i):
fsid = str(i) # convince mypy that fsid is a str here
for j in os.listdir(os.path.join(data_dir, i)):
if '.' in j:
name = j
(daemon_type, daemon_id) = j.split('.', 1)
unit_name = get_unit_name(fsid,
daemon_type,
daemon_id)
else:
continue
i = {
'style': 'cephadm:v1',
'name': name,
'fsid': fsid,
'systemd_unit': unit_name,
}
if detail:
# get container id
(i['enabled'], i['state'], _) = check_unit(unit_name)
container_id = None
image_name = None
image_id = None
version = None
start_stamp = None
ls.append(i)
return ls
##################################
@default_image
def command_adopt():
# type: () -> None
if not args.skip_pull:
_pull_image(args.image)
    (daemon_type, daemon_id) = args.name.split('.', 1)

    # legacy check
    if args.style != 'legacy':
raise Error('adoption of style %s not implemented' % args.style)
# lock
fsid = get_legacy_daemon_fsid(args.cluster,
daemon_type,
daemon_id,
legacy_dir=args.legacy_dir)
if not fsid:
raise Error('could not detect legacy fsid; set fsid in ceph.conf')
l = FileLock(fsid)
l.acquire()
class AdoptOsd(object):
def __init__(self, osd_data_dir, osd_id):
# type: (str, str) -> None
self.osd_data_dir = osd_data_dir
self.osd_id = osd_id
def check_online_osd(self):
# type: () -> Tuple[Optional[str], Optional[str]]
def check_offline_lvm_osd(self):
# type: () -> Tuple[Optional[str], Optional[str]]
        osd_fsid, osd_type = None, None

        c = CephContainer(
image=args.image,
entrypoint='/usr/sbin/ceph-volume',
args=['lvm', 'list', '--format=json'],
privileged=True
)
out, err, code = call_throws(c.run_cmd(), verbose=False)
if not code:
try:
js = json.loads(out)
if self.osd_id in js:
logger.info("Found offline LVM OSD {}".format(self.osd_id))
osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
for device in js[self.osd_id]:
if device['tags']['ceph.type'] == 'block':
osd_type = 'bluestore'
break
if device['tags']['ceph.type'] == 'data':
osd_type = 'filestore'
break
except ValueError as e:
logger.info("Invalid JSON in ceph-volume lvm list: {}".format(e))
def check_offline_simple_osd(self):
# type: () -> Tuple[Optional[str], Optional[str]]
osd_fsid, osd_type = None, None
osd_file = glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self.osd_id))
if len(osd_file) == 1:
with open(osd_file[0], 'r') as f:
try:
js = json.loads(f.read())
logger.info("Found offline simple OSD {}".format(self.osd_id))
osd_fsid = js["fsid"]
osd_type = js["type"]
if osd_type != "filestore":
# need this to be mounted for the adopt to work, as it
# needs to move files from this directory
call_throws(['mount', js["data"]["path"],
self.osd_data_dir])
except ValueError as e:
logger.info("Invalid JSON in {}: {}".format(osd_file, e))
data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
(daemon_type, args.cluster, daemon_id))
data_dir_src = os.path.abspath(args.legacy_dir + data_dir_src)
if not os.path.exists(data_dir_src):
raise Error("{}.{} data directory '{}' does not exist. "
"Incorrect ID specified, or daemon alrady adopted?".format(
daemon_type, daemon_id, data_dir_src))
osd_fsid = None
if daemon_type == 'osd':
adopt_osd = AdoptOsd(data_dir_src, daemon_id)
osd_fsid, osd_type = adopt_osd.check_online_osd()
if not osd_fsid:
osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
if not osd_fsid:
osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
if not osd_fsid:
raise Error('Unable to find OSD {}'.format(daemon_id))
logger.info('objectstore_type is %s' % osd_type)
assert osd_type
if osd_type == 'filestore':
raise Error('FileStore is not supported by cephadm')
# data
logger.info('Moving data...')
data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
uid=uid, gid=gid)
move_files(glob(os.path.join(data_dir_src, '*')),
data_dir_dst,
uid=uid, gid=gid)
logger.debug('Remove dir \'%s\'' % (data_dir_src))
if os.path.ismount(data_dir_src):
call_throws(['umount', data_dir_src])
os.rmdir(data_dir_src)
logger.info('Chowning content...')
call_throws(['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])
if daemon_type == 'mon':
# rename *.ldb -> *.sst, in case they are coming from ubuntu
store = os.path.join(data_dir_dst, 'store.db')
num_renamed = 0
if os.path.exists(store):
for oldf in os.listdir(store):
if oldf.endswith('.ldb'):
newf = oldf.replace('.ldb', '.sst')
oldp = os.path.join(store, oldf)
newp = os.path.join(store, newf)
logger.debug('Renaming %s -> %s' % (oldp, newp))
os.rename(oldp, newp)
num_renamed += 1
if num_renamed:
logger.info('Renamed %d leveldb *.ldb files to *.sst',
num_renamed)
if daemon_type == 'osd':
for n in ['block', 'block.db', 'block.wal']:
p = os.path.join(data_dir_dst, n)
if os.path.exists(p):
logger.info('Chowning %s...' % p)
os.chown(p, uid, gid)
# disable the ceph-volume 'simple' mode files on the host
simple_fn = os.path.join('/etc/ceph/osd',
'%s-%s.json' % (daemon_id, osd_fsid))
if os.path.exists(simple_fn):
new_fn = simple_fn + '.adopted-by-cephadm'
logger.info('Renaming %s -> %s', simple_fn, new_fn)
os.rename(simple_fn, new_fn)
logger.info('Disabling host unit ceph-volume@ simple unit...')
call(['systemctl', 'disable',
'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
else:
# assume this is an 'lvm' c-v for now, but don't error
# out if it's not.
logger.info('Disabling host unit ceph-volume@ lvm unit...')
call(['systemctl', 'disable',
'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])
# config
config_src = '/etc/ceph/%s.conf' % (args.cluster)
config_src = os.path.abspath(args.legacy_dir + config_src)
config_dst = os.path.join(data_dir_dst, 'config')
copy_files([config_src], config_dst, uid=uid, gid=gid)
# logs
logger.info('Moving logs...')
log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
(args.cluster, daemon_type, daemon_id))
log_dir_src = os.path.abspath(args.legacy_dir + log_dir_src)
log_dir_dst = make_log_dir(fsid, uid=uid, gid=gid)
move_files(glob(log_dir_src),
log_dir_dst,
uid=uid, gid=gid)
def command_adopt_prometheus(daemon_id, fsid):
# type: (str, str) -> None
daemon_type = 'prometheus'
(uid, gid) = extract_uid_gid_monitoring(daemon_type)
_stop_and_disable('prometheus')
data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
uid=uid, gid=gid)
# config
config_src = '/etc/prometheus/prometheus.yml'
config_src = os.path.abspath(args.legacy_dir + config_src)
config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
makedirs(config_dst, uid, gid, 0o755)
copy_files([config_src], config_dst, uid=uid, gid=gid)
# data
data_src = '/var/lib/prometheus/metrics/'
data_src = os.path.abspath(args.legacy_dir + data_src)
data_dst = os.path.join(data_dir_dst, 'data')
copy_tree([data_src], data_dst, uid=uid, gid=gid)
def command_adopt_grafana(daemon_id, fsid):
# type: (str, str) -> None
daemon_type = 'grafana'
(uid, gid) = extract_uid_gid_monitoring(daemon_type)
_stop_and_disable('grafana-server')
data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
uid=uid, gid=gid)
# config
config_src = '/etc/grafana/grafana.ini'
config_src = os.path.abspath(args.legacy_dir + config_src)
config_dst = os.path.join(data_dir_dst, 'etc/grafana')
makedirs(config_dst, uid, gid, 0o755)
copy_files([config_src], config_dst, uid=uid, gid=gid)
prov_src = '/etc/grafana/provisioning/'
prov_src = os.path.abspath(args.legacy_dir + prov_src)
prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
copy_tree([prov_src], prov_dst, uid=uid, gid=gid)
# cert
cert = '/etc/grafana/grafana.crt'
key = '/etc/grafana/grafana.key'
if os.path.exists(cert) and os.path.exists(key):
cert_src = '/etc/grafana/grafana.crt'
cert_src = os.path.abspath(args.legacy_dir + cert_src)
makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
copy_files([cert_src], cert_dst, uid=uid, gid=gid)
key_src = '/etc/grafana/grafana.key'
key_src = os.path.abspath(args.legacy_dir + key_src)
key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
copy_files([key_src], key_dst, uid=uid, gid=gid)
_adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
else:
logger.debug("Skipping ssl, missing cert {} or key {}".format(cert, key))
def command_adopt_alertmanager(daemon_id, fsid):
# type: (str, str) -> None
daemon_type = 'alertmanager'
(uid, gid) = extract_uid_gid_monitoring(daemon_type)
_stop_and_disable('prometheus-alertmanager')
data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
uid=uid, gid=gid)
# config
config_src = '/etc/prometheus/alertmanager.yml'
config_src = os.path.abspath(args.legacy_dir + config_src)
config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
makedirs(config_dst, uid, gid, 0o755)
copy_files([config_src], config_dst, uid=uid, gid=gid)
# data
data_src = '/var/lib/prometheus/alertmanager/'
data_src = os.path.abspath(args.legacy_dir + data_src)
data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
copy_tree([data_src], data_dst, uid=uid, gid=gid)
def _adjust_grafana_ini(filename):
# type: (str) -> None
def _stop_and_disable(unit_name):
# type: (str) -> None
def command_rm_daemon():
# type: () -> None
l = FileLock(args.fsid)
l.acquire()
##################################
def command_rm_cluster():
# type: () -> None
if not args.force:
raise Error('must pass --force to proceed: '
'this command may destroy precious data!')
l = FileLock(args.fsid)
l.acquire()
# cluster units
for unit_name in ['ceph-%s.target' % args.fsid]:
call(['systemctl', 'stop', unit_name],
verbose_on_failure=False)
call(['systemctl', 'reset-failed', unit_name],
verbose_on_failure=False)
call(['systemctl', 'disable', unit_name],
verbose_on_failure=False)
# rm units
call_throws(['rm', '-f', args.unit_dir +
'/ceph-%s@.service' % args.fsid])
call_throws(['rm', '-f', args.unit_dir +
'/ceph-%s.target' % args.fsid])
call_throws(['rm', '-rf',
args.unit_dir + '/ceph-%s.target.wants' % args.fsid])
# rm data
call_throws(['rm', '-rf', args.data_dir + '/' + args.fsid])
# rm logs
call_throws(['rm', '-rf', args.log_dir + '/' + args.fsid])
call_throws(['rm', '-rf', args.log_dir +
'/*.wants/ceph-%s@*' % args.fsid])
# rm logrotate config
call_throws(['rm', '-f', args.logrotate_dir + '/ceph-%s' % args.fsid])
# clean up config, keyring, and pub key files
files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']
if os.path.exists(files[0]):
valid_fsid = False
with open(files[0]) as f:
if args.fsid in f.read():
valid_fsid = True
if valid_fsid:
for n in range(0, len(files)):
if os.path.exists(files[n]):
os.remove(files[n])
##################################
def check_time_sync(enabler=None):
# type: (Optional[Packager]) -> bool
units = [
'chrony.service', # 18.04 (at least)
'chronyd.service', # el / opensuse
'systemd-timesyncd.service',
'ntpd.service', # el7 (at least)
'ntp.service', # 18.04 (at least)
]
if not check_units(units, enabler):
logger.warning('No time sync service is running; checked for %s' % units)
return False
return True
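# check_units() treats the check as satisfied when any one of these units
# is found running, so a single active time-sync daemon is enough.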
def command_check_host():
# type: () -> None
global container_path
errors = []
commands = ['systemctl', 'lvcreate']
if args.docker:
container_path = find_program('docker')
else:
for i in CONTAINER_PREFERENCE:
try:
container_path = find_program(i)
break
except Exception as e:
logger.debug('Could not locate %s: %s' % (i, e))
if not container_path:
errors.append('Unable to locate any of %s' % CONTAINER_PREFERENCE)
else:
logger.info('podman|docker (%s) is present' % container_path)
if errors:
raise Error('\n'.join(errors))
##################################
def command_prepare_host():
# type: () -> None
logger.info('Verifying podman|docker is present...')
pkg = None
if not container_path:
if not pkg:
pkg = create_packager()
pkg.install_podman()
##################################
class CustomValidation(argparse.Action):
def _check_name(self, values):
(daemon_type, daemon_id) = values.split('.', 1)
daemons = get_supported_daemons()
if daemon_type not in daemons:
raise argparse.ArgumentError(self,
"name must declare the type of daemon e.g. "
"{}".format(', '.join(daemons)))
def __call__(self, parser, namespace, values, option_string=None):
if self.dest == "name":
self._check_name(values)
setattr(namespace, self.dest, values)
##################################
def get_distro():
# type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
distro = None
distro_version = None
distro_codename = None
with open('/etc/os-release', 'r') as f:
for line in f.readlines():
line = line.strip()
if '=' not in line or line.startswith('#'):
continue
(var, val) = line.split('=', 1)
if val[0] == '"' and val[-1] == '"':
val = val[1:-1]
if var == 'ID':
distro = val.lower()
elif var == 'VERSION_ID':
distro_version = val.lower()
elif var == 'VERSION_CODENAME':
distro_codename = val.lower()
return distro, distro_version, distro_codename
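# Example: an Ubuntu 18.04 /etc/os-release (ID=ubuntu, VERSION_ID="18.04",
# VERSION_CODENAME=bionic) yields ('ubuntu', '18.04', 'bionic').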
class Packager(object):
def __init__(self, stable=None, version=None, branch=None, commit=None):
assert \
(stable and not version and not branch and not commit) or \
(not stable and version and not branch and not commit) or \
(not stable and not version and branch) or \
(not stable and not version and not branch and not commit)
self.stable = stable
self.version = version
self.branch = branch
self.commit = commit
def add_repo(self):
raise NotImplementedError
def rm_repo(self):
raise NotImplementedError
def repo_gpgkey(self):
if args.gpg_url:
# a custom key still needs a (url, name) pair; callers unpack both
return args.gpg_url, 'manual'
if self.stable or self.version:
return 'https://download.ceph.com/keys/release.asc', 'release'
else:
return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'
class Apt(Packager):
DISTRO_NAMES = {
'ubuntu': 'ubuntu',
'debian': 'debian',
}
def repo_path(self):
return '/etc/apt/sources.list.d/ceph.list'
def add_repo(self):
url, name = self.repo_gpgkey()
logger.info('Installing repo GPG key from %s...' % url)
try:
response = urlopen(url)
except HTTPError as err:
logger.error('failed to fetch GPG repo key from %s: %s' % (
url, err))
raise Error('failed to fetch GPG key')
key = response.read().decode('utf-8')
with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
f.write(key)
if self.version:
content = 'deb %s/debian-%s/ %s main\n' % (
args.repo_url, self.version, self.distro_codename)
elif self.stable:
content = 'deb %s/debian-%s/ %s main\n' % (
args.repo_url, self.stable, self.distro_codename)
else:
content = self.query_shaman(self.distro, self.distro_codename,
self.branch,
self.commit)
logger.info('Installing repo file at %s...' % self.repo_path())
with open(self.repo_path(), 'w') as f:
f.write(content)
def rm_repo(self):
for name in ['autobuild', 'release']:
p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
if os.path.exists(p):
logger.info('Removing repo GPG key %s...' % p)
os.unlink(p)
if os.path.exists(self.repo_path()):
logger.info('Removing repo at %s...' % self.repo_path())
os.unlink(self.repo_path())
def install_podman(self):
if self.distro == 'ubuntu':
logger.info('Setting up repo for podman...')
self.install(['software-properties-common'])
call_throws(['add-apt-repository', '-y', 'ppa:projectatomic/ppa'])
call_throws(['apt', 'update'])
logger.info('Attempting podman install...')
try:
self.install(['podman'])
except Error:
logger.info('Podman did not work. Falling back to docker...')
self.install(['docker.io'])
class YumDnf(Packager):
DISTRO_NAMES = {
'centos': ('centos', 'el'),
'rhel': ('centos', 'el'),
'scientific': ('centos', 'el'),
'fedora': ('fedora', 'fc'),
}
def custom_repo(self, **kw):
"""
Repo files need special care in that a whole line should not be present
if there is no value for it, so this builds the file from ordered
(key, template) pairs and skips any key without a value. A file with
empty values, e.g.::
[ceph repo]
name= ceph repo
proxy=
gpgcheck=
would break the package manager.
Previously, and for historical purposes, this is how the template used
to look::
custom_repo =
[{repo_name}]
name={name}
baseurl={baseurl}
enabled={enabled}
gpgcheck={gpgcheck}
type={_type}
gpgkey={gpgkey}
proxy={proxy}
"""
lines = []
# (key, template) tuples preserve the order of the rendered repo file,
# starting with the [repo name] section header
tmpl = (
('reponame', '[%s]'),
('name', 'name=%s'),
('baseurl', 'baseurl=%s'),
('enabled', 'enabled=%s'),
('gpgcheck', 'gpgcheck=%s'),
('_type', 'type=%s'),
('gpgkey', 'gpgkey=%s'),
('proxy', 'proxy=%s'),
)
for tmpl_key, tmpl_value in tmpl:
# ensure that there is an actual value (not None nor empty string)
if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
lines.append(tmpl_value % kw.get(tmpl_key))
return '\n'.join(lines)
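# Illustrative usage (values are examples only): custom_repo(reponame='Ceph',
# name='Ceph packages', baseurl='https://example.com/rpm/el8/$basearch',
# enabled=1, gpgcheck=1) renders the five populated lines and silently
# omits the unset type/gpgkey/proxy keys.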
def repo_path(self):
return '/etc/yum.repos.d/ceph.repo'
def repo_baseurl(self):
assert self.stable or self.version
if self.version:
return '%s/rpm-%s/%s' % (args.repo_url, self.version,
self.distro_code)
else:
return '%s/rpm-%s/%s' % (args.repo_url, self.stable,
self.distro_code)
def add_repo(self):
if self.stable or self.version:
content = ''
for n, t in {
'Ceph': '$basearch',
'Ceph-noarch': 'noarch',
'Ceph-source': 'SRPMS'}.items():
content += '[%s]\n' % (n)
content += self.custom_repo(
name='Ceph %s' % t,
baseurl=self.repo_baseurl() + '/' + t,
enabled=1,
gpgcheck=1,
gpgkey=self.repo_gpgkey()[0],
)
content += '\n\n'
else:
content = self.query_shaman(self.distro_normalized, self.major,
self.branch,
self.commit)
logger.info('Writing repo to %s...' % self.repo_path())
with open(self.repo_path(), 'w') as f:
f.write(content)
if self.distro_code.startswith('el'):
logger.info('Enabling EPEL...')
call_throws([self.tool, 'install', '-y', 'epel-release'])
def rm_repo(self):
if os.path.exists(self.repo_path()):
os.unlink(self.repo_path())
def install_podman(self):
self.install(['podman'])
class Zypper(Packager):
DISTRO_NAMES = [
'sles',
'opensuse-tumbleweed',
'opensuse-leap'
]
def custom_repo(self, **kw):
"""Build a repo file from ordered (key, template) pairs, skipping any
key without a value; same approach as YumDnf.custom_repo above."""
lines = []
tmpl = (
('reponame', '[%s]'),
('name', 'name=%s'),
('baseurl', 'baseurl=%s'),
('enabled', 'enabled=%s'),
('gpgcheck', 'gpgcheck=%s'),
('_type', 'type=%s'),
('gpgkey', 'gpgkey=%s'),
('proxy', 'proxy=%s'),
)
for tmpl_key, tmpl_value in tmpl:
# ensure that there is an actual value (not None nor empty string)
if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
lines.append(tmpl_value % kw.get(tmpl_key))
return '\n'.join(lines)
def repo_path(self):
return '/etc/zypp/repos.d/ceph.repo'
def repo_baseurl(self):
assert self.stable or self.version
if self.version:
return '%s/rpm-%s/%s' % (args.repo_url, self.version, self.distro)
else:
return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)
def add_repo(self):
if self.stable or self.version:
content = ''
for n, t in {
'Ceph': '$basearch',
'Ceph-noarch': 'noarch',
'Ceph-source': 'SRPMS'}.items():
content += '[%s]\n' % (n)
content += self.custom_repo(
name='Ceph %s' % t,
baseurl=self.repo_baseurl() + '/' + t,
enabled=1,
gpgcheck=1,
gpgkey=self.repo_gpgkey()[0],
)
content += '\n\n'
else:
content = self.query_shaman(self.distro, self.distro_version,
self.branch,
self.commit)
logger.info('Writing repo to %s...' % self.repo_path())
with open(self.repo_path(), 'w') as f:
f.write(content)
def rm_repo(self):
if os.path.exists(self.repo_path()):
os.unlink(self.repo_path())
def install_podman(self):
self.install(['podman'])
def command_add_repo():
if args.version and args.release:
raise Error('you can specify either --release or --version but not both')
if not args.version and not args.release and not args.dev and not args.dev_commit:
raise Error('please supply a --release, --version, --dev or --dev-commit argument')
if args.version:
try:
(x, y, z) = args.version.split('.')
except Exception:
raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
pkg = create_packager(stable=args.release,
version=args.version,
branch=args.dev,
commit=args.dev_commit)
pkg.add_repo()
def command_rm_repo():
pkg = create_packager()
pkg.rm_repo()
def command_install():
pkg = create_packager()
pkg.install(args.packages)
##################################
def get_ipv4_address(ifname):
# type: (str) -> str
def _extract(sock, offset):
return socket.inet_ntop(
socket.AF_INET,
fcntl.ioctl(
sock.fileno(),
offset,
struct.pack('256s', bytes(ifname[:15], 'utf-8'))
)[20:24])
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
addr = _extract(s, 35093) # 0x8915 = SIOCGIFADDR
dq_mask = _extract(s, 35099) # 0x891b = SIOCGIFNETMASK
except OSError:
# interface does not have an ipv4 address
return ''
dec_mask = sum([bin(int(i)).count('1')
for i in dq_mask.split('.')])
return '{}/{}'.format(addr, dec_mask)
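# Example: an interface with address 192.168.1.10 and netmask 255.255.255.0
# yields '192.168.1.10/24'; the prefix length is the total set bits across
# the dotted-quad mask (bin(255).count('1') * 3 == 24).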
def get_ipv6_address(ifname):
# type: (str) -> str
if not os.path.exists('/proc/net/if_inet6'):
return ''
raw = read_file(['/proc/net/if_inet6'])
data = raw.splitlines()
# based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
# field 0 is the address, field 2 is the prefix length (hex)
for iface_setting in data:
field = iface_setting.split()
if field[-1] == ifname:
ipv6_raw = field[0]
ipv6_fmtd = ":".join([ipv6_raw[_p:_p+4] for _p in range(0,
len(field[0]),4)])
# apply naming rules using ipaddress module
ipv6 = ipaddress.ip_address(ipv6_fmtd)
return "{}/{}".format(str(ipv6), int('0x{}'.format(field[2]), 16))
return ''
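# Illustrative /proc/net/if_inet6 line (fields: address, ifindex, prefix
# length, scope, flags, name):
#   fe800000000000000000000000000001 02 40 20 80 eth0
# yields 'fe80::1/64' (0x40 == 64).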
def bytes_to_human(num, mode='decimal'):
# type: (float, str) -> str
"""Convert a byte count into a human-readable string (decimal or binary units)"""
unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
divisor = 1000.0
yotta = "YB"
if mode == 'binary':
unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
divisor = 1024.0
yotta = "YiB"
for unit in unit_list:
if abs(num) < divisor:
return "%3.1f%s" % (num, unit)
num = num / divisor
return "%.1f%s" % (num, yotta)
##################################
class HostFacts():
_dmi_path_list = ['/sys/class/dmi/id']
_nic_path_list = ['/sys/class/net']
_selinux_path_list = ['/etc/selinux/config']
_apparmor_path_list = ['/etc/apparmor']
_disk_vendor_workarounds = {
"0x1af4": "Virtio Block Device"
}
def __init__(self):
self.cpu_model = 'Unknown'
self.cpu_count = 0
self.cpu_cores = 0
self.cpu_threads = 0
self.interfaces = {}
self._meminfo = read_file(['/proc/meminfo']).splitlines()
self._get_cpuinfo()
self._process_nics()
self.arch = platform.processor()
self.kernel = platform.release()
def _get_cpuinfo(self):
# type: () -> None
"""Determine cpu information via /proc/cpuinfo"""
raw = read_file(['/proc/cpuinfo'])
output = raw.splitlines()
cpu_set = set()
def _get_block_devs(self):
# type: () -> List[str]
"""Determine the list of block devices by looking at /sys/block"""
return [dev for dev in os.listdir('/sys/block')
if not dev.startswith('dm')]
@property
def operating_system(self):
# type: () -> str
"""Determine OS version"""
raw_info = read_file(['/etc/os-release'])
os_release = raw_info.splitlines()
rel_str = 'Unknown'
rel_dict = dict()
for line in os_release:
if "=" in line:
var_name, var_value = line.split('=')
rel_dict[var_name] = var_value.strip('"')
# Would normally use PRETTY_NAME, but NAME and VERSION are more
# consistent
if all(_v in rel_dict for _v in ["NAME", "VERSION"]):
rel_str = "{} {}".format(rel_dict['NAME'], rel_dict['VERSION'])
return rel_str
@property
def hostname(self):
# type: () -> str
"""Return the hostname"""
return platform.node()
@property
def subscribed(self):
# type: () -> str
"""Highlevel check to see if the host is subscribed to receive
updates/support"""
def _red_hat():
# type: () -> str
# RHEL 7 and RHEL 8
entitlements_dir = '/etc/pki/entitlement'
if os.path.exists(entitlements_dir):
pems = glob('{}/*.pem'.format(entitlements_dir))
if len(pems) >= 2:
return "Yes"
return "No"
os_name = self.operating_system
if os_name.upper().startswith("RED HAT"):
return _red_hat()
return "Unknown"
@property
def hdd_count(self):
# type: () -> int
"""Return a count of HDDs (spinners)"""
return len(self._get_devs_by_type(rota='1'))
@property
def hdd_list(self):
# type: () -> List[Dict[str, object]]
"""Return a list of devices that are HDDs (spinners)"""
devs = self._get_devs_by_type(rota='1')
return self._dev_list(devs)
@property
def flash_list(self):
# type: () -> List[Dict[str, object]]
"""Return a list of devices that are flash based (SSD, NVMe)"""
devs = self._get_devs_by_type(rota='0')
return self._dev_list(devs)
@property
def hdd_capacity_bytes(self):
# type: () -> int
"""Return the total capacity for all HDD devices (bytes)"""
return self._get_capacity_by_type(rota='1')
@property
def hdd_capacity(self):
# type: () -> str
"""Return the total capacity for all HDD devices (human readable format)"""
return bytes_to_human(self.hdd_capacity_bytes)
@property
def cpu_load(self):
# type: () -> Dict[str, float]
"""Return the cpu load average data for the host"""
raw = read_file(['/proc/loadavg']).strip()
data = raw.split()
return {
"1min": float(data[0]),
"5min": float(data[1]),
"15min": float(data[2]),
}
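# Example: a /proc/loadavg of '0.52 0.58 0.59 2/1437 12345' maps to
# {'1min': 0.52, '5min': 0.58, '15min': 0.59}.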
@property
def flash_count(self):
# type: () -> int
"""Return the number of flash devices in the system (SSD, NVMe)"""
return len(self._get_devs_by_type(rota='0'))
@property
def flash_capacity_bytes(self):
# type: () -> int
"""Return the total capacity for all flash devices (bytes)"""
return self._get_capacity_by_type(rota='0')
@property
def flash_capacity(self):
# type: () -> str
"""Return the total capacity for all Flash devices (human readable
format)"""
return bytes_to_human(self.flash_capacity_bytes)
def _process_nics(self):
# type: () -> None
"""Look at the NIC devices and extract network related metadata"""
# from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
hw_lookup = {
"1": "ethernet",
"32": "infiniband",
"772": "loopback",
}
try:
mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
except ValueError:
mtu = 0
else:
iftype = 'logical'
driver = ''
self.interfaces[iface] = {
"mtu": mtu,
"upper_devs_list": upper_devs_list,
"lower_devs_list": lower_devs_list,
"operstate": operstate,
"iftype": iftype,
"nic_type": nic_type,
"driver": driver,
"speed": speed,
"ipv4_address": get_ipv4_address(iface),
"ipv6_address": get_ipv6_address(iface),
}
@property
def nic_count(self):
# type: () -> int
"""Return a total count of all physical NICs detected in the host"""
phys_devs = []
for iface in self.interfaces:
if self.interfaces[iface]["iftype"] == 'physical':
phys_devs.append(iface)
return len(phys_devs)
@property
def memory_total_kb(self):
# type: () -> int
"""Determine the memory installed (kb)"""
return self._get_mem_data('MemTotal')
@property
def memory_free_kb(self):
# type: () -> int
"""Determine the memory free (not cache, immediately usable)"""
return self._get_mem_data('MemFree')
@property
def memory_available_kb(self):
# type: () -> int
"""Determine the memory available to new applications without swapping"""
return self._get_mem_data('MemAvailable')
@property
def vendor(self):
# type: () -> str
"""Determine server vendor from DMI data in sysfs"""
return read_file(HostFacts._dmi_path_list, "sys_vendor")
@property
def model(self):
# type: () -> str
"""Determine server model information from DMI data in sysfs"""
family = read_file(HostFacts._dmi_path_list, "product_family")
product = read_file(HostFacts._dmi_path_list, "product_name")
if family == 'Unknown' and product:
return "{}".format(product)
return "{} ({})".format(family, product)
@property
def bios_version(self):
# type: () -> str
"""Determine server BIOS version from DMI data in sysfs"""
return read_file(HostFacts._dmi_path_list, "bios_version")
@property
def bios_date(self):
# type: () -> str
"""Determine server BIOS date from DMI data in sysfs"""
return read_file(HostFacts._dmi_path_list, "bios_date")
@property
def timestamp(self):
# type: () -> float
"""Return the current time as Epoch seconds"""
return time.time()
@property
def system_uptime(self):
# type: () -> float
"""Return the system uptime (in secs)"""
raw_time = read_file(['/proc/uptime'])
up_secs, _ = raw_time.split()
return float(up_secs)
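# Example: a /proc/uptime of '86400.25 171000.10' yields 86400.25, i.e.
# one day of uptime; the second (idle-time) field is discarded.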
@property
def kernel_security(self):
# type: () -> Dict[str, str]
"""Determine the security features enabled in the kernel - SELinux,
AppArmor"""
def _fetch_selinux():
"""Read the selinux config file to determine state"""
security = {}
for selinux_path in HostFacts._selinux_path_list:
if os.path.exists(selinux_path):
selinux_config = read_file([selinux_path]).splitlines()
security['type'] = 'SELinux'
for line in selinux_config:
line = line.strip()
if not line or line.startswith('#'):
continue
k, v = line.split('=')
security[k] = v
if security['SELINUX'].lower() == "disabled":
security['description'] = "SELinux: Disabled"
else:
security['description'] = "SELinux: Enabled({}, {})".format(security['SELINUX'], security['SELINUXTYPE'])
return security
def _fetch_apparmor():
"""Read the apparmor profiles directly, returning an overview of
AppArmor status"""
security = {}
for apparmor_path in HostFacts._apparmor_path_list:
if os.path.exists(apparmor_path):
security['type'] = "AppArmor"
security['description'] = "AppArmor: Enabled"
try:
profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
except OSError:
pass
else:
summary = {} # type: Dict[str, int]
for line in profiles.split('\n'):
if not line:
continue
item, mode = line.split(' ')
mode = mode.strip('()')
if mode in summary:
summary[mode] += 1
else:
summary[mode] = 1 # first profile seen in this mode
summary_str = ",".join(["{} {}".format(v, k) for k, v in summary.items()])
security = {**security, **summary} # type: ignore
security['description'] += "({})".format(summary_str)
return security
return security
if os.path.exists('/sys/kernel/security/lsm'):
lsm = read_file(['/sys/kernel/security/lsm']).strip()
if 'selinux' in lsm:
return _fetch_selinux()
elif 'apparmor' in lsm:
return _fetch_apparmor()
else:
return {
"type": "Unknown",
"description": "Linux Security Module framework is active, but is not using SELinux or AppArmor"
}
return {
"type": "None",
"description": "Linux Security Module framework is not available"
}
def dump(self):
# type: () -> str
"""Return the attributes of this HostFacts object as json"""
data = {k: getattr(self, k) for k in dir(self)
if not k.startswith('_') and
isinstance(getattr(self, k),
(float, int, str, list, dict, tuple))
}
return json.dumps(data, indent=2, sort_keys=True)
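# dump() walks dir(self) and keeps only public attributes and properties
# whose values are plain JSON-friendly types, so each @property above
# surfaces as a key in the emitted JSON document.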
##################################
def command_gather_facts():
"""gather_facts is intended to provide host releated metadata to the caller"""
host = HostFacts()
print(host.dump())
##################################
def _get_parser():
# type: () -> argparse.ArgumentParser
parser = argparse.ArgumentParser(
description='Bootstrap Ceph daemons with systemd and containers.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
'--image',
help='container image. Can also be set via the "CEPHADM_IMAGE" '
'env var')
parser.add_argument(
'--docker',
action='store_true',
help='use docker instead of podman')
parser.add_argument(
'--data-dir',
default=DATA_DIR,
help='base directory for daemon data')
parser.add_argument(
'--log-dir',
default=LOG_DIR,
help='base directory for daemon logs')
parser.add_argument(
'--logrotate-dir',
default=LOGROTATE_DIR,
help='location of logrotate configuration files')
parser.add_argument(
'--unit-dir',
default=UNIT_DIR,
help='base directory for systemd units')
parser.add_argument(
'--verbose', '-v',
action='store_true',
help='Show debug-level log messages')
parser.add_argument(
'--timeout',
type=int,
default=DEFAULT_TIMEOUT,
help='timeout in seconds')
parser.add_argument(
'--retry',
type=int,
default=DEFAULT_RETRY,
help='max number of retries')
parser.add_argument(
'--env', '-e',
action='append',
default=[],
help='set environment variable')
subparsers = parser.add_subparsers(help='sub-command')
parser_version = subparsers.add_parser(
'version', help='get ceph version from container')
parser_version.set_defaults(func=command_version)
parser_pull = subparsers.add_parser(
'pull', help='pull latest image version')
parser_pull.set_defaults(func=command_pull)
parser_inspect_image = subparsers.add_parser(
'inspect-image', help='inspect local container image')
parser_inspect_image.set_defaults(func=command_inspect_image)
parser_ls = subparsers.add_parser(
'ls', help='list daemon instances on this host')
parser_ls.set_defaults(func=command_ls)
parser_ls.add_argument(
'--no-detail',
action='store_true',
help='Do not include daemon status')
parser_ls.add_argument(
'--legacy-dir',
default='/',
help='base directory for legacy daemon data')
parser_list_networks = subparsers.add_parser(
'list-networks', help='list IP networks')
parser_list_networks.set_defaults(func=command_list_networks)
parser_adopt = subparsers.add_parser(
'adopt', help='adopt daemon deployed with a different tool')
parser_adopt.set_defaults(func=command_adopt)
parser_adopt.add_argument(
'--name', '-n',
required=True,
help='daemon name (type.id)')
parser_adopt.add_argument(
'--style',
required=True,
help='deployment style (legacy, ...)')
parser_adopt.add_argument(
'--cluster',
default='ceph',
help='cluster name')
parser_adopt.add_argument(
'--legacy-dir',
default='/',
help='base directory for legacy daemon data')
parser_adopt.add_argument(
'--config-json',
help='Additional configuration information in JSON format')
parser_adopt.add_argument(
'--skip-firewalld',
action='store_true',
help='Do not configure firewalld')
parser_adopt.add_argument(
'--skip-pull',
action='store_true',
help='do not pull the latest image before adopting')
parser_adopt.add_argument(
'--force-start',
action='store_true',
help="start newly adoped daemon, even if it wasn't running previously")
parser_adopt.add_argument(
'--container-init',
action='store_true',
help='Run podman/docker with `--init`')
parser_rm_daemon = subparsers.add_parser(
'rm-daemon', help='remove daemon instance')
parser_rm_daemon.set_defaults(func=command_rm_daemon)
parser_rm_daemon.add_argument(
'--name', '-n',
required=True,
action=CustomValidation,
help='daemon name (type.id)')
parser_rm_daemon.add_argument(
'--fsid',
required=True,
help='cluster FSID')
parser_rm_daemon.add_argument(
'--force',
action='store_true',
help='proceed, even though this may destroy valuable data')
parser_rm_daemon.add_argument(
'--force-delete-data',
action='store_true',
help='delete valuable daemon data instead of making a backup')
parser_rm_cluster = subparsers.add_parser(
'rm-cluster', help='remove all daemons for a cluster')
parser_rm_cluster.set_defaults(func=command_rm_cluster)
parser_rm_cluster.add_argument(
'--fsid',
required=True,
help='cluster FSID')
parser_rm_cluster.add_argument(
'--force',
action='store_true',
help='proceed, even though this may destroy valuable data')
parser_run = subparsers.add_parser(
'run', help='run a ceph daemon, in a container, in the foreground')
parser_run.set_defaults(func=command_run)
parser_run.add_argument(
'--name', '-n',
required=True,
help='daemon name (type.id)')
parser_run.add_argument(
'--fsid',
required=True,
help='cluster FSID')
parser_shell = subparsers.add_parser(
'shell', help='run an interactive shell inside a daemon container')
parser_shell.set_defaults(func=command_shell)
parser_shell.add_argument(
'--fsid',
help='cluster FSID')
parser_shell.add_argument(
'--name', '-n',
help='daemon name (type.id)')
parser_shell.add_argument(
'--config', '-c',
help='ceph.conf to pass through to the container')
parser_shell.add_argument(
'--keyring', '-k',
help='ceph.keyring to pass through to the container')
parser_shell.add_argument(
'--mount', '-m',
help=("mount a file or directory in the container. "
"Support multiple mounts. "
"ie: `--mount /foo /bar:/bar`. "
"When no destination is passed, default is /mnt"),
nargs='+')
parser_shell.add_argument(
'--env', '-e',
action='append',
default=[],
help='set environment variable')
parser_shell.add_argument(
'command', nargs=argparse.REMAINDER,
help='command (optional)')
parser_enter = subparsers.add_parser(
'enter', help='run an interactive shell inside a running daemon container')
parser_enter.set_defaults(func=command_enter)
parser_enter.add_argument(
'--fsid',
help='cluster FSID')
parser_enter.add_argument(
'--name', '-n',
required=True,
help='daemon name (type.id)')
parser_enter.add_argument(
'command', nargs=argparse.REMAINDER,
help='command')
parser_ceph_volume = subparsers.add_parser(
'ceph-volume', help='run ceph-volume inside a container')
parser_ceph_volume.set_defaults(func=command_ceph_volume)
parser_ceph_volume.add_argument(
'--fsid',
help='cluster FSID')
parser_ceph_volume.add_argument(
'--config-json',
help='JSON file with config and (client.bootstrap-osd) key')
parser_ceph_volume.add_argument(
'--config', '-c',
help='ceph conf file')
parser_ceph_volume.add_argument(
'--keyring', '-k',
help='ceph.keyring to pass through to the container')
parser_ceph_volume.add_argument(
'command', nargs=argparse.REMAINDER,
help='command')
parser_unit = subparsers.add_parser(
'unit', help='operate on the daemon\'s systemd unit')
parser_unit.set_defaults(func=command_unit)
parser_unit.add_argument(
'command',
help='systemd command (start, stop, restart, enable, disable, ...)')
parser_unit.add_argument(
'--fsid',
help='cluster FSID')
parser_unit.add_argument(
'--name', '-n',
required=True,
help='daemon name (type.id)')
parser_logs = subparsers.add_parser(
'logs', help='print journald logs for a daemon container')
parser_logs.set_defaults(func=command_logs)
parser_logs.add_argument(
'--fsid',
help='cluster FSID')
parser_logs.add_argument(
'--name', '-n',
required=True,
help='daemon name (type.id)')
parser_logs.add_argument(
'command', nargs='*',
help='additional journalctl args')
parser_bootstrap = subparsers.add_parser(
'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
parser_bootstrap.set_defaults(func=command_bootstrap)
parser_bootstrap.add_argument(
'--config', '-c',
help='ceph conf file to incorporate')
parser_bootstrap.add_argument(
'--mon-id',
required=False,
help='mon id (default: local hostname)')
parser_bootstrap.add_argument(
'--mon-addrv',
help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
parser_bootstrap.add_argument(
'--mon-ip',
help='mon IP')
parser_bootstrap.add_argument(
'--mgr-id',
required=False,
help='mgr id (default: randomly generated)')
parser_bootstrap.add_argument(
'--fsid',
help='cluster FSID')
parser_bootstrap.add_argument(
'--output-dir',
default='/etc/ceph',
help='directory to write config, keyring, and pub key files')
parser_bootstrap.add_argument(
'--output-keyring',
help='location to write keyring file with new cluster admin and mon keys')
parser_bootstrap.add_argument(
'--output-config',
help='location to write conf file to connect to new cluster')
parser_bootstrap.add_argument(
'--output-pub-ssh-key',
help='location to write the cluster\'s public SSH key')
parser_bootstrap.add_argument(
'--skip-ssh',
action='store_true',
help='skip setup of ssh key on local host')
parser_bootstrap.add_argument(
'--initial-dashboard-user',
default='admin',
help='Initial user for the dashboard')
parser_bootstrap.add_argument(
'--initial-dashboard-password',
help='Initial password for the initial dashboard user')
parser_bootstrap.add_argument(
'--ssl-dashboard-port',
type=int,
default=8443,
help='Port number used to connect with dashboard using SSL')
parser_bootstrap.add_argument(
'--dashboard-key',
type=argparse.FileType('r'),
help='Dashboard key')
parser_bootstrap.add_argument(
'--dashboard-crt',
type=argparse.FileType('r'),
help='Dashboard certificate')
parser_bootstrap.add_argument(
'--ssh-config',
type=argparse.FileType('r'),
help='SSH config')
parser_bootstrap.add_argument(
'--ssh-private-key',
type=argparse.FileType('r'),
help='SSH private key')
parser_bootstrap.add_argument(
'--ssh-public-key',
type=argparse.FileType('r'),
help='SSH public key')
parser_bootstrap.add_argument(
'--ssh-user',
default='root',
help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
parser_bootstrap.add_argument(
'--skip-mon-network',
action='store_true',
help='set mon public_network based on bootstrap mon ip')
parser_bootstrap.add_argument(
'--skip-dashboard',
action='store_true',
help='do not enable the Ceph Dashboard')
parser_bootstrap.add_argument(
'--dashboard-password-noupdate',
action='store_true',
help='stop forced dashboard password change')
parser_bootstrap.add_argument(
'--no-minimize-config',
action='store_true',
help='do not assimilate and minimize the config file')
parser_bootstrap.add_argument(
'--skip-ping-check',
action='store_true',
help='do not verify that mon IP is pingable')
parser_bootstrap.add_argument(
'--skip-pull',
action='store_true',
help='do not pull the latest image before bootstrapping')
parser_bootstrap.add_argument(
'--skip-firewalld',
action='store_true',
help='Do not configure firewalld')
parser_bootstrap.add_argument(
'--allow-overwrite',
action='store_true',
help='allow overwrite of existing --output-* config/keyring/ssh files')
parser_bootstrap.add_argument(
'--allow-fqdn-hostname',
action='store_true',
help='allow hostname that is fully-qualified (contains ".")')
parser_bootstrap.add_argument(
'--skip-prepare-host',
action='store_true',
help='Do not prepare host')
parser_bootstrap.add_argument(
'--orphan-initial-daemons',
action='store_true',
help='Do not create initial mon, mgr, and crash service specs')
parser_bootstrap.add_argument(
'--skip-monitoring-stack',
action='store_true',
help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
parser_bootstrap.add_argument(
'--apply-spec',
help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
parser_bootstrap.add_argument(
'--shared_ceph_folder',
metavar='CEPH_SOURCE_FOLDER',
help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
parser_bootstrap.add_argument(
'--registry-url',
help='url for custom registry')
parser_bootstrap.add_argument(
'--registry-username',
help='username for custom registry')
parser_bootstrap.add_argument(
'--registry-password',
help='password for custom registry')
parser_bootstrap.add_argument(
'--registry-json',
help='json file with custom registry login info (URL, Username, Password)')
parser_bootstrap.add_argument(
'--container-init',
action='store_true',
help='Run podman/docker with `--init`')
parser_deploy = subparsers.add_parser(
'deploy', help='deploy a daemon')
parser_deploy.set_defaults(func=command_deploy)
parser_deploy.add_argument(
'--name',
required=True,
action=CustomValidation,
help='daemon name (type.id)')
parser_deploy.add_argument(
'--fsid',
required=True,
help='cluster FSID')
parser_deploy.add_argument(
'--config', '-c',
help='config file for new daemon')
parser_deploy.add_argument(
'--config-json',
help='Additional configuration information in JSON format')
parser_deploy.add_argument(
'--keyring',
help='keyring for new daemon')
parser_deploy.add_argument(
'--key',
help='key for new daemon')
parser_deploy.add_argument(
'--osd-fsid',
help='OSD uuid, if creating an OSD container')
parser_deploy.add_argument(
'--skip-firewalld',
action='store_true',
help='Do not configure firewalld')
parser_deploy.add_argument(
'--tcp-ports',
help='List of tcp ports to open in the host firewall')
parser_deploy.add_argument(
'--reconfig',
action='store_true',
help='Reconfigure a previously deployed daemon')
parser_deploy.add_argument(
'--allow-ptrace',
action='store_true',
help='Allow SYS_PTRACE on daemon container')
parser_deploy.add_argument(
'--container-init',
action='store_true',
help='Run podman/docker with `--init`')
parser_check_host = subparsers.add_parser(
'check-host', help='check host configuration')
parser_check_host.set_defaults(func=command_check_host)
parser_check_host.add_argument(
'--expect-hostname',
help='Check that hostname matches an expected value')
parser_prepare_host = subparsers.add_parser(
'prepare-host', help='prepare a host for cephadm use')
parser_prepare_host.set_defaults(func=command_prepare_host)
parser_prepare_host.add_argument(
'--expect-hostname',
help='Set hostname')
parser_add_repo = subparsers.add_parser(
'add-repo', help='configure package repository')
parser_add_repo.set_defaults(func=command_add_repo)
parser_add_repo.add_argument(
'--release',
help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
parser_add_repo.add_argument(
'--version',
help='use specific upstream version (x.y.z)')
parser_add_repo.add_argument(
'--dev',
help='use specified bleeding edge build from git branch or tag')
parser_add_repo.add_argument(
'--dev-commit',
help='use specified bleeding edge build from git commit')
parser_add_repo.add_argument(
'--gpg-url',
help='specify alternative GPG key location')
parser_add_repo.add_argument(
'--repo-url',
default='https://download.ceph.com',
help='specify alternative repo location')
# TODO: proxy?
parser_rm_repo = subparsers.add_parser(
'rm-repo', help='remove package repository configuration')
parser_rm_repo.set_defaults(func=command_rm_repo)
parser_install = subparsers.add_parser(
'install', help='install ceph package(s)')
parser_install.set_defaults(func=command_install)
parser_install.add_argument(
'packages', nargs='*',
default=['cephadm'],
help='packages')
parser_registry_login = subparsers.add_parser(
'registry-login', help='log host into authenticated registry')
parser_registry_login.set_defaults(func=command_registry_login)
parser_registry_login.add_argument(
'--registry-url',
help='url for custom registry')
parser_registry_login.add_argument(
'--registry-username',
help='username for custom registry')
parser_registry_login.add_argument(
'--registry-password',
help='password for custom registry')
parser_registry_login.add_argument(
'--registry-json',
help='json file with custom registry login info (URL, Username, Password)')
parser_registry_login.add_argument(
'--fsid',
help='cluster FSID')
parser_gather_facts = subparsers.add_parser(
'gather-facts', help='gather and return host related information (JSON format)')
parser_gather_facts.set_defaults(func=command_gather_facts)
return parser
def _parse_args(av):
parser = _get_parser()
args = parser.parse_args(av)
if 'command' in args and args.command and args.command[0] == "--":
args.command.pop(0)
return args
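# Example: `cephadm enter --name mon.a -- ceph -s` parses with
# args.command == ['--', 'ceph', '-s'] (argparse.REMAINDER keeps the
# separator), so the leading '--' is popped and `ceph -s` runs in the
# daemon's container.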
if __name__ == "__main__":
# Logger configuration
if not os.path.exists(LOG_DIR):
os.makedirs(LOG_DIR)
dictConfig(logging_config)
logger = logging.getLogger()
# allow argv to be injected
try:
av = injected_argv # type: ignore
except NameError:
av = sys.argv[1:]
logger.debug("%s\ncephadm %s" % ("-" * 80, av))
args = _parse_args(av)
# root?
if os.geteuid() != 0:
sys.stderr.write('ERROR: cephadm should be run as root\n')
sys.exit(1)
# podman or docker?
if args.func != command_check_host:
if args.docker:
container_path = find_program('docker')
else:
for i in CONTAINER_PREFERENCE:
try:
container_path = find_program(i)
break
except Exception as e:
logger.debug('Could not locate %s: %s' % (i, e))
if not container_path and args.func != command_prepare_host\
and args.func != command_add_repo:
sys.stderr.write('Unable to locate any of %s\n' %
CONTAINER_PREFERENCE)
sys.exit(1)
try:
r = args.func()
except Error as e:
if args.verbose:
raise
sys.stderr.write('ERROR: %s\n' % e)
sys.exit(1)
if not r:
r = 0
sys.exit(r)