To prepare auto-failover and auto-rejoin in docker swarm we need docker , postgres , repmgr , pgbouncer , runit and gluster . You can also use the finished image .To begin with, on two hosts (better than iron ones) docker1 and docker2 we organize docker swarm so that both are managers.Also, on both hosts, install the distributed file system glusterfs and mount it on both hosts in fstab like thislocalhost:/gfs /mnt/gfs glusterfs defaults,_netdev,backupvolfile-server=localhost 0 0
Then on both hosts create a partitiondocker volume create repmgr
and networkdocker network create --attachable --driver overlay docker
Then collect the image using the docker file:DockerfileFROM alpine
RUN set -ex \
&& apk add --no-cache --repository http://dl-cdn.alpinelinux.org/alpine/edge/testing --virtual .locales-rundeps \
musl-locales \
&& apk add --no-cache --virtual .postgresql-rundeps \
openssh-client \
openssh-server \
pgbouncer \
postgresql \
postgresql-contrib \
repmgr \
repmgr-daemon \
rsync \
runit \
shadow \
tzdata \
&& echo done
ADD bin /usr/local/bin
ADD service /etc/service
CMD [ "runsvdir", "/etc/service" ]
ENTRYPOINT [ "docker_entrypoint.sh" ]
ENV HOME=/var/lib/postgresql
ENV GROUP=postgres \
PGDATA="${HOME}/pg_data" \
USER=postgres
VOLUME "${HOME}"
WORKDIR "${HOME}"
RUN set -ex \
&& sed -i -e 's|#PasswordAuthentication yes|PasswordAuthentication no|g' /etc/ssh/sshd_config \
&& sed -i -e 's|# StrictHostKeyChecking ask| StrictHostKeyChecking no|g' /etc/ssh/ssh_config \
&& echo " UserKnownHostsFile=/dev/null" >>/etc/ssh/ssh_config \
&& sed -i -e 's|postgres:!:|postgres::|g' /etc/shadow \
&& chmod -R 0755 /etc/service /usr/local/bin \
&& echo done
Now run the service on each hostservice1.shdocker service create \
--constraint node.hostname==docker1 \
--env GROUP_ID="$(id -g)" \
--env LANG=ru_RU.UTF-8 \
--env TZ=Asia/Yekaterinburg \
--env USER_ID="$(id -u)" \
--hostname tasks.repmgr1 \
--mount type=bind,source=/etc/certs,destination=/etc/certs,readonly \
--mount type=bind,source=/mnt/gfs/repmgr,destination=/var/lib/postgresql/gfs \
--mount type=volume,source=repmgr,destination=/var/lib/postgresql \
--name repmgr1 \
--network name=docker \
--publish target=5432,published=5432,mode=host \
--publish target=5433,published=5433,mode=host \
--replicas-max-per-node 1 \
rekgrpth/repmgr
On the second host, run the second service as well, only instead of docker1 we write docker2 and instead of repmgr1 we write repmgr2If to teach docker to resolve hostname of conveyers,
docker service create \
--env GROUP_ID="$(id -g)" \
--env LANG=ru_RU.UTF-8 \
--env TZ=Asia/Yekaterinburg \
--env USER_ID="$(id -u)" \
--hostname repmgr-{{.Node.Hostname}} \
--mode global \
--mount type=bind,source=/etc/certs,destination=/etc/certs,readonly \
--mount type=bind,source=/mnt/gfs/repmgr,destination=/var/lib/postgresql/gfs \
--mount type=volume,source=repmgr,destination=/var/lib/postgresql \
--name repmgr \
--network name=docker \
--publish target=5432,published=5432,mode=host \
--publish target=5433,published=5433,mode=host \
rekgrpth/repmgr
So, the entrance to the container/usr/local/bin/docker_entrypoint.sh#!/bin/sh
exec 2>&1
set -ex
if [ -n "$GROUP" ] && [ -n "$GROUP_ID" ] && [ "$GROUP_ID" != "$(id -g "$GROUP")" ]; then
groupmod --gid "$GROUP_ID" "$GROUP"
chgrp "$GROUP_ID" "$HOME"
fi
if [ -n "$USER" ] && [ -n "$USER_ID" ] && [ "$USER_ID" != "$(id -u "$USER")" ]; then
usermod --uid "$USER_ID" "$USER"
chown "$USER_ID" "$HOME"
fi
exec "$@"
The supervisor launches the following services1) / etc / service / ssh, switchover.
/etc/service/ssh/run#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
ssh-keygen -A
exec /usr/sbin/sshd -D -e
, ( , )
/etc/service/ssh/log/run#!/bin/sh
exec 2>&1
exec sed 's|^|ssh: |'
2) / etc / service / postgres/etc/service/postgres/run#!/bin/sh
exec 2>&1
install -d -m 0750 -o "$USER" -g "$GROUP" "$PGDATA"
install -d -m 1775 -o "$USER" -g "$GROUP" /run/postgresql /var/log/postgresql
rm -f /run/postgresql/postgres.run
realpath "$0"
set -ex
chmod 755 supervise
chown "$USER":"$GROUP" supervise/ok supervise/control supervise/status
rm -f "$PGDATA/postmaster.pid"
primary="$(test -f "$HOME/gfs/primary" && cat "$HOME/gfs/primary")"
test -n "$primary" || echo -n "$(hostname)" >"$HOME/gfs/primary"
primary="$(test -f "$HOME/gfs/primary" && cat "$HOME/gfs/primary")"
test -n "$primary"
chown "$USER":"$GROUP" "$HOME/gfs/primary"
if [ "$primary" != "$(hostname)" ]; then
test -d "$PGDATA/base" || /etc/service/postgres/standby
test -f "$PGDATA/standby.signal" || /etc/service/postgres/rejoin
else
test -d "$PGDATA/base" || /etc/service/postgres/primary
fi
test -d "$PGDATA/base"
exec chpst -u "$USER":"$GROUP" -L /run/postgresql/postgres.run postmaster
, ( , )
/etc/service/postgres/log/run#!/bin/sh
exec 2>&1
exec sed 's|^|postgres: |'
/etc/service/postgres/standby#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
chpst -u "$USER":"$GROUP" /etc/service/repmgr/conf
primary="$(test -f "$HOME/gfs/primary" && cat "$HOME/gfs/primary")"
test -n "$primary"
chpst -u "$USER":"$GROUP" repmgr standby clone --config-file="$HOME/repmgr.conf" --verbose --fast-checkpoint --dbname="host=$primary user=repmgr dbname=repmgr connect_timeout=2"
/etc/service/postgres/rejoin#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
primary="$(test -f "$HOME/gfs/primary" && cat "$HOME/gfs/primary")"
test -n "$primary"
chpst -u "$USER":"$GROUP" pg_ctl --options="-c listen_addresses=''" --wait start
chpst -u "$USER":"$GROUP" pg_ctl --wait --mode=fast stop
chpst -u "$USER":"$GROUP" repmgr node rejoin --config-file="$HOME/repmgr.conf" --verbose --force-rewind --no-wait --dbname="host=$primary user=repmgr dbname=repmgr connect_timeout=2" || mv -f "$PGDATA" "${PGDATA}_$(date "+%F %T")"
/etc/service/postgres/primary#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
/etc/service/postgres/init
/etc/service/repmgr/init
/etc/service/postgres/init#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
cd "$PGDATA" && chpst -u "$USER":"$GROUP" initdb
chpst -u "$USER":"$GROUP" /etc/service/postgres/conf
chpst -u "$USER":"$GROUP" /etc/service/postgres/hba
/etc/service/postgres/conf#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
cat >>"$PGDATA/postgresql.conf" <<EOF
archive_command = '/bin/true'
archive_mode = on
datestyle = 'iso, dmy'
listen_addresses = '*'
max_logical_replication_workers = 0
max_sync_workers_per_subscription = 0
ssl_ca_file = '/etc/certs/ca.pem'
ssl_cert_file = '/etc/certs/cert.pem'
ssl_key_file = '/etc/certs/key.pem'
ssl = on
EOF
/etc/service/postgres/hba#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
echo >>"$PGDATA/pg_hba.conf"
ip -o -f inet addr show | awk '/scope global/ {print $4}' | sed -E 's|.\d+/|.0/|' | while read -r net; do
echo "host all all $net trust"
done >>"$PGDATA/pg_hba.conf"
3) / etc / service / repmgr/etc/service/repmgr/run#!/bin/sh
exec 2>&1
install -d -m 1775 -o "$USER" -g "$GROUP" /run/postgresql
rm -f /run/postgresql/repmgr.run
test -f /run/postgresql/postgres.run || exit $?
realpath "$0"
set -ex
chmod 755 supervise
chown "$USER":"$GROUP" supervise/ok supervise/control supervise/status
chpst -u "$USER":"$GROUP" pg_ctl status
primary="$(test -f "$HOME/gfs/primary" && cat "$HOME/gfs/primary")"
test -n "$primary"
if [ "$primary" != "$(hostname)" ]; then
test -f "$HOME/repmgr.conf" || /etc/service/repmgr/standby
chpst -u "$USER":"$GROUP" repmgr standby register --config-file="$HOME/repmgr.conf" --verbose --force
else
test -f "$HOME/repmgr.conf" || /etc/service/repmgr/primary
test ! -f "$PGDATA/standby.signal" || /etc/service/repmgr/promote
chpst -u "$USER":"$GROUP" repmgr primary register --config-file="$HOME/repmgr.conf" --verbose --force
fi
exec chpst -u "$USER":"$GROUP" -L /run/postgresql/repmgr.run repmgrd --config-file="$HOME/repmgr.conf" --verbose --daemonize=false
, ( , )
/etc/service/repmgr/log/run#!/bin/sh
exec 2>&1
exec sed 's|^|repmgr: |'
/etc/service/repmgr/standby#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
chpst -u "$USER":"$GROUP" /etc/service/repmgr/conf
/etc/service/repmgr/primary#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
chpst -u "$USER":"$GROUP" createuser --superuser repmgr
chpst -u "$USER":"$GROUP" createdb repmgr --owner=repmgr
chpst -u "$USER":"$GROUP" /etc/service/repmgr/conf
/etc/service/repmgr/promote#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
chpst -u "$USER":"$GROUP" repmgr standby promote --config-file="$HOME/repmgr.conf" --verbose
/etc/service/repmgr/conf#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
cat >"$HOME/repmgr.conf" <<EOF
conninfo='host=$(hostname) user=repmgr dbname=repmgr connect_timeout=2'
data_directory='$(echo -n "$PGDATA")'
event_notification_command='/etc/service/repmgr/event "%n" "%e" "%s" "%t" "%d"'
failover='automatic'
failover_validation_command='/etc/service/repmgr/valid "%n" "%a"'
follow_command='repmgr standby follow --config-file="$(echo -n "$HOME")/repmgr.conf" --wait --upstream-node-id=%n'
node_id=$(hostname | grep -oE '\d+')
node_name='$(hostname)'
promote_command='repmgr standby promote --config-file="$(echo -n "$HOME")/repmgr.conf"'
repmgrd_service_start_command='sv start repmgr'
repmgrd_service_stop_command='sv force-stop repmgr'
service_promote_command='pg_ctl --wait promote'
service_reload_command='sv reload postgres'
service_restart_command='sv force-restart postgres'
service_start_command='sv start postgres'
service_stop_command='sv force-stop postgres'
ssh_options='-q -o ConnectTimeout=10'
standby_disconnect_on_failover=true
use_replication_slots=true
EOF
repmgr
/etc/service/repmgr/init#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
chpst -u "$USER":"$GROUP" /etc/service/repmgr/hba
chpst -u "$USER":"$GROUP" cat >>"$PGDATA/postgresql.conf" <<EOF
hot_standby = on
max_replication_slots = 10
max_wal_senders = 10
shared_preload_libraries = 'repmgr'
wal_level = 'hot_standby'
wal_log_hints = on
EOF
/etc/service/repmgr/hba#!/bin/sh
exec 2>&1
realpath "$0"
set -ex
echo >>"$PGDATA/pg_hba.conf"
ip -o -f inet addr show | awk '/scope global/ {print $4}' | sed -E 's|.\d+/|.0/|' | while read -r net; do
echo "host replication repmgr $net trust"
echo "host repmgr repmgr $net trust"
done >>"$PGDATA/pg_hba.conf"
cat >>"$PGDATA/pg_hba.conf" <<EOF
local replication repmgr trust
host replication repmgr 127.0.0.1/32 trust
host replication repmgr ::1/128 trust
local repmgr repmgr trust
host repmgr repmgr 127.0.0.1/32 trust
host repmgr repmgr ::1/128 trust
EOF