Resep PostgreSQL: kegagalan otomatis dan bergabung kembali secara otomatis di galangan buruh pelabuhan

Untuk mempersiapkan auto-failover dan auto- gabung kembali di docker swarm, kita membutuhkan buruh pelabuhan , postgres , repmgr , pgbouncer , runit dan gluster . Anda juga dapat menggunakan gambar yang sudah jadi .

Untuk mulai dengan, pada dua host (lebih baik daripada yang besi) docker1 dan docker2 kami mengatur buruh pelabuhan sehingga keduanya adalah manajer.

Juga, pada kedua host, instal glusterfs sistem file terdistribusi dan pasang di kedua host di fstab seperti ini

localhost:/gfs /mnt/gfs glusterfs defaults,_netdev,backupvolfile-server=localhost 0 0

Kemudian pada kedua host buat partisi

docker volume create repmgr

dan jaringan

docker network create --attachable --driver overlay docker

Kemudian kumpulkan gambar menggunakan file buruh pelabuhan:

Dockerfile
FROM alpine
RUN set -ex \
    && apk add --no-cache --repository http://dl-cdn.alpinelinux.org/alpine/edge/testing --virtual .locales-rundeps \
        musl-locales \ #   (   ,      )
    && apk add --no-cache --virtual .postgresql-rundeps \
        openssh-client \ #  ssh- (   ,      )
        openssh-server \ #  ssh- (   ,    switchover)
        pgbouncer \ #  
        postgresql \ #  
        postgresql-contrib \ #     (   ,      )
        repmgr \ #  repmgr
        repmgr-daemon \ #    
        rsync \ #   (   ,      )
        runit \ #   
        shadow \ #   
        tzdata \ #   (   ,      )
    && echo done
ADD bin /usr/local/bin #  
ADD service /etc/service #    
CMD [ "runsvdir", "/etc/service" ] #   
ENTRYPOINT [ "docker_entrypoint.sh" ] #   
ENV HOME=/var/lib/postgresql
ENV GROUP=postgres \
    PGDATA="${HOME}/pg_data" \
    USER=postgres
VOLUME "${HOME}"
WORKDIR "${HOME}"
RUN set -ex \
    && sed -i -e 's|#PasswordAuthentication yes|PasswordAuthentication no|g' /etc/ssh/sshd_config \ #    
    && sed -i -e 's|#   StrictHostKeyChecking ask|   StrictHostKeyChecking no|g' /etc/ssh/ssh_config \ #    
    && echo "   UserKnownHostsFile=/dev/null" >>/etc/ssh/ssh_config \ #      
    && sed -i -e 's|postgres:!:|postgres::|g' /etc/shadow \ #    
    && chmod -R 0755 /etc/service /usr/local/bin \
    && echo done


Sekarang jalankan layanan pada setiap host

service1.sh
docker service create \
    --constraint node.hostname==docker1 \ #     
    --env GROUP_ID="$(id -g)" \ #    (       )
    --env LANG=ru_RU.UTF-8 \ #   (   ,      )
    --env TZ=Asia/Yekaterinburg \ #   (   ,      )
    --env USER_ID="$(id -u)" \ #    (       )
    --hostname tasks.repmgr1 \ #  
    --mount type=bind,source=/etc/certs,destination=/etc/certs,readonly \ #     (   ,      )
    --mount type=bind,source=/mnt/gfs/repmgr,destination=/var/lib/postgresql/gfs \ #    
    --mount type=volume,source=repmgr,destination=/var/lib/postgresql \ #  
    --name repmgr1 \ #   
    --network name=docker \ #  
    --publish target=5432,published=5432,mode=host \ #   
    --publish target=5433,published=5433,mode=host \ #   
    --replicas-max-per-node 1 \ #      
    rekgrpth/repmgr #   


Pada host kedua, jalankan layanan kedua juga, hanya bukannya docker1 kita menulis docker2 dan bukannya repmgr1 kita menulis repmgr2

Jika mengajar buruh pelabuhan untuk menyelesaikan hostname konveyor
,
docker service create \
    --env GROUP_ID="$(id -g)" \ #    (       )
    --env LANG=ru_RU.UTF-8 \ #   (   ,      )
    --env TZ=Asia/Yekaterinburg \ #   (   ,      )
    --env USER_ID="$(id -u)" \ #    (       )
    --hostname repmgr-{{.Node.Hostname}} \ #  
    --mode global \ #       
    --mount type=bind,source=/etc/certs,destination=/etc/certs,readonly \ #     (   ,      )
    --mount type=bind,source=/mnt/gfs/repmgr,destination=/var/lib/postgresql/gfs \ #    
    --mount type=volume,source=repmgr,destination=/var/lib/postgresql \ #  
    --name repmgr \ #   
    --network name=docker \ #  
    --publish target=5432,published=5432,mode=host \ #   
    --publish target=5433,published=5433,mode=host \ #   
    rekgrpth/repmgr #   


Jadi, pintu masuk ke wadah

/usr/local/bin/docker_entrypoint.sh
#!/bin/sh

exec 2>&1
set -ex
if [ -n "$GROUP" ] && [ -n "$GROUP_ID" ] && [ "$GROUP_ID" != "$(id -g "$GROUP")" ]; then #          , 
    groupmod --gid "$GROUP_ID" "$GROUP" #   
    chgrp "$GROUP_ID" "$HOME" #      
fi
if [ -n "$USER" ] && [ -n "$USER_ID" ] && [ "$USER_ID" != "$(id -u "$USER")" ]; then #          , 
    usermod --uid "$USER_ID" "$USER" #   
    chown "$USER_ID" "$HOME" #      
fi
exec "$@" #   


Pengawas meluncurkan layanan berikut

1) / etc / service / ssh
, switchover.

/etc/service/ssh/run
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
ssh-keygen -A #   
exec /usr/sbin/sshd -D -e #  ssh-


, ( , )

/etc/service/ssh/log/run
#!/bin/sh

exec 2>&1
exec sed 's|^|ssh: |'



2) / etc / service / postgres


/etc/service/postgres/run
#!/bin/sh

exec 2>&1
install -d -m 0750 -o "$USER" -g "$GROUP" "$PGDATA" #    
install -d -m 1775 -o "$USER" -g "$GROUP" /run/postgresql /var/log/postgresql #     
rm -f /run/postgresql/postgres.run #   
realpath "$0"
set -ex
chmod 755 supervise
chown "$USER":"$GROUP" supervise/ok supervise/control supervise/status #    
rm -f "$PGDATA/postmaster.pid" #  pid
primary="$(test -f "$HOME/gfs/primary" && cat "$HOME/gfs/primary")" #    
test -n "$primary" || echo -n "$(hostname)" >"$HOME/gfs/primary" #    ,   -  
primary="$(test -f "$HOME/gfs/primary" && cat "$HOME/gfs/primary")" #    
test -n "$primary" # ,    
chown "$USER":"$GROUP" "$HOME/gfs/primary" #  
if [ "$primary" != "$(hostname)" ]; then #     , 
    test -d "$PGDATA/base" || /etc/service/postgres/standby #     -  
    test -f "$PGDATA/standby.signal" || /etc/service/postgres/rejoin #      ,      
else #  (  - )
    test -d "$PGDATA/base" || /etc/service/postgres/primary #     -  
fi
test -d "$PGDATA/base" # ,     
exec chpst -u "$USER":"$GROUP" -L /run/postgresql/postgres.run postmaster #  


, ( , )

/etc/service/postgres/log/run
#!/bin/sh

exec 2>&1
exec sed 's|^|postgres: |'




/etc/service/postgres/standby
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
chpst -u "$USER":"$GROUP" /etc/service/repmgr/conf #    repmgr
primary="$(test -f "$HOME/gfs/primary" && cat "$HOME/gfs/primary")" #    
test -n "$primary" # ,   
chpst -u "$USER":"$GROUP" repmgr standby clone --config-file="$HOME/repmgr.conf" --verbose --fast-checkpoint --dbname="host=$primary user=repmgr dbname=repmgr connect_timeout=2" #    




/etc/service/postgres/rejoin
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
primary="$(test -f "$HOME/gfs/primary" && cat "$HOME/gfs/primary")" #    
test -n "$primary" # ,   
chpst -u "$USER":"$GROUP" pg_ctl --options="-c listen_addresses=''" --wait start #    (   pg_rewind)
chpst -u "$USER":"$GROUP" pg_ctl --wait --mode=fast stop #   (   pg_rewind)
chpst -u "$USER":"$GROUP" repmgr node rejoin --config-file="$HOME/repmgr.conf" --verbose --force-rewind --no-wait --dbname="host=$primary user=repmgr dbname=repmgr connect_timeout=2" || mv -f "$PGDATA" "${PGDATA}_$(date "+%F %T")" #      (   -       )




/etc/service/postgres/primary
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
/etc/service/postgres/init #  
/etc/service/repmgr/init #  repmgr




/etc/service/postgres/init
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
cd "$PGDATA" && chpst -u "$USER":"$GROUP" initdb #  
chpst -u "$USER":"$GROUP" /etc/service/postgres/conf #   
chpst -u "$USER":"$GROUP" /etc/service/postgres/hba #   




/etc/service/postgres/conf
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
cat >>"$PGDATA/postgresql.conf" <<EOF

archive_command = '/bin/true' #  
archive_mode = on #  - 
datestyle = 'iso, dmy' #   
listen_addresses = '*' #  
max_logical_replication_workers = 0 #   
max_sync_workers_per_subscription = 0 #   
ssl_ca_file = '/etc/certs/ca.pem' #   
ssl_cert_file = '/etc/certs/cert.pem' #  
ssl_key_file = '/etc/certs/key.pem' #   
ssl = on #  ssl
EOF




/etc/service/postgres/hba
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
echo >>"$PGDATA/pg_hba.conf"
ip -o -f inet addr show | awk '/scope global/ {print $4}' | sed -E 's|.\d+/|.0/|' | while read -r net; do #    
    echo "host all all $net trust" #   
done >>"$PGDATA/pg_hba.conf"



3) / etc / service / repmgr


/etc/service/repmgr/run
#!/bin/sh

exec 2>&1
install -d -m 1775 -o "$USER" -g "$GROUP" /run/postgresql #   
rm -f /run/postgresql/repmgr.run #   
test -f /run/postgresql/postgres.run || exit $? # ,    
realpath "$0"
set -ex
chmod 755 supervise
chown "$USER":"$GROUP" supervise/ok supervise/control supervise/status #    
chpst -u "$USER":"$GROUP" pg_ctl status # ,   
primary="$(test -f "$HOME/gfs/primary" && cat "$HOME/gfs/primary")" #    
test -n "$primary" # ,   
if [ "$primary" != "$(hostname)" ]; then #     , 
    test -f "$HOME/repmgr.conf" || /etc/service/repmgr/standby #    ,   
    chpst -u "$USER":"$GROUP" repmgr standby register --config-file="$HOME/repmgr.conf" --verbose --force #     
else #  (  - )
    test -f "$HOME/repmgr.conf" || /etc/service/repmgr/primary #    ,   
    test ! -f "$PGDATA/standby.signal" || /etc/service/repmgr/promote #       ,     
    chpst -u "$USER":"$GROUP" repmgr primary register --config-file="$HOME/repmgr.conf" --verbose --force #    
fi
exec chpst -u "$USER":"$GROUP" -L /run/postgresql/repmgr.run repmgrd --config-file="$HOME/repmgr.conf" --verbose --daemonize=false #   repmgr


, ( , )

/etc/service/repmgr/log/run
#!/bin/sh

exec 2>&1
exec sed 's|^|repmgr: |'


/etc/service/repmgr/standby
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
chpst -u "$USER":"$GROUP" /etc/service/repmgr/conf #  




/etc/service/repmgr/primary
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
chpst -u "$USER":"$GROUP" createuser --superuser repmgr #  
chpst -u "$USER":"$GROUP" createdb repmgr --owner=repmgr #  
chpst -u "$USER":"$GROUP" /etc/service/repmgr/conf #  




/etc/service/repmgr/promote
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
chpst -u "$USER":"$GROUP" repmgr standby promote --config-file="$HOME/repmgr.conf" --verbose #     


/etc/service/repmgr/conf
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
cat >"$HOME/repmgr.conf" <<EOF
conninfo='host=$(hostname) user=repmgr dbname=repmgr connect_timeout=2' #      
data_directory='$(echo -n "$PGDATA")' #    
event_notification_command='/etc/service/repmgr/event "%n" "%e" "%s" "%t" "%d"' #   
failover='automatic' #   failover
failover_validation_command='/etc/service/repmgr/valid "%n" "%a"' #   
follow_command='repmgr standby follow --config-file="$(echo -n "$HOME")/repmgr.conf" --wait --upstream-node-id=%n' #   
node_id=$(hostname | grep -oE '\d+') #   
node_name='$(hostname)' #   
promote_command='repmgr standby promote --config-file="$(echo -n "$HOME")/repmgr.conf"' #   
repmgrd_service_start_command='sv start repmgr' #    
repmgrd_service_stop_command='sv force-stop repmgr' #    
service_promote_command='pg_ctl --wait promote' #    
service_reload_command='sv reload postgres' #     
service_restart_command='sv force-restart postgres' #    
service_start_command='sv start postgres' #    
service_stop_command='sv force-stop postgres' #    
ssh_options='-q -o ConnectTimeout=10' #   ssl
standby_disconnect_on_failover=true #   failover
use_replication_slots=true #   
EOF


repmgr

/etc/service/repmgr/init
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
chpst -u "$USER":"$GROUP" /etc/service/repmgr/hba #    
chpst -u "$USER":"$GROUP" cat >>"$PGDATA/postgresql.conf" <<EOF #    

hot_standby = on #   
max_replication_slots = 10 #     
max_wal_senders = 10 #    
shared_preload_libraries = 'repmgr' #  repmgr
wal_level = 'hot_standby' #    
wal_log_hints = on #    pg_rewind
EOF




/etc/service/repmgr/hba
#!/bin/sh

exec 2>&1
realpath "$0"
set -ex
echo >>"$PGDATA/pg_hba.conf"
ip -o -f inet addr show | awk '/scope global/ {print $4}' | sed -E 's|.\d+/|.0/|' | while read -r net; do #    
    echo "host replication repmgr $net trust" #    repmgr
    echo "host repmgr repmgr $net trust" #  repmgr  repmgr
done >>"$PGDATA/pg_hba.conf"
cat >>"$PGDATA/pg_hba.conf" <<EOF

local replication repmgr trust #    repmgr
host replication repmgr 127.0.0.1/32 trust #    repmgr
host replication repmgr ::1/128 trust #    repmgr

local repmgr repmgr trust #  repmgr  repmgr
host repmgr repmgr 127.0.0.1/32 trust #  repmgr  repmgr
host repmgr repmgr ::1/128 trust #  repmgr  repmgr
EOF



All Articles