代码改变世界

【PostgreSQL】PostgreSQL复制的监控

2022-06-30 09:04  abce  阅读(789)  评论(0编辑  收藏  举报

创建物理复制槽:(在master端执行)

postgres=# select pg_create_physical_replication_slot('abce');

  

删除复制槽:

postgres=# select pg_drop_replication_slot('abce');

  

1.主库端

(1)查看复制连接的状态

pg_stat_replication

=# select * from pg_stat_replication;
-[ RECORD 1 ]----+------------------------------
pid             | 2327
usesysid         | 10
usename         | mdevan
application_name | walreceiver
client_addr     |
client_hostname |
client_port     | -1
backend_start   | 2022-06-02 06:04:48.536888-04
backend_xmin     |
state           | streaming
sent_lsn         | 0/277628F8
write_lsn       | 0/277628F8
flush_lsn       | 0/277628F8
replay_lsn       | 0/277628F8
write_lag       | 00:00:00.000049
flush_lag       | 00:00:00.00063
replay_lag       | 00:00:00.000662
sync_priority   | 0
sync_state       | async
-[ RECORD 2 ]----+------------------------------
pid             | 2085
usesysid         | 10
usename         | mdevan
application_name | walreceiver
client_addr     |
client_hostname |
client_port     | -1
backend_start   | 2022-06-02 04:35:36.559838-04
backend_xmin     |
state           | streaming
sent_lsn         | 0/277628F8
write_lsn       | 0/277628F8
flush_lsn       | 0/277628F8
replay_lsn       | 0/277628F8
write_lag       |
flush_lag       |
replay_lag       |
sync_priority   | 0
sync_state       | async

(2)查看复制槽的状态

pg_replication_slots;

=# select * from pg_replication_slots;
-[ RECORD 1 ]-------+--------------
slot_name           | mylogslot
plugin             | test_decoding
slot_type           | logical
datoid             | 16384
database           | bench
temporary           | f
active             | t
active_pid         | 2607
xmin               |
catalog_xmin       | 356948
restart_lsn         | 0/3B44A5A0
confirmed_flush_lsn | 0/3B44A5A0
-[ RECORD 2 ]-------+--------------
slot_name           | myreplslot1
plugin             |
slot_type           | physical
datoid             |
database           |
temporary           | f
active             | t
active_pid         | 2598
xmin               |
catalog_xmin       |
restart_lsn         | 0/3B44A5D8
confirmed_flush_lsn |

2.从库端

(1)日志接收的状态

pg_stat_wal_receiver

=# select * from pg_stat_wal_receiver;
-[ RECORD 1 ]---------+------------------------------
pid                   | 2084
status               | streaming
receive_start_lsn     | 0/3000000
receive_start_tli     | 1
received_lsn         | 0/11233E20
received_tli         | 1
last_msg_send_time   | 2022-06-02 04:47:53.334477-04
last_msg_receipt_time | 2022-06-02 04:47:53.334519-04
latest_end_lsn       | 0/11233E20
latest_end_time       | 2022-06-02 04:46:53.261754-04
slot_name             |
conninfo             | [removed for brevity]

last_msg_receipt_time - last_msg_send_time 表示复制wal接收的延迟;这里slot_name的值为空,表示没有使用复制槽。

这个就是使用复制槽的例子:

=# select * from pg_stat_wal_receiver;
-[ RECORD 1 ]---------+------------------------------
pid                   | 2326
status               | streaming
receive_start_lsn     | 0/15000000
receive_start_tli     | 1
received_lsn         | 0/15000060
received_tli         | 1
last_msg_send_time   | 2022-06-02 06:04:48.537842-04
last_msg_receipt_time | 2022-06-02 06:04:48.537887-04
latest_end_lsn       | 0/15000060
latest_end_time       | 2022-06-02 06:04:48.537842-04
slot_name             | myreplslot1
conninfo             | [removed for brevity]

(2)日志应用状态

pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn()查看从库日志应用的状态

=# select pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn(), pg_last_xact_replay_timestamp();
-[ RECORD 1 ]-----------------+------------------------------
pg_last_wal_receive_lsn       | 0/1A000000
pg_last_wal_replay_lsn       | 0/19FFE928
pg_last_xact_replay_timestamp | 2022-06-02 06:13:42.141768-04

pg_last_wal_replay_lsn - pg_last_wal_receive_lsn 表示日志应用的延迟。

 

推荐使用工具pgmetrics查看

 

其它

#主库端执行

select pid,
      application_name,
      pg_wal_lsn_diff(pg_current_wal_lsn(), sent_lsn) sending_lag,
      pg_wal_lsn_diff(sent_lsn, flush_lsn) receiving_lag,
      pg_wal_lsn_diff(flush_lsn, replay_lsn) replaying_lag,
      pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) total_lag
from pg_stat_replication;

#从库端执行

SELECT CASE
        WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN
        0
        ELSE
        EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp())
      END AS log_delay,
      pg_last_wal_receive_lsn(),
      pg_last_wal_replay_lsn(),
      pg_last_xact_replay_timestamp();