6. redis 定时任务手动执行AOF重写(附完整代码)
造成这个问题的原因是我们redis集群选用了AOF持久化方式,大家知道AOF文件是文本存储,且不断增长,故redis提供了重写AOF文件的功能,并且我们选用了非自动触发重写的方式。巧合总是不期而遇,出问题的机器磁盘空间是redis集群其它机器的一半,不到1T,且当前集群版本刚从3.2升级到7.x,新集群的cluster nodes命令获取的结果不同于老版本(不知道为啥不向下兼容,具体原因后面程序会谈),导致上个月重写AOF的调度程序失败,并且由于redis集群负责的人员离职交接,没来得及更新告警收件人信息,此次失败任务被忽略。由于不清楚定时任务的代码逻辑,不敢贸然执行(主要是害怕它是并发执行bgrewriteaof),临时解决方法:逐个节点手动执行bgrewriteaof命令,使其重写AOF文件,降低被重写文件大小。
二、方案描述
目前使用的是7.x版本,但是没有开启多线程功能,还是使用的单线程模式,Redis 单线程架构导致无法充分利用多核 CPU 特性,所以可以在一台机器上部署多个 Redis 实例,故设计了三台机器(单台16核),每台16个节点的分布情况,目前选用AOF持久化方式进行存储。关于持久化的详细内容,可参考我之前写的文章:https://www.cnblogs.com/chenmingming0225/articles/14525456.html,本文主要介绍如何手动实现AOF重写功能,以及顺便实现了手动执行RDB备份的程序。
三、实现细节
3.1 AOF方式重写
本部分主要细节是7.x版本cluster nodes命令执行结果和3.2版本相比,节点地址字段由 ip:port 变成了 ip:port@cport(即多出 @ 及其后的集群总线端口,原文标红的就是这一部分),这也是AOF重写调度任务执行失败的原因:
具体其它的细节可参考如下完整代码:

1 public class HxBgRewriteAofTimer { 2 public static void main(String[] args) { 3 DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); 4 String hosts = "192.168.21.116:7310"; 5 if (args.length > 0) { 6 hosts = args[0]; 7 } 8 String taskId = UUID.randomUUID().toString(); 9 System.out.println("任务开始:" + taskId + " hosts:" + hosts + " 当前时间为:" + LocalDateTime.now().format(formatter)); 10 try 11 { 12 String[] firstHosts = StringUtils.split(StringUtils.substringBefore(hosts, ","), ":"); 13 Jedis zj = new Jedis(firstHosts[0], Integer.parseInt(firstHosts[1]), 2000); 14 System.out.println(zj.ping()); 15 List<String> ips = getClusterNodes(zj); 16 System.out.println("当前集群IP:" + Arrays.toString(ips.toArray())); 17 18 String host; 19 int port; 20 for (String ad : ips) { 21 String[] ads = StringUtils.split(ad, ":"); 22 host = ads[0]; 23 port = Integer.parseInt(ads[1]); 24 String hostAndPort = host + ":" + port; 25 System.out.println("本次执行的节点为:"+ hostAndPort); 26 27 Jedis jedis = new Jedis(host, port, 30000); 28 String[] persistenceInfo = jedis.info("persistence").split("\n"); 29 long aofCurrentSize = 0L; 30 long aofBaseSize = 0L; 31 for (String i : persistenceInfo) { 32 if (i.contains("aof_current_size")) { 33 aofCurrentSize = Long.parseLong(i.split(":")[1].trim()); 34 } 35 if (i.contains("aof_base_size")) { 36 aofBaseSize = Long.parseLong(i.split(":")[1].trim()); 37 } 38 } 39 //考虑第一次执行aofBaseSize==0为0的情况 40 if (((aofCurrentSize > 0L) && aofBaseSize==0) || ((aofCurrentSize > 0L) && (aofBaseSize > 0L) && ((aofCurrentSize - aofBaseSize) / aofBaseSize > 1L))) { 41 System.out.println(hostAndPort + " bgrewriteaof:" + jedis.bgrewriteaof()); 42 isFinish(jedis, hostAndPort); 43 } 44 } 45 System.out.println("此次任务完成 :" + taskId + " 当前时间为:" + LocalDateTime.now().format(formatter)); 46 } catch (Exception e) { 47 e.printStackTrace(); 48 System.out.println("此次任务失败 :" + taskId + " hosts:" + hosts + " 当前时间为:" + LocalDateTime.now().format(formatter)); 49 
System.exit(-1); 50 } 51 } 52 53 private static List<String> getClusterNodes(Jedis jedis) { 54 ArrayList<String> rs = new ArrayList<>(); 55 String clusterNodes = jedis.clusterNodes(); 56 String[] servers = StringUtils.split(clusterNodes, "\n"); 57 for (String s : servers) 58 { 59 String[] ss = StringUtils.split(s, " "); 60 if (ss.length > 1) { 61 //7.x版本cluster nodes命令执行后的集群节点信息是ip:port@71350 62 rs.add(ss[1].split("@")[0]); 63 } 64 } 65 return rs; 66 } 67 68 private static void isFinish(Jedis jedis, String hostAndPort) { 69 long start = System.currentTimeMillis(); 70 for (;;) { 71 try { 72 Thread.sleep(5000L); 73 } 74 catch (InterruptedException e) { 75 e.printStackTrace(); 76 } 77 String persistenceInfo = jedis.info("persistence"); 78 if (persistenceInfo.contains("aof_rewrite_in_progress:0")) { 79 System.out.println("Background AOF rewrite finished successfully"); 80 break; 81 } 82 } 83 long useTime = System.currentTimeMillis() - start; 84 System.out.println(hostAndPort + " 耗时 " + useTime + " ms"); 85 try { 86 Thread.sleep(5000L); 87 } 88 catch (InterruptedException e) { 89 e.printStackTrace(); 90 } 91 }
3.2、RDB方式备份
既然都实现了AOF重写的程序,顺便也实现一下RDB快照备份的功能吧,很简单,主要思路是逐个节点执行,节点成功的标识是通过命令LASTSAVE判断最新的一条数据刷盘时间(也可同AOF,通过info persistence中的rdb_bgsave_in_progress判断),代码如下:

1 public class HxBgSaveTimer { 2 public static void main(String[] args) { 3 String hosts = "192.168.21.166:7310"; 4 boolean onlySlaveBgSave = true; 5 if (args.length > 0) { 6 hosts = args[0]; 7 } 8 if (args.length > 1) { 9 onlySlaveBgSave = Boolean.parseBoolean(args[1]); 10 } 11 SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 12 String taskId = UUID.randomUUID().toString(); 13 System.out.println("任务开始:" + taskId + " hosts:" + hosts + " 当前时间为:" + format.format(new Date())); 14 try { 15 String[] firstHosts = StringUtils.split(StringUtils.substringBefore(hosts, ","), ":"); 16 Jedis zj = new Jedis(firstHosts[0], Integer.parseInt(firstHosts[1]), 2000); 17 System.out.println(zj.ping()); 18 List<String> ips = getClusterNodes(zj); 19 System.out.println("当前集群IP:" + Arrays.toString(ips.toArray())); 20 21 String host = ""; 22 int port = 7310; 23 for (String ad : ips) { 24 String[] ads = StringUtils.split(ad, ":"); 25 host = ads[0]; 26 port = Integer.parseInt(ads[1]); 27 String hostAndPort = host + ":" + port; 28 29 Jedis jedis = new Jedis(host, port, 30000); 30 System.out.println("本次执行的节点为:"+ hostAndPort); 31 jedis.configSet("save", ""); 32 long lastSave = jedis.lastsave(); 33 if (onlySlaveBgSave) { 34 String replication = jedis.info("Replication"); 35 if (StringUtils.contains(replication, "role:slave")) { 36 System.out.println(hostAndPort + " bgsave:" + jedis.bgsave()); 37 isFinish(jedis, lastSave, hostAndPort); 38 } 39 } else { 40 System.out.println(hostAndPort + " bgsave:" + jedis.bgsave()); 41 isFinish(jedis, lastSave, hostAndPort); 42 } 43 } 44 System.out.println("此次任务完成 :" + taskId + " 当前时间为:" + format.format(new Date())); 45 } 46 catch (Exception e) { 47 e.printStackTrace(); 48 System.out.println("此次任务失败 :" + taskId + " hosts:" + hosts + " 当前时间为:" + format.format(new Date())); 49 System.exit(-1); 50 } 51 } 52 53 private static List<String> getClusterNodes(Jedis jedis) { 54 ArrayList<String> rs = new ArrayList<>(); 55 String clusterNodes = 
jedis.clusterNodes(); 56 String[] servers = StringUtils.split(clusterNodes, "\n"); 57 for (String s : servers) 58 { 59 String[] ss = StringUtils.split(s, " "); 60 if (ss.length > 1) { 61 //7.x版本cluster nodes命令执行后的集群节点信息是ip:port@71350 62 rs.add(ss[1].split("@")[0]); 63 } 64 } 65 return rs; 66 } 67 68 private static void isFinish(Jedis jedis, long lastsave, String hostAndPort) { 69 long start = System.currentTimeMillis(); 70 do { 71 try { 72 Thread.sleep(1000L); 73 } 74 catch (InterruptedException e) { 75 e.printStackTrace(); 76 } 77 } while (jedis.lastsave() == lastsave); 78 System.out.println(" Background saving terminated with success"); 79 80 long useTime = System.currentTimeMillis() - start; 81 System.out.println(hostAndPort + " 耗时 " + useTime + " ms"); 82 try { 83 Thread.sleep(5000L); 84 } 85 catch (InterruptedException e) { 86 e.printStackTrace(); 87 } 88 } 89 }
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?
· 如何调用 DeepSeek 的自然语言处理 API 接口并集成到在线客服系统