qemu-kvm-ev-2.12.0-44.1.el7_8.1 线上热迁移qemu-kvm coredump问题分析

配置libvirt coredump后(https://www.cnblogs.com/maojun1998/p/17215214.html)

让测试一直迁移vm,一段时间后出现coredump

root 错误码: coredump.sh 严重错误,进程 coredump,dumpfile:/run/fs_mount//hdd/core.26956

  1  
  2 qemu关于这块的代码非常绕,最后core在object_get_class
  3 
  4  Chardev *s = be->chr;
  5  src = CHARDEV_GET_CLASS(s)->chr_add_watch(s, cond);
  6  
  7  
  8  #define CHARDEV_GET_CLASS(obj) \
  9     OBJECT_GET_CLASS(ChardevClass, (obj), TYPE_CHARDEV)
 10 
 11 #define OBJECT_GET_CLASS(class, obj, name) \
 12     OBJECT_CLASS_CHECK(class, object_get_class(OBJECT(obj)), name)
 13     
 14 #define OBJECT(obj) \
 15     ((Object *)(obj))
 16 #define OBJECT_CLASS_CHECK(class_type, class, name) \
 17     ((class_type *)object_class_dynamic_cast_assert(OBJECT_CLASS(class), (name), \
 18                                                __FILE__, __LINE__, __func__))
 19 #define OBJECT_CLASS(class) \
 20     ((ObjectClass *)(class))
 21     
 22 ObjectClass *object_get_class(Object *obj)
 23 {
 24     return obj->class;
 25 }
 26 struct Chardev {
 27     Object parent_obj;
 28 
 29     QemuMutex chr_write_lock;
 30     CharBackend *be;
 31     char *label;
 32     char *filename;
 33     int logfd;
 34     int be_open;
 35     GSource *gsource;
 36     GMainContext *gcontext;
 37     DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST);
 38 };
 39 struct Object
 40 {
 41     /*< private >*/
 42     ObjectClass *class;
 43     ObjectFree *free;
 44     GHashTable *properties;
 45     uint32_t ref;
 46     Object *parent;
 47 };
 48 struct ObjectClass
 49 {
 50     /*< private >*/
 51     Type type;
 52     GSList *interfaces;
 53 
 54     const char *object_cast_cache[OBJECT_CLASS_CAST_CACHE];
 55     const char *class_cast_cache[OBJECT_CLASS_CAST_CACHE];
 56 
 57     ObjectUnparent *unparent;
 58 
 59     GHashTable *properties;
 60 };
 61 typedef struct ChardevClass {
 62     ObjectClass parent_class;
 63 
 64     bool internal; /* TODO: eventually use TYPE_USER_CREATABLE */
 65     void (*parse)(QemuOpts *opts, ChardevBackend *backend, Error **errp);
 66 
 67     void (*open)(Chardev *chr, ChardevBackend *backend,
 68                  bool *be_opened, Error **errp);
 69 
 70     int (*chr_write)(Chardev *s, const uint8_t *buf, int len);
 71     int (*chr_sync_read)(Chardev *s, const uint8_t *buf, int len);
 72     GSource *(*chr_add_watch)(Chardev *s, GIOCondition cond);
 73     void (*chr_update_read_handler)(Chardev *s);
 74     int (*chr_ioctl)(Chardev *s, int cmd, void *arg);
 75     int (*get_msgfds)(Chardev *s, int* fds, int num);
 76     int (*set_msgfds)(Chardev *s, int *fds, int num);
 77     int (*chr_add_client)(Chardev *chr, int fd);
 78     int (*chr_wait_connected)(Chardev *chr, Error **errp);
 79     void (*chr_disconnect)(Chardev *chr);
 80     void (*chr_accept_input)(Chardev *chr);
 81     void (*chr_set_echo)(Chardev *chr, bool echo);
 82     void (*chr_set_fe_open)(Chardev *chr, int fe_open);
 83     void (*chr_be_event)(Chardev *s, int event);
 84     /* Return 0 if succeeded, 1 if failed */
 85     int (*chr_machine_done)(Chardev *chr);
 86 } ChardevClass;
 87 =========================================================================
 88 print *((CharBackend *)0x55873895f4c0)->chr
 89 $32 = {
 90   parent_obj = {
 91     class = 0x5587353beb40,
 92     free = 0x7f59ebf707e0 <g_free>,
 93 Python Exception <class 'gdb.error'> There is no member named keys.:
 94     properties = 0x558735469b00,
 95     ref = 1,
 96     parent = 0x558735470960
 97   },
 98   chr_write_lock = {
 99     lock = {
100       __data = {
101         __lock = 0,
102         __count = 0,
103         __owner = 0,
104         __nusers = 0,
105         __kind = 0,
106         __spins = 0,
107         __elision = 0,
108         __list = {
109           __prev = 0x0,
110           __next = 0x0
111         }
112       },
113       __size = '\000' <repeats 39 times>,
114       __align = 0
115     },
116     initialized = true
117   },
118   be = 0x55873895f4c0,
119   label = 0x5587353af7b0 "charserial0",
120   filename = 0x55873543fea0 "disconnected:unix:/dev/shm/kvm_unix_ch/dev-8f0d86,server",
121   logfd = -1,
122   be_open = 0,
123   gsource = 0x0,
124   gcontext = 0x0,
125   features = {1}
126 }
127 
128 
129 print ((CharBackend *)0x55873895f4c0)->chr
130 $27 = (Chardev *) 0x5587354645a0
131 
132 print  ((Object *)(0x5587354645a0))->class
133 $29 = (ObjectClass *) 0x5587353beb40
134 
135 print (ChardevClass *)0x5587353beb40
136 (gdb) print *(ChardevClass *)0x5587353beb40
137 $31 = {
138   parent_class = {
139     type = 0x558735406300,
140     interfaces = 0x0,
141     object_cast_cache = {0x0, 0x0, 0x0, 0x0},
142     class_cast_cache = {0x0, 0x0, 0x0, 0x0},
143     unparent = 0x0,
144 Python Exception <class 'gdb.error'> There is no member named keys.:
145     properties = 0x558735444aa0
146   },
147   internal = false,
148   parse = 0x55873293a120 <qemu_chr_parse_socket>,
149   open = 0x55873293b210 <qmp_chardev_open_socket>,
150   chr_write = 0x55873293b130 <tcp_chr_write>,
151   chr_sync_read = 0x55873293b080 <tcp_chr_sync_read>,
152   chr_add_watch = 0x558732939860 <tcp_chr_add_watch>,
153   chr_update_read_handler = 0x55873293b740 <tcp_chr_update_read_handler>,
154   chr_ioctl = 0x0,
155   get_msgfds = 0x55873293a750 <tcp_get_msgfds>,
156   set_msgfds = 0x558732939f50 <tcp_set_msgfds>,
157   chr_add_client = 0x55873293bb10 <tcp_chr_add_client>,
158   chr_wait_connected = 0x55873293bb60 <tcp_chr_wait_connected>,
159   chr_disconnect = 0x55873293a9d0 <tcp_chr_disconnect>,
160   chr_accept_input = 0x0,
161   chr_set_echo = 0x0,
162   chr_set_fe_open = 0x0,
163   chr_be_event = 0x5587329333a0 <chr_be_event>,
164   chr_machine_done = 0x55873293b010 <tcp_chr_machine_done_hook>
165 }
166 
167 ================================================================
168 static GSource *tcp_chr_add_watch(Chardev *chr, GIOCondition cond)
169 {
170     SocketChardev *s = SOCKET_CHARDEV(chr);
171     return qio_channel_create_watch(s->ioc, cond);
172 }
173 
174 
175 
176 
177 guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond,
178                             GIOFunc func, void *user_data)
179 {
180     Chardev *s = be->chr;
181     GSource *src;
182     guint tag;
183 
184     if (!s || CHARDEV_GET_CLASS(s)->chr_add_watch == NULL) {
185         return 0;
186     }
187 
188     src = CHARDEV_GET_CLASS(s)->chr_add_watch(s, cond);
189     if (!src) {
190         return 0;
191     }
192 
193     g_source_set_callback(src, (GSourceFunc)func, user_data, NULL);
194     tag = g_source_attach(src, s->gcontext);
195     g_source_unref(src);
196 
197     return tag;
198 }
199 (gdb) bt
200 #0  0x00005587328b34b0 in object_get_class (obj=obj@entry=0x0) at qom/object.c:759
201 #1  0x0000558732946440 in qio_channel_create_watch (ioc=0x0, condition=(G_IO_OUT | G_IO_HUP)) at io/channel.c:280
202 #2  0x0000558732936577 in qemu_chr_fe_add_watch (be=be@entry=0x55873895f4c0, cond=cond@entry=(G_IO_OUT | G_IO_HUP), func=func@entry=
203     0x5587327b7700 <serial_watch_cb>, user_data=user_data@entry=0x55873895f4a0) at chardev/char-fe.c:355
204 #3  0x00005587327b7a97 in serial_post_load (opaque=0x55873895f4a0, version_id=<optimized out>) at hw/char/serial.c:679
205 #4  0x000055873285eb47 in vmstate_load_state (f=f@entry=0x558738c26000, vmsd=0x558732ff4d40 <vmstate_serial>, opaque=0x55873895f4a0, version_id=3) at migration/vmstate.c:165
206 #5  0x000055873285ec5c in vmstate_load_state (f=0x558738c26000, vmsd=0x558732ff50c0 <vmstate_isa_serial>, opaque=0x55873895f400, version_id=3) at migration/vmstate.c:137
207 #6  0x000055873285abf5 in qemu_loadvm_state_main (mis=<optimized out>, f=0x558738c26000) at migration/savevm.c:1970
208 #7  0x000055873285abf5 in qemu_loadvm_state_main (f=f@entry=0x558738c26000, mis=0x55873320dc00 <mis_current.30332>) at migration/savevm.c:2078
209 #8  0x000055873285ca2e in qemu_loadvm_state (f=0x558738c26000) at migration/savevm.c:2162
210 #9  0x0000558732856bf2 in process_incoming_migration_co (opaque=<optimized out>) at migration/migration.c:388
211 #10 0x00005587329a944a in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:116
212 #11 0x00007f59d3086190 in __start_context () at /lib64/libc.so.6
213 #12 0x00007ffecf01f990 in  ()
214 #13 0x0000000000000000 in  ()
215 
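 216 修复办法:从堆栈看,迁移目的端在 serial_post_load 中调用 qemu_chr_fe_add_watch 时,串口对应的 socket chardev 处于未连接状态(filename 以 "disconnected:" 开头),s->ioc 为 NULL;tcp_chr_add_watch 未判空就把它传给 qio_channel_create_watch(ioc=0x0),最终在 object_get_class(obj=0x0) 中解引用空指针导致 coredump。因此在 tcp_chr_add_watch 中对 s->ioc 判空,为空时返回 NULL(qemu_chr_fe_add_watch 发现 src 为 NULL 会直接返回 0):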
217 
218 diff  -Nuar a/chardev/char-socket.c  b/chardev/char-socket.c
219 --- a/chardev/char-socket.c     2023-03-14 20:36:09.910360722 +0800
220 +++ b/chardev/char-socket.c     2023-03-14 20:36:34.551591389 +0800
221 @@ -348,6 +348,9 @@
222  static GSource *tcp_chr_add_watch(Chardev *chr, GIOCondition cond)
223  {
224      SocketChardev *s = SOCKET_CHARDEV(chr);
225 +    if (!s->ioc) {
226 +        return NULL;
227 +    }
228      return qio_channel_create_watch(s->ioc, cond);
229  }
230  
231 =============================================================
232 重新编译rpm包后测试,需要测试配合迁移测试

 参考补丁:上游版本 5.2.0 已修复

https://gitlab.com/qemu-project/qemu/-/commit/6585b1627899a3fcaf1cf62bfb659b04371ca9ec

https://gitlab.com/qemu-project/qemu/-/tree/6585b1627899a3fcaf1cf62bfb659b04371ca9ec

posted @ 2023-03-14 22:43  maojun1998  阅读(149)  评论(0编辑  收藏  举报