表格单元格格式化 ------ 邻接矩阵生成
表格单元格邻接矩阵生成
def _axis_adjacency(start, end, middle, margin):
    """Build the symmetric 0/1 adjacency matrix of boxes along one axis.

    Args:
        start(np.array): (n,). lower coordinate of every box on this axis
        end(np.array): (n,). upper coordinate of every box on this axis
        middle(np.array): (n,). centre coordinate of every box on this axis
        margin(int | float): two extents only count as overlapping when they
            share more than this many units

    Returns:
        np.array: (n x n). 1 where two boxes are adjacent on this axis, else 0
    """
    count = start.shape[0]
    adj = np.zeros([count, count], dtype='int')
    for i in range(count):
        # Direct relationship: every box whose extent strictly contains the
        # centre of box i.
        direct = (start < middle[i]) & (end > middle[i])

        # Special relationship, step 1: boxes whose extent overlaps the
        # extent of box i by more than `margin`.
        overlapping = ~((start + margin >= end[i]) | (start[i] + margin >= end))

        # Special relationship, step 2: the shared part of the two extents
        # must contain the centre of at least one box (any box, not only the
        # two being compared). Row j of the broadcast comparison tests every
        # centre against the intersection of box i and box j.
        lower = np.maximum(start[i], start)
        upper = np.minimum(end[i], end)
        bridged = ((lower[:, None] < middle) & (middle < upper[:, None])).any(axis=1)

        linked = direct | (overlapping & bridged)
        # The adjacency matrix is symmetric, so fill both the row and column.
        adj[linked, i] = 1
        adj[i, linked] = 1
    return adj


def bbox2adj(bboxes_non, *, row_margin=4, col_margin=0):
    """Calculate row and column adjacency of non-empty aligned cells from their bboxes.

    Two cells are row-adjacent when the vertical extent of one strictly
    contains the vertical centre of the other, or when their vertical extents
    overlap by more than ``row_margin`` and that overlap contains the vertical
    centre of at least one cell. Column adjacency is the same test applied to
    the horizontal extents with ``col_margin``.

    Args:
        bboxes_non(np.array): (n x 4). bboxes of non-empty aligned cells, read
            as [x1, y1, x2, y2]; any array-like (e.g. a list of lists) is
            accepted
        row_margin(int | float): minimum vertical overlap that must be
            exceeded for the overlap-based row test. Defaults to 4.
        col_margin(int | float): minimum horizontal overlap that must be
            exceeded for the overlap-based column test. Defaults to 0.

    Returns:
        np.array: (n x n). row adjacent relationships of non-empty aligned cells
        np.array: (n x n). column adjacent relationships of non-empty aligned cells
    """
    boxes = np.asarray(bboxes_non)
    if boxes.shape[0] == 0:
        # No cells at all: nothing to index, return two empty matrices.
        return np.zeros([0, 0], dtype='int'), np.zeros([0, 0], dtype='int')

    # Centre of every box on the horizontal and the vertical axis.
    x_middle = boxes[:, ::2].mean(axis=1)
    y_middle = boxes[:, 1::2].mean(axis=1)

    adjr = _axis_adjacency(boxes[:, 1], boxes[:, 3], y_middle, row_margin)
    adjc = _axis_adjacency(boxes[:, 0], boxes[:, 2], x_middle, col_margin)
    return adjr, adjc


更多关于LGPMA边框转html的信息参考:https://github.com/firstelfin/BBox2Html
清澈的爱,只为中国