C语言实现B树(附B+树简介)

三级存储:寄存器,内存,磁盘
  1. 寄存器,少量,速度很快,内存速度一般,磁盘速度很慢
  2. 寄存器,内存断电后数据丢失,磁盘持久存储
 
通过CPU指令,如mov eax, [0008h],可以访问内存的任意位置。如果要访问的页不在内存中(没有命中),就会产生一个缺页中断,此时会去磁盘寻址,把数据读入内存。
访问磁盘,需要移动磁头,速度很慢。
 
磁盘:
  1. 盘片,每个盘片有两个盘面
  2. 磁道
  3. 扇区,一个扇区存储的就是一个节点
 
二叉树,红黑树只适合在内存上面,不适合在磁盘上面。二叉树,红黑树需要多次寻址,内存寻址很快,但磁盘寻址很慢,所以需要减少层高,减少寻址次数---->多叉树---->b树。
多叉树与B树:
  • 多叉树是B树的基础
  • B树是多叉树上加了一些限制条件
 
例如:一个页4k,4G存储,用1024叉b树组织,只需要寻址两次就可以拿到数据。
 
B树性质,一棵M阶B树T,满足以下条件:
  1. 每个节点最多有M棵子树
  2. 根节点如果不是叶子节点,则最少有两棵子树
  3. 除了根节点外的分支节点,最少有ceil(M/2)棵子树
  4. 所有叶子节点在同一层
  5. 有k棵子树的分支节点存在k-1个关键字,关键字按照递增进行排序
  6. 除根节点外,每个节点的关键字数量n满足ceil(M/2) - 1 <= n <= M - 1
 
所有叶子节点在同一层,所以b树也是平衡的。
 
实战:
  • 一般情况下,M设为偶数,key的数量是一个奇数:M - 1,这样节点满了需要分裂的时候,可以找到最中间的关键字,把它上移到父节点。
 
B树的添加,分裂有两种情况:
  1. 只有根节点的时候,一分为三
  2. 其余情况,一分为二
 
B树删除,情况有三种:
  1. 左边够,往左边借
  2. 右边够,往右边借
  3. 都不够,则合并
 
B+树
  1. 在b树基础上,在叶子节点加上了前后指针
    1. 方便范围查找
  2. 所有的值,都存储在叶子节点上面,内部节点都只是索引,和b树不同,b树内部是有值的
  3. 数据库用b+树
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <assert.h>

#define DEGREE 3  // minimum degree t of the B-tree: nodes are sized for at most 2*t-1 keys and 2*t children. NOTE(review): not referenced in the visible code; main() passes the literal 3
typedef int KEY_VALUE;  // type of the keys stored in the tree

// One B-tree node: a key array and a child-pointer array, both allocated in btree_create_node.
typedef struct _btree_node {
    KEY_VALUE * keys;  // key storage, sized for 2*t-1 keys
    struct _btree_node **childrens;  // child pointers, zero-initialised when the node is created
    int num;   // number of keys currently stored (a node with 2*t-1 is treated as full); an internal node uses children 0..num
    int leaf;  // 1 if the node is a leaf, 0 otherwise
} btree_node;

// Handle for a whole tree: the root pointer plus the degree every node was created with.
typedef struct _btree {
    btree_node *root;  // root node; btree_delete_key sets it to NULL when the last key is removed
    int t; // minimum degree: a node holds at most 2*t-1 keys
}btree;

// Allocate an empty node for a tree of minimum degree t.
//   t    - minimum degree; the node gets room for 2*t-1 keys
//   leaf - 1 to create a leaf, 0 to create an internal node
// Returns the new node with num == 0 and both arrays zero-filled, or NULL
// when one of the arrays cannot be allocated. Failure to allocate the node
// struct itself trips the assert, as before.
btree_node *btree_create_node(int t, int leaf) {

    btree_node *node = (btree_node *)malloc(sizeof(btree_node));
    assert(node);

    node->leaf = leaf;
    node->num = 0;

    node->keys = (KEY_VALUE *)calloc(2*t - 1, sizeof(KEY_VALUE));
    if (node->keys == NULL) {
        free(node);
        return NULL;
    }

    // The elements are pointers, so size them as btree_node * (the old code
    // used sizeof(btree_node) and over-allocated). A full node needs 2*t
    // child slots; one spare zeroed slot is kept so that code touching the
    // slot just past the last child of a full node stays in bounds.
    node->childrens = (btree_node **)calloc(2*t + 1, sizeof(btree_node *));
    if (node->childrens == NULL) {
        free(node->keys);
        free(node);
        return NULL;
    }

    return node;
}

// Free one node together with its key array and child-pointer array.
// The child nodes themselves are not freed; only this node's own storage is.
void btree_destroy_node(btree_node *node) {

    assert(node);

    // Both arrays are reached through the node, so the node goes last.
    free(node->keys);
    free(node->childrens);
    free(node);
}

// Initialise tree T with minimum degree t.
// The new tree consists of a single empty leaf, which is also the root.
void btree_create(btree *T, int t) {
    T->root = btree_create_node(t, 1);
    T->t = t;
}

// Split the full child x->childrens[i] (2*t-1 keys) into two nodes of t-1
// keys each and move its middle key up into x at position i.
//   T - tree, supplies the degree t
//   x - parent; the code assumes it has room for one more key and child
//   i - index of the full child inside x
void btree_split_child(btree *T, btree_node *x, int i) {

    const int t = T->t;
    btree_node *full = x->childrens[i];
    btree_node *sibling = btree_create_node(t, full->leaf);
    int k;

    // The upper t-1 keys move to the new right-hand sibling.
    for (k = 0; k < t - 1; k++) {
        sibling->keys[k] = full->keys[t + k];
    }
    sibling->num = t - 1;

    // For an internal node the upper t children follow their keys.
    if (!full->leaf) {
        for (k = 0; k < t; k++) {
            sibling->childrens[k] = full->childrens[t + k];
        }
    }
    full->num = t - 1;

    // Open child slot i+1 in the parent and hook the sibling in.
    for (k = x->num + 1; k > i + 1; k--) {
        x->childrens[k] = x->childrens[k - 1];
    }
    x->childrens[i + 1] = sibling;

    // Open key slot i in the parent and lift the middle key into it.
    for (k = x->num; k > i; k--) {
        x->keys[k] = x->keys[k - 1];
    }
    x->keys[i] = full->keys[t - 1];
    x->num++;
}

// Insert k into the subtree rooted at x; the code assumes x itself is not full.
// Walks down from x, splitting any full child before stepping into it, and
// places k in sorted position once a leaf is reached.
void btree_insert_nofull(btree *T, btree_node *x, KEY_VALUE k) {

    btree_node *cur = x;

    while (cur->leaf != 1) {
        // pos = index of the child whose key range contains k
        int pos = cur->num;
        while (pos > 0 && cur->keys[pos - 1] > k) pos--;

        if (cur->childrens[pos]->num == (2*(T->t)) - 1) {
            btree_split_child(T, cur, pos);
            // the key lifted into cur->keys[pos] decides which half to enter
            if (k > cur->keys[pos]) pos++;
        }

        cur = cur->childrens[pos];
    }

    // Leaf: shift larger keys one slot right and drop k into the gap.
    int slot = cur->num;
    while (slot > 0 && cur->keys[slot - 1] > k) {
        cur->keys[slot] = cur->keys[slot - 1];
        slot--;
    }
    cur->keys[slot] = k;
    cur->num += 1;
}

// Insert key into tree T.
// If the tree has no root (btree_delete_key sets T->root to NULL after the
// last key is removed) a fresh leaf root is created first. If the root is
// full, a new root is placed above it and the old root is split, which is
// the only way the tree grows in height.
// On allocation failure an error is written to stderr and the tree is left
// unchanged.
void btree_insert(btree *T, KEY_VALUE key) {

    btree_node *r = T->root;

    if (r == NULL) {
        r = btree_create_node(T->t, 1);
        if (r == NULL) {
            fprintf(stderr, "btree_insert: out of memory\n");
            return;
        }
        T->root = r;
    }

    if (r->num == 2 * T->t - 1) {

        btree_node *node = btree_create_node(T->t, 0);
        if (node == NULL) {
            fprintf(stderr, "btree_insert: out of memory\n");
            return;
        }
        T->root = node;

        node->childrens[0] = r;

        btree_split_child(T, node, 0);

        // the lifted middle key decides which half receives the new key
        int i = 0;
        if (node->keys[0] < key) i++;
        btree_insert_nofull(T, node->childrens[i], key);
    } else {
        btree_insert_nofull(T, r, key);
    }
}

// Print every key of the subtree rooted at x in ascending order, each as a
// character followed by a space. A NULL subtree prints nothing.
void btree_traverse(btree_node *x) {

    int i = 0;

    if (x == NULL) return;

    // x->num is the key count: child i is visited before key i, and the last
    // child (index num) is visited after the loop.
    for (i = 0; i < x->num; i++) {
        if (x->leaf == 0)
            btree_traverse(x->childrens[i]);
        printf("%c ", x->keys[i]);  // "%c" matches btree_print; "%C" is the wide-character conversion
    }

    if (x->leaf == 0) btree_traverse(x->childrens[i]);
}

// Dump the subtree rooted at node, one node per block: a header line with
// the depth, key count and leaf flag, then the keys as characters.
// Non-NULL children are printed recursively with layer + 1.
// A NULL node prints "the tree is empty".
void btree_print(btree *T, btree_node *node, int layer) {

    int i;

    if (node == NULL) {
        printf("the tree is empty\n");
        return;
    }

    printf("\nlayer = %d keynum = %d is_leaf = %d\n", layer, node->num, node->leaf);
    for (i = 0; i < node->num; i++)
        printf("%c ", node->keys[i]);
    printf("\n");
#if 0
    printf("%p\n", node);
    for (i = 0; i <= 2 * T->t; i++)
        printf("%p ", node->childrens[i]);
    printf("\n");
#endif

    for (i = 0; i <= node->num; i++) {
        btree_node *kid = node->childrens[i];
        if (kid != NULL)
            btree_print(T, kid, layer + 1);
    }
}

// Binary search over node->keys[low..high] (inclusive bounds).
// Returns -1 when the range is empty or either bound is negative. Otherwise
// returns the position where the search converges: the first index in the
// range whose key is not smaller than key, or high + 1 if every key is
// smaller (assuming the keys are in ascending order).
int btree_bin_search(btree_node *node, int low, int high, KEY_VALUE key) {

    if (low < 0 || high < 0 || low > high)
        return -1;

    int lo = low;
    int hi = high;

    while (hi >= lo) {
        int mid = (lo + hi) / 2;
        if (node->keys[mid] < key)
            lo = mid + 1;
        else
            hi = mid - 1;
    }

    return lo;
}

// Merge {childrens[idx], keys[idx], childrens[idx+1]} of node into one child.
// The separator key moves down into the left child, the right child's keys
// (and children, for internal nodes) are appended after it, and the right
// child is destroyed. Both children are expected to hold exactly t-1 keys,
// so the merged node ends up with 2*t-1.
// If node is left with no keys, the merged child becomes T->root and node is
// destroyed, so callers must not touch node afterwards in that case.
void btree_merge(btree *T, btree_node *node, int idx) {

    btree_node *left = node->childrens[idx];
    btree_node *right = node->childrens[idx+1];
    int t = T->t;

    int i = 0;

    // Separator goes to slot t-1; the right child's keys follow at t, t+1, ...
    // (the old code wrote every one of them to slot t+1).
    left->keys[t-1] = node->keys[idx];
    for (i = 0; i < t-1; i++) {
        left->keys[t+i] = right->keys[i];
    }
    if (left->leaf == 0) {
        for (i = 0; i < t; i++) {
            left->childrens[t+i] = right->childrens[i];
        }
    }
    left->num += t;

    btree_destroy_node(right);

    // Close the gap left in the parent by the separator and the right child.
    for (i = idx+1; i < node->num; i++) {
        node->keys[i-1] = node->keys[i];
        node->childrens[i] = node->childrens[i+1];
    }
    // After the shift the last used slot still holds a stale copy; clear it.
    // The old code cleared slot num+1, past the array for a full node.
    node->childrens[node->num] = NULL;
    node->num -= 1;

    if (node->num == 0) {
        T->root = left;
        btree_destroy_node(node);
    }
}

// Delete key from the subtree rooted at node.
//   T    - tree; T->root may be replaced (after a merge empties the root) or
//          set to NULL (when the last key is removed)
//   node - subtree to search; NULL is a no-op
//   key  - key to remove
// If the key is not present, "Cannot del key = ..." is printed once a leaf
// is reached.
//
// Before stepping into a child that holds only t-1 keys, the child is topped
// up by borrowing a key from a sibling with at least t keys, or by merging
// it with a sibling, so a removal further down never leaves a node short.
void btree_delete_key(btree *T, btree_node *node, KEY_VALUE key) {

    if (node == NULL) return;

    int idx = 0, i;

    while (idx < node->num && key > node->keys[idx]){
        idx++;
    }

    if (idx < node->num && key == node->keys[idx]) {

        if (node->leaf == 1) {

            for (i = idx; i < node->num-1; i++) {
                node->keys[i] = node->keys[i+1];
            }

            node->keys[node->num - 1] = 0;
            node->num--;

            // Only the root may run out of keys. Release it with
            // btree_destroy_node so its arrays are freed too.
            if (node->num == 0 && node == T->root) {
                btree_destroy_node(node);
                T->root = NULL;
            }

            return ;
        }

        btree_node *left = node->childrens[idx];
        btree_node *right = node->childrens[idx+1];

        if (left->num >= T->t) {

            // Replace the key with its in-order predecessor: the last key of
            // the rightmost leaf under the left child. Taking the left
            // child's own last key would be wrong when it is not a leaf.
            btree_node *p = left;
            while (p->leaf == 0) p = p->childrens[p->num];
            KEY_VALUE pred = p->keys[p->num - 1];

            node->keys[idx] = pred;
            btree_delete_key(T, left, pred);

        } else if (right->num >= T->t) {

            // Symmetric case: in-order successor, the first key of the
            // leftmost leaf under the right child.
            btree_node *s = right;
            while (s->leaf == 0) s = s->childrens[0];
            KEY_VALUE succ = s->keys[0];

            node->keys[idx] = succ;
            btree_delete_key(T, right, succ);

        } else {

            // btree_merge may destroy node when node was a root with a single
            // key, so continue through the saved left pointer, not via node.
            btree_merge(T, node, idx);
            btree_delete_key(T, left, key);

        }
    } else {

        // A leaf has no subtree to continue into: the key is absent.
        btree_node *child = (node->leaf == 1) ? NULL : node->childrens[idx];
        if (child == NULL) {
            printf("Cannot del key = %d\n", key);
            return;
        }

        if (child->num == T->t - 1) {

            btree_node *left = NULL;
            btree_node *right = NULL;
            if (idx - 1 >= 0)
                left = node->childrens[idx-1];   // left sibling (was idx+1)
            if (idx + 1 <= node->num)
                right = node->childrens[idx+1];

            if ((left && left->num >= T->t) || (right && right->num >= T->t)) {

                // Prefer the right sibling only when it holds more keys.
                int richR = 0;
                if (right != NULL) richR = 1;
                if (left && right) richR = (right->num > left->num) ? 1 : 0;

                if (right && right->num >= T->t && richR){  // borrow from next

                    // separator moves down to the end of child, together
                    // with the right sibling's first child
                    child->keys[child->num] = node->keys[idx];
                    child->childrens[child->num+1] = right->childrens[0];
                    child->num++;

                    // right sibling's first key moves up; close the gap
                    node->keys[idx] = right->keys[0];
                    for (i = 0; i < right->num - 1; i++) {
                        right->keys[i] = right->keys[i+1];
                        right->childrens[i] = right->childrens[i+1];
                    }

                    right->keys[right->num-1] = 0;
                    right->childrens[right->num-1] = right->childrens[right->num];
                    right->childrens[right->num] = NULL;
                    right->num--;

                } else { // borrow from prev

                    // make room at the front of child
                    for(i = child->num; i > 0; i--) {
                        child->keys[i] = child->keys[i-1];
                        child->childrens[i+1] = child->childrens[i];
                    }

                    // separator moves down to the front of child, together
                    // with the left sibling's last child
                    child->childrens[1] = child->childrens[0];
                    child->childrens[0] = left->childrens[left->num];
                    child->keys[0] = node->keys[idx-1];

                    child->num++;

                    // left sibling's last key moves up
                    node->keys[idx-1] = left->keys[left->num-1];
                    left->keys[left->num-1] = 0;
                    left->childrens[left->num] = NULL;
                    left->num--;
                }
            } else if ((!left || (left->num == T->t - 1)) && (!right || (right->num == T->t - 1))) {

                // No sibling can spare a key: merge. The merged node is
                // always the left one of the pair.
                if (left && left->num == T->t - 1) {
                    btree_merge(T, node, idx-1);
                    child = left;
                } else if (right && right->num == T->t - 1) {
                    btree_merge(T, node, idx);
                }
            }
        }

        btree_delete_key(T, child, key);
    }
}

// Delete key from tree T.
// Returns -1 when the tree has no root, 0 otherwise. The return value does
// not indicate whether the key was found.
int btree_delete(btree *T, KEY_VALUE key) {
    if (T->root != NULL) {
        btree_delete_key(T, T->root, key);
        return 0;
    }
    return -1;
}

// Demo driver: insert 'A'..'Z' into a tree of degree 3, print it, then
// delete the keys from 'Z' down to 'A', printing the tree after each step.
int main() {

    btree T = {0};
    char key[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    int i;

    btree_create(&T, 3);
    srand(48);

    for (i = 0; i < 26; i++) {
        printf("%c ", key[i]);
        btree_insert(&T, key[i]);
    }

    btree_print(&T, T.root, 0);

    // walk the key table backwards so the largest key goes first
    for (i = 25; i >= 0; i--) {

        printf("\n------------------\n");
        btree_delete(&T, key[i]);
        btree_print(&T, T.root, 0);
    }

    return 0;
}

 

posted @ 2021-11-01 12:06  去伪存真  阅读(656)  评论(0)  编辑  收藏  举报