Java实现B+树

参考:

https://zhuanlan.zhihu.com/p/54084335

https://zhuanlan.zhihu.com/p/54102723

https://www.cnblogs.com/nullzx/p/8729425.html

B+树是在B-树的基础上修改得来,比B-树更简单,广泛用于数据库索引中。

m阶的b+树的特征: 
0.根结点若不是叶子结点,则至少有两个子女。
1.有n棵子树的非叶子结点中含有n个关键字(b树是n-1个),
2.这些关键字不保存数据,只用来索引,
3.所有数据都保存在叶子节点(b树是每个关键字都保存数据)。
4.所有的叶子结点中包含了全部关键字的信息,及指向含这些关键字记录的指针,
5.叶子结点本身依关键字的大小自小而大顺序链接。
6.所有的非叶子结点可以看成是索引部分,结点中仅含其子树中的最大(或最小)关键字。
7.通常在b+树上有两个头指针,一个指向根结点,一个指向关键字最小的叶子结点。叶子节点之间有指针相连

与B树的中间节点和叶子节点都存储数据和指针不同,B+树的中间节点只存储索引关键字和指向子节点的指针(不存数据),只有叶子节点才存储数据,所以我们需要两种Node类型来表示B+树的节点
@Data
public class BPlusTree<K extends Comparable<K>, V> {
    // Order of the tree: a node is split once it holds more than ORDER keys; each index key is the largest key of the matching child.
    private final static int ORDER = 4;
    // Underflow threshold: remove() rebalances a non-root node whose key count drops below this.
    private final static int MIN_ORDER = ORDER / 2;
    // Root of the tree: null while the tree is empty, otherwise a leaf or an index node.
    private BPNode<K, V> root;

    /**
     * Common base of leaf and index nodes. The two kinds differ enough that
     * the tree handles them through separate subclasses.
     *
     * @param <K> key type
     * @param <V> value type
     */
    @Data
    abstract static class BPNode<K extends Comparable<K>, V> {
        // Number of keys currently held by this node.
        private int n;

        public BPNode() {
        }

        public BPNode(int n) {
            this.n = n;
        }

        /** Returns the largest key held by this node. */
        public abstract K getMaxKey();

        /** Adds one to the key count. */
        public void increaseN() {
            n += 1;
        }

        /** Adds {@code m} to the key count. */
        public void increaseN(int m) {
            n += m;
        }

        /** Subtracts one from the key count. */
        public void reduceN() {
            n -= 1;
        }

        /** Subtracts {@code m} from the key count. */
        public void reduceN(int m) {
            n -= m;
        }

        /** Sets the index node that owns this node. */
        public abstract void setParent(BPIndexNode<K, V> parent);

        /** Returns the index node that owns this node. */
        public abstract BPIndexNode<K, V> getParent();
    }

    /**
     * Index (internal) node: stores no values, only routing keys and child pointers.
     * {@code keys.get(i)} mirrors the largest key of {@code children.get(i)}.
     *
     * @param <K> key type
     * @param <V> value type stored in the leaves below
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    static final class BPIndexNode<K extends Comparable<K>, V> extends BPNode<K, V> {
        // Routing keys, one per child.
        private List<K> keys;
        // Children are either all index nodes or all leaves.
        private List<BPNode<K, V>> children;
        // The parent of an index node is always another index node (null for the root).
        private BPIndexNode<K, V> parent;

        @Override
        public void setParent(BPIndexNode<K, V> parent) {
            this.parent = parent;
        }

        @Override
        public BPIndexNode<K, V> getParent() {
            return parent;
        }

        public BPIndexNode(int n) {
            super(n);
        }

        public BPIndexNode(int n, List<K> keys, List<BPNode<K, V>> children) {
            super(n);
            this.keys = keys;
            this.children = children;
        }

        /**
         * Returns the last routing key, i.e. the largest key reachable through this node.
         */
        @Override
        public K getMaxKey() {
            int n = getN();
            return keys.get(n - 1);
        }

        /**
         * Re-derives the routing keys from the children.
         * When a child was added or removed the two lists differ in length and the
         * key list is rebuilt; otherwise only the keys that changed are overwritten.
         */
        public void updateKeys() {
            int childSize = children.size();
            if (keys.size() != childSize) {
                // The key list is mutated later (set/add/remove), so collect into a list
                // that is guaranteed to be modifiable.
                keys = children.stream()
                        .map(BPNode::getMaxKey)
                        .collect(Collectors.toCollection(ArrayList::new));
                return;
            }
            for (int i = 0; i < childSize; i++) {
                K maxK = children.get(i).getMaxKey();
                if (!keys.get(i).equals(maxK)) {
                    keys.set(i, maxK);
                }
            }
        }

        /**
         * Splits this node in two: the upper half of the keys and children moves to a
         * new right-hand sibling, which is returned. The sibling inherits this node's
         * parent pointer but is not yet registered in that parent's child list.
         *
         * @return the newly created right sibling
         */
        private BPIndexNode<K, V> splitIndexNode() {
            int total = this.getN();
            int mid = total / 2;
            BPIndexNode<K, V> sibling = new BPIndexNode<>();

            // Positional views of the upper half. Trimming by position (not by equals)
            // keeps equal keys in the lower half intact and never triggers the
            // structural equals() that Lombok generates for nodes.
            List<K> upperKeys = this.keys.subList(mid, total);
            List<BPNode<K, V>> upperChildren = this.children.subList(mid, total);

            List<K> movedKeys = new ArrayList<>(upperKeys);
            List<BPNode<K, V>> movedChildren = new ArrayList<>(upperChildren);
            for (BPNode<K, V> child : movedChildren) {
                child.setParent(sibling);
            }
            sibling.setN(total - mid);
            sibling.setKeys(movedKeys);
            sibling.setParent(this.parent);
            sibling.setChildren(movedChildren);

            // Shrink this node to the lower half.
            upperKeys.clear();
            upperChildren.clear();
            this.setN(mid);
            return sibling;
        }
    }

    /**
     * A key/value pair as stored in a leaf node. Accessors and the two-argument
     * constructor are generated by the Lombok annotations.
     */
    @Data
    @AllArgsConstructor
    static class Entry<K extends Comparable<K>, V> {
        // Key the entry is ordered and looked up by.
        private K key;
        // Value returned by searches that match the key.
        private V value;
    }

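    /**
     * Note on searching the tree: a search for an insertion point must always
     * yield a target leaf, whereas a search for a key to delete may return null
     * when no leaf can contain it.
     */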


    /**
     * Leaf node: holds the actual key/value entries in ascending key order and is
     * linked to its neighbouring leaves for range scans and rebalancing.
     */
    @Data
    @AllArgsConstructor
    static final class BPLeafNode<K extends Comparable<K>, V> extends BPNode<K, V> {

        // Entries of this leaf, kept sorted by key.
        private List<Entry<K, V>> entries;
        // Owning index node (null when the leaf is the root).
        private BPIndexNode<K, V> parent;
        // Neighbouring leaves in key order.
        private BPLeafNode<K, V> prev;
        private BPLeafNode<K, V> next;

        public BPLeafNode() {
        }

        public BPLeafNode(int n) {
            super(n);
        }

        @Override
        public void setParent(BPIndexNode<K, V> parent) {
            this.parent = parent;
        }

        @Override
        public BPIndexNode<K, V> getParent() {
            return parent;
        }

        /**
         * Returns the key of the last entry, i.e. the largest key in this leaf.
         */
        @Override
        public K getMaxKey() {
            int n = getN();
            return entries.get(n - 1).getKey();
        }

        /**
         * Creates a fresh leaf containing the single given entry.
         * The receiver is not modified.
         *
         * @param key   key of the only entry
         * @param value value of the only entry
         * @return a new leaf holding exactly one entry
         */
        public BPLeafNode<K, V> build(K key, V value) {
            List<Entry<K, V>> single = new ArrayList<>();
            single.add(new Entry<K, V>(key, value));
            BPLeafNode<K, V> node = new BPLeafNode<>();
            node.setEntries(single);
            node.setN(1);
            return node;
        }

        /**
         * Inserts an entry at its sorted position and increments the key count.
         *
         * @param entry entry to add
         */
        public void addEntry(Entry<K, V> entry) {
            int n = this.getN();
            List<Entry<K, V>> entries = this.getEntries();
            K key = entry.getKey();
            // Advance past every entry whose key is smaller than the new one.
            int index = 0;
            while (index < n && key.compareTo(entries.get(index).getKey()) > 0) {
                index++;
            }
            entries.add(index, entry);
            this.increaseN();
        }

        /**
         * Removes the entries whose key equals {@code key}.
         * The key count is only touched when something was actually removed, and is
         * then taken from the list so it stays exact.
         *
         * @param key key to delete
         */
        public void remove(K key) {
            List<Entry<K, V>> entries = this.getEntries();
            if (entries.removeIf(entry -> entry.getKey().equals(key))) {
                this.setN(entries.size());
            }
        }

        /**
         * Splits this leaf: the upper half of the entries moves to a new leaf that is
         * linked in directly after this one. The new leaf inherits this leaf's parent
         * pointer but is not yet registered in that parent's child list.
         *
         * @return the newly created right-hand leaf
         */
        private BPLeafNode<K, V> splitLeafNode() {
            int total = this.getN();
            int mid = total / 2;
            // Positional view of the upper half; clearing it trims this leaf without
            // relying on Entry.equals (which would also drop equal entries below mid).
            List<Entry<K, V>> upperHalf = this.getEntries().subList(mid, total);

            BPLeafNode<K, V> newLeaf = new BPLeafNode<>();
            newLeaf.setEntries(new ArrayList<>(upperHalf));
            newLeaf.setN(total - mid);
            newLeaf.setParent(this.parent);
            newLeaf.setPrev(this);
            newLeaf.setNext(this.next);
            // Keep the backward link of the old successor consistent.
            if (this.next != null) {
                this.next.setPrev(newLeaf);
            }

            upperHalf.clear();
            this.setN(mid);
            this.setNext(newLeaf);
            return newLeaf;
        }
    }

    /**
     * Looks up the value stored under a single key.
     *
     * @param key key to look for
     * @return the matching value, or null when the tree is empty or the key is absent
     */
    public V search(K key) {
        BPNode<K, V> current = this.root;
        if (current == null) {
            return null;
        }
        // Descend through the index levels until a leaf is reached.
        while (!(current instanceof BPLeafNode)) {
            BPIndexNode<K, V> inner = (BPIndexNode<K, V>) current;
            int count = inner.getN();
            List<K> routing = inner.keys;
            int slot;
            for (slot = 0; slot < count; slot++) {
                if (key.compareTo(routing.get(slot)) <= 0) {
                    break;
                }
            }
            // Larger than every routing key: the key cannot be in this subtree.
            if (slot == count) {
                return null;
            }
            current = inner.children.get(slot);
        }
        BPLeafNode<K, V> leaf = (BPLeafNode<K, V>) current;
        for (Entry<K, V> candidate : leaf.entries) {
            if (candidate.key.equals(key)) {
                return candidate.value;
            }
        }
        return null;
    }

    /**
     * Finds the leaf responsible for a key.
     * For an insertion a leaf is always returned (the right-most one when the key
     * exceeds every routing key); for a removal null is returned in that case.
     *
     * @param key      key to locate
     * @param strategy whether the lookup serves an insertion or a removal
     * @return the target leaf, or null for an empty tree or an out-of-range removal
     */
    public BPLeafNode<K, V> searchNode(K key, Strategy strategy) {
        BPNode<K, V> current = this.root;
        if (current == null) {
            return null;
        }
        while (!(current instanceof BPLeafNode)) {
            BPIndexNode<K, V> inner = (BPIndexNode<K, V>) current;
            int count = inner.getN();
            List<K> routing = inner.keys;
            int slot = 0;
            while (slot < count && key.compareTo(routing.get(slot)) > 0) {
                slot++;
            }
            if (slot < count) {
                current = inner.children.get(slot);
                continue;
            }
            // The key is larger than everything below this node.
            if (!strategy.equals(Strategy.INSERT)) {
                return null;
            }
            // Insertions still need a home: follow the right-most child.
            current = inner.children.get(slot - 1);
        }
        return (BPLeafNode<K, V>) current;
    }

    /**
     * Range lookup: collects the values whose key equals {@code start} or lies
     * strictly between {@code start} and {@code end}.
     *
     * @param start lower bound, included
     * @param end   upper bound, excluded
     * @return the matching values in key order, or null when the tree is empty or
     *         {@code start} is larger than every routing key on the way down
     */
    public List<V> search(K start, K end) {
        BPNode<K, V> current = this.root;
        if (current == null) {
            return null;
        }

        List<V> matches = new ArrayList<>();
        // Descend to the leaf that may contain the lower bound.
        while (!(current instanceof BPLeafNode)) {
            BPIndexNode<K, V> inner = (BPIndexNode<K, V>) current;
            int count = inner.getN();
            List<K> routing = inner.keys;
            int slot = 0;
            while (slot < count && start.compareTo(routing.get(slot)) > 0) {
                slot++;
            }
            if (slot == count) {
                return null;
            }
            current = inner.children.get(slot);
        }

        // Scan the first leaf unconditionally, then follow the leaf chain for as
        // long as the next leaf still starts below the upper bound.
        BPLeafNode<K, V> leaf = (BPLeafNode<K, V>) current;
        do {
            for (Entry<K, V> candidate : leaf.entries) {
                boolean isStart = candidate.key.equals(start);
                if (isStart || (candidate.key.compareTo(start) > 0 && candidate.key.compareTo(end) < 0)) {
                    matches.add(candidate.value);
                }
            }
            leaf = leaf.next;
        } while (leaf != null && leaf.entries.get(0).getKey().compareTo(end) < 0);
        return matches;
    }

    /**
     * Inserts a key/value pair.
     * The entry is added to the responsible leaf; an overfull leaf is split and the
     * split is propagated upward through overfull index nodes. Finally the routing
     * keys on the path to the root are refreshed, because a key appended as the new
     * maximum changes the largest key of every ancestor even when nothing was split.
     *
     * @param key   key to insert
     * @param value value stored under the key
     */
    public void insert(K key, V value) {
        // An empty tree starts out as a single leaf that doubles as the root.
        if (root == null) {
            root = new BPLeafNode<K, V>().build(key, value);
            return;
        }
        BPLeafNode<K, V> leafNode = searchNode(key, Strategy.INSERT);
        leafNode.addEntry(new Entry<>(key, value));

        // The node whose maximum may have grown: the leaf itself, or after a split
        // the new right-hand leaf (which received the upper half of the entries).
        BPNode<K, V> grown = leafNode;

        if (leafNode.getN() > ORDER) {
            BPLeafNode<K, V> newLeaf = leafNode.splitLeafNode();
            grown = newLeaf;
            // Registers the new leaf next to the old one and returns their parent.
            BPIndexNode<K, V> parent = updateParent(leafNode, newLeaf);
            // The parent may now be overfull as well; keep splitting upward.
            while (parent.getN() > ORDER) {
                BPIndexNode<K, V> newParent = parent.splitIndexNode();
                // After registering the new sibling this yields the grandparent.
                parent = updateParent(parent, newParent);
            }
        }

        // Refresh routing keys bottom-up so lookups for the new maximum are not
        // rejected by a stale key in an ancestor.
        for (BPIndexNode<K, V> ancestor = grown.getParent(); ancestor != null; ancestor = ancestor.getParent()) {
            ancestor.updateKeys();
        }
    }

    /**
     * Removes the entry stored under {@code key}; does nothing when the key is absent.
     * A non-root leaf that drops below {@code MIN_ORDER} first tries to borrow an
     * entry from a sibling under the same parent and otherwise merges with one.
     * Underflow caused by a merge is then repaired level by level on the way up,
     * and a root left with a single child is replaced by that child.
     *
     * @param key key to delete
     */
    public void remove(K key) {
        BPLeafNode<K, V> node = searchNode(key, Strategy.REMOVE);
        if (node == null) {
            return;
        }
        // The routed leaf does not necessarily contain the key; leave the tree
        // untouched in that case instead of shrinking the leaf's counter.
        boolean present = false;
        for (Entry<K, V> entry : node.getEntries()) {
            if (entry.getKey().equals(key)) {
                present = true;
                break;
            }
        }
        if (!present) {
            return;
        }

        node.remove(key);
        // Take the count from the list so it stays exact even if several equal keys were dropped.
        node.setN(node.getEntries().size());

        // The root may hold any number of entries; other leaves only need work on underflow.
        if (node == root || node.getN() >= MIN_ORDER) {
            return;
        }

        BPIndexNode<K, V> parent = node.parent;
        // Capture the neighbours up front: merge() rewires node.next / node.prev.
        BPLeafNode<K, V> right = node.next;
        BPLeafNode<K, V> left = node.prev;
        boolean rightIsSibling = right != null && right.parent == parent;
        boolean leftIsSibling = left != null && left.parent == parent;

        if (rightIsSibling && right.getN() > MIN_ORDER) {
            borrow(node, right);
        } else if (leftIsSibling && left.getN() > MIN_ORDER) {
            borrow(node, left);
        } else if (rightIsSibling) {
            // Right sibling is absorbed into node, so the parent loses the sibling.
            merge(node, right);
            detachChild(parent, right);
        } else if (leftIsSibling) {
            // node is absorbed into its left sibling, so the parent loses node.
            merge(node, left);
            detachChild(parent, node);
        } else {
            // No sibling under the same parent: nothing to borrow from or merge with.
            return;
        }

        // Borrowing and merging both change the maxima seen by the parent.
        parent.updateKeys();

        while (parent != null && parent.getN() < MIN_ORDER) {
            BPIndexNode<K, V> grandfather = parent.parent;
            if (grandfather == null) {
                // Top of the tree: a root with one child is redundant, promote the child.
                if (parent.getN() == 1) {
                    BPNode<K, V> onlyChild = parent.getChildren().get(0);
                    onlyChild.setParent(null);
                    root = onlyChild;
                }
                break;
            }

            // Index nodes have no sibling pointers; locate the neighbours via the grandparent.
            int i = indexOfChild(grandfather, parent);
            int n = grandfather.getN();
            if (i < 0) {
                break;
            }

            BPIndexNode<K, V> uncle;
            Location location;
            if (i + 1 < n) {
                uncle = (BPIndexNode<K, V>) grandfather.getChildren().get(i + 1);
                location = Location.RIGHT;
            } else if (i > 0) {
                uncle = (BPIndexNode<K, V>) grandfather.getChildren().get(i - 1);
                location = Location.LEFT;
            } else {
                // Only child of the grandparent: nothing to rebalance with on this level.
                parent = grandfather;
                continue;
            }

            if (uncle.getN() > MIN_ORDER) {
                borrow(parent, uncle, location);
            } else {
                merge(parent, uncle, location);
                // RIGHT: uncle was absorbed into parent; LEFT: parent was absorbed into uncle.
                detachChild(grandfather, location.equals(Location.RIGHT) ? uncle : parent);
            }
            // Either way the grandparent's routing keys are affected.
            grandfather.updateKeys();
            parent = grandfather;
        }
    }

    /** Returns the position of {@code child} (compared by identity) among the owner's children, or -1. */
    private int indexOfChild(BPIndexNode<K, V> owner, BPNode<K, V> child) {
        List<BPNode<K, V>> kids = owner.getChildren();
        for (int i = 0; i < kids.size(); i++) {
            if (kids.get(i) == child) {
                return i;
            }
        }
        return -1;
    }

    /** Unlinks {@code child} (compared by identity) from the owner and decrements the owner's count. */
    private void detachChild(BPIndexNode<K, V> owner, BPNode<K, V> child) {
        int position = indexOfChild(owner, child);
        if (position >= 0) {
            owner.getChildren().remove(position);
            owner.reduceN();
        }
    }

    /**
     * Moves one child (with its routing key) from a sibling index node into {@code node}.
     * From a right sibling the smallest child is taken and appended; from a left
     * sibling the largest child is taken and prepended.
     *
     * @param node     index node that receives the child
     * @param brother  sibling that gives up a child
     * @param location side on which {@code brother} sits relative to {@code node}
     */
    private void borrow(BPIndexNode<K, V> node, BPIndexNode<K, V> brother, Location location) {
        boolean fromRight = location.equals(Location.RIGHT);

        // Position inside the sibling of the child closest to node.
        int takeAt = fromRight ? 0 : brother.getN() - 1;
        BPNode<K, V> movedChild = brother.getChildren().remove(takeAt);
        K movedKey = brother.getKeys().remove(takeAt);
        brother.reduceN();

        if (fromRight) {
            node.getKeys().add(movedKey);
            node.getChildren().add(movedChild);
        } else {
            node.getKeys().add(0, movedKey);
            node.getChildren().add(0, movedChild);
        }
        node.increaseN();

        // The moved child now belongs to node.
        movedChild.setParent(node);
    }

    /**
     * Merges two sibling index nodes. With {@code Location.RIGHT} the right sibling
     * {@code brother} is absorbed into {@code node}; otherwise {@code node} is
     * absorbed into its left sibling {@code brother}. The caller is responsible for
     * unlinking the absorbed node from the common parent.
     * When the common parent is the root and has exactly these two children, the
     * surviving node becomes the new root.
     *
     * @param node     the underfull index node
     * @param brother  the sibling it is merged with
     * @param location side on which {@code brother} sits relative to {@code node}
     */
    private void merge(BPIndexNode<K, V> node, BPIndexNode<K, V> brother, Location location) {
        boolean absorbRight = location.equals(Location.RIGHT);
        // The left-hand node of the pair survives so that key order is preserved by appending.
        BPIndexNode<K, V> survivor = absorbRight ? node : brother;
        BPIndexNode<K, V> absorbed = absorbRight ? brother : node;

        int moved = absorbed.getN();
        for (BPNode<K, V> child : absorbed.getChildren()) {
            child.setParent(survivor);
        }
        survivor.getChildren().addAll(absorbed.getChildren());
        survivor.getKeys().addAll(absorbed.getKeys());
        survivor.increaseN(moved);

        // The root is about to be left with a single child: promote the survivor.
        if (node.parent == root && root.getN() == 2) {
            root = survivor;
            // Cut the link to the discarded root; otherwise a later split of the new
            // root would register its sibling with the dead node instead of growing the tree.
            survivor.setParent(null);
        }
    }

    /**
     * Moves one entry from a sibling leaf into {@code node}: the smallest entry when
     * {@code brother} is the right neighbour, the largest entry otherwise.
     *
     * @param node    leaf that receives the entry
     * @param brother neighbouring leaf that gives up an entry
     */
    private void borrow(BPLeafNode<K, V> node, BPLeafNode<K, V> brother) {
        List<Entry<K, V>> donor = brother.getEntries();
        Entry<K, V> moved;
        if (node.next == brother) {
            moved = donor.remove(0);
        } else {
            moved = donor.remove(brother.getN() - 1);
        }
        brother.reduceN();
        // addEntry() already increments node's count; incrementing again here would
        // leave the count one higher than the number of stored entries.
        node.addEntry(moved);
    }

    /**
     * Merges two neighbouring leaves. When {@code brother} is the right neighbour it
     * is absorbed into {@code node}; otherwise {@code node} is absorbed into its left
     * neighbour {@code brother}. The absorbed leaf is unlinked from the leaf chain;
     * removing it from the parent's child list is left to the caller.
     *
     * @param node    the underfull leaf
     * @param brother the neighbour it is merged with
     */
    private void merge(BPLeafNode<K, V> node, BPLeafNode<K, V> brother) {
        boolean absorbRight = node.next == brother;
        // The left-hand leaf of the pair survives so entries stay sorted when appended.
        BPLeafNode<K, V> survivor = absorbRight ? node : brother;
        BPLeafNode<K, V> absorbed = absorbRight ? brother : node;

        survivor.getEntries().addAll(absorbed.getEntries());
        survivor.setN(survivor.getEntries().size());

        // Bypass the absorbed leaf in the chain. It may have been the last leaf,
        // in which case there is no successor whose back-link needs fixing.
        survivor.next = absorbed.next;
        if (absorbed.next != null) {
            absorbed.next.prev = survivor;
        }
    }


    /**
     * Registers {@code newLeaf}, the right-hand product of splitting {@code oldLeaf},
     * with their parent. Despite the parameter names this is used for leaves and
     * index nodes alike. When {@code oldLeaf} has no parent it was the root, so a
     * new root is created above the pair.
     *
     * @param oldLeaf the node that was split
     * @param newLeaf the new sibling to insert directly after {@code oldLeaf}
     * @return the parent of both nodes (possibly the freshly created root)
     */
    private BPIndexNode<K, V> updateParent(BPNode<K, V> oldLeaf, BPNode<K, V> newLeaf) {
        BPIndexNode<K, V> parent = oldLeaf.getParent();
        if (parent != null) {
            // Locate the split node by identity; the Lombok-generated equals()
            // compares nodes structurally and is not suitable for this.
            int oldIndex = -1;
            for (int i = 0; i < parent.children.size(); i++) {
                if (parent.children.get(i) == oldLeaf) {
                    oldIndex = i;
                    break;
                }
            }
            // The new sibling is not necessarily the last child.
            parent.children.add(oldIndex + 1, newLeaf);
            newLeaf.setParent(parent);
            parent.updateKeys();
            parent.increaseN();
            return parent;
        }

        // The split node was the root: grow the tree by one level.
        List<K> rootKeys = new ArrayList<>();
        rootKeys.add(oldLeaf.getMaxKey());
        rootKeys.add(newLeaf.getMaxKey());
        List<BPNode<K, V>> rootChildren = new ArrayList<>();
        rootChildren.add(oldLeaf);
        rootChildren.add(newLeaf);

        parent = new BPIndexNode<>();
        parent.setChildren(rootChildren);
        parent.setKeys(rootKeys);
        parent.setN(2);
        parent.setParent(null);
        // Both halves must point at the new root; otherwise the next split would
        // again find no parent and replace the root, orphaning the rest of the tree.
        oldLeaf.setParent(parent);
        newLeaf.setParent(parent);
        root = parent;
        return parent;
    }


    /**
     * Small demo: inserts the keys 0..9 (each mapped to itself), then prints the
     * result of a point lookup for 5 and of a range lookup over [4, 9).
     */
    public static void main(String[] args) {
        BPlusTree<Integer, Integer> tree = new BPlusTree<>();
        int next = 0;
        while (next < 10) {
            tree.insert(next, next);
            next++;
        }
        Integer single = tree.search(5);
        System.out.println(single);
        List<Integer> range = tree.search(4, 9);
        System.out.println(range);
    }


}

// Purpose of a leaf lookup: an INSERT lookup always yields a leaf, a REMOVE lookup may yield null.
enum Strategy {
    INSERT, REMOVE;
}

/**
 * Tells whether a sibling sits to the left or to the right of the node being rebalanced.
 */
enum Location {
    LEFT, RIGHT;
}

 

posted @ 2021-10-13 17:05  Mars.wang  阅读(138)  评论(0编辑  收藏  举报