Data Structure笔记

单调栈

主要题型

给定一个序列，求在这个序列中每一个数左边/右边，离他最近的数但比他小/大的数是什么

下一个更大元素

简单分析：

先从nums2数组中找出每一个数右边第一个比它大的数（使用单调栈）
遍历nums1找到其存在于nums2中的元素并进行答案输出（使用哈希表）

假设有7, 22, 20, 8这样的数列，从右往左遍历。8入栈后，遍历到20这个元素。因为20比8大，所以比8小的数一定比20小（满足题目求的下一个大数字的条件），且20比8更近，也就是说这个8一点用也没有了，一切风头都被20占了去，所以8出栈。

......

当遍历到22，依照上方描述，20也出栈，栈空了，此时代表22的右边没有比它大的数，所以返回-1。最后22入栈

class Solution {
public:
    /// For every value of nums1, reports the first larger value found to its
    /// right inside nums2, or -1 when there is none.
    ///
    /// nums1 is overwritten with the answers and returned (the input buffer is
    /// reused on purpose). A value of nums1 that does not occur in nums2 is
    /// left untouched.
    vector<int> nextGreaterElement(vector<int>& nums1, vector<int>& nums2) {
        // value in nums2 -> first greater value on its right (-1 if none)
        unordered_map<int, int> nextGreater;
        // Monotonic stack: holds the still-relevant values seen so far on the right
        stack<int> candidates;

        // "To the right" means we have to walk nums2 backwards
        for (auto it = nums2.rbegin(); it != nums2.rend(); ++it)
        {
            const int current = *it;
            // A smaller value behind a bigger, closer one can never be an answer again.
            // Strict comparison is enough because the values are stated to be distinct.
            while (!candidates.empty() && candidates.top() < current)
                candidates.pop();

            // Whatever survives on top is the nearest greater value
            const int answer = candidates.empty() ? -1 : candidates.top();
            nextGreater.emplace(current, answer);

            candidates.push(current);
        }

        // Translate nums1 in place through the lookup table
        for (int& value : nums1)
        {
            const auto found = nextGreater.find(value);
            if (found != nextGreater.end())
                value = found->second;
        }
        return nums1;
    }
};

每日温度

题目分析：题目要求的是某一个数的右边第一个比它大的数

所以使用单调栈，逆序遍历数组

class Solution {
public:
    /// For each day, returns how many days pass until a strictly warmer day,
    /// or 0 when no warmer day follows.
    vector<int> dailyTemperatures(const vector<int>& temperatures) {
        const int dayCount = temperatures.size();
        vector<int> waitDays(dayCount);
        // Monotonic stack of day indices that may still be the "next warmer day"
        stack<int> warmerDays;

        for (int day = dayCount - 1; day >= 0; --day)
        {
            // Days that are not warmer than today are hidden behind today from now on
            while (!warmerDays.empty() && temperatures[warmerDays.top()] <= temperatures[day])
                warmerDays.pop();

            // Distance to the closest surviving day, if any
            if (warmerDays.empty())
                waitDays[day] = 0;
            else
                waitDays[day] = warmerDays.top() - day;

            warmerDays.push(day);
        }
        return waitDays;
    }
};

小技巧

如果是求某个数左边，那么正序遍历数组；如果求的是右边，那么逆序遍历数组
出栈的条件为：栈非空 且 大于或小于

如何判断大于还是小于，把遍历到的那个数写在左边，栈顶的数写在右边，中间的符号为题目给的要求（例如求右边第一个比他大的数，那么符号为>）；是否写等于号根据题意判断
```
temperatures[i] >= temperatures[stk.top()]
```

单调队列

滑动窗口

class Solution {
public:
    /// Debug helper: slides a window of width k over nums and prints the
    /// window content (space separated, one line per step) to std::cout.
    ///
    /// @param nums values to slide over
    /// @param k    window width
    void slidingWindow(const vector<int>& nums, int k)
    {
        // Indices currently inside the window, oldest at the front
        list<int> q;
        const int numsSize = nums.size();
        for (int i = 0; i < numsSize; i++)
        {
            // Drop the front index once it has slid out of the window
            if (!q.empty() && i - k == q.front())
                q.pop_front();
            // Grow the window with the new index
            q.push_back(i);
            // Print the window. A real space is used as separator: std::ends
            // inserts a '\0' byte, which only looks like a space on some consoles.
            for (int index : q)
                cout << nums[index] << ' ';
            // endl kept on purpose so debug output is flushed line by line
            cout << endl;
        }
    }
};

使用list模拟queue，目的是遍历方便，适合Debug

int main()
{
    Solution s;
    // 窗口大小为3
    s.slidingWindow({1,3,-1,-3,5,3,6,7}, 3);
    return 0;
}

但是一般情况下，我们通常使用右标识来控制滑动窗口。因为实际做题下，双端队列一般优化为单调队列，记录的数据不一定完全按着窗口顺序来

class Solution {
public:
    /// Debug helper: slides a window of width k over nums, driven by the right
    /// boundary, and prints the window content (space separated, one line per
    /// step) to std::cout. Prints nothing when k is not positive.
    ///
    /// @param nums values to slide over
    /// @param k    window width
    void maxSlidingWindow(const vector<int>& nums, int k) 
    {
        // A non-positive width would empty the list below and then read its front
        if (k <= 0)
            return;
        // Doubly linked list used only because it is easy to inspect while
        // debugging; the real solution uses a deque
        list<int> l;
        const int numsSize = nums.size();
        for (int right = 0; right < numsSize; right++)
        {
            // Add the new index
            l.push_back(right);
            // Drop the front index once the window has grown past k elements
            const int left = right - k + 1;
            if (l.front() < left)
                l.pop_front();
            // Print the window. A real space is used as separator: std::ends
            // inserts a '\0' byte, which only looks like a space on some consoles.
            for (int i = l.front(); i <= right; i++)
                cout << nums[i] << ' ';
            // endl kept on purpose so debug output is flushed line by line
            cout << endl;
        }
    }
};

滑动窗口最大值

单纯的从滑动窗口的角度出发，每次窗口移动时遍历其中元素，找出最大值，这种是暴力做法

而采用单调队列的方法能够优化滑动窗口问题，降低时间复杂度

先判断是否需要循环出队尾，使deque满足单调性
新元素入队（窗口的增长）
窗口过长部分裁剪

class Solution {
public:
    /// Returns the maximum of every window of width k over nums, left to right.
    ///
    /// @param nums input values
    /// @param k    window width
    /// @return one maximum per full window (nums.size() - k + 1 values);
    ///         empty when k is not positive or larger than nums.size()
    vector<int> maxSlidingWindow(const vector<int>& nums, int k) {
        const int numsSize = nums.size();
        // No full window exists: avoids a negative result size and empty-deque reads
        if (k <= 0 || k > numsSize)
            return {};

        // Monotonic (decreasing by value) deque of indices; the front is
        // always the index of the current window's maximum
        deque<int> dq;
        vector<int> result;
        result.reserve(numsSize - k + 1);
        for (int right = 0; right < numsSize; right++)
        {
            // Keep the deque decreasing: anything not larger than the newcomer
            // can never be a maximum again
            while (!dq.empty() && nums[right] >= nums[dq.back()])
                dq.pop_back();
            dq.push_back(right);

            // The window moves one step at a time, so at most one index can
            // fall off; `right` itself is always inside, so dq stays non-empty
            const int left = right - k + 1;
            if (dq.front() < left)
                dq.pop_front();

            // Start recording once the window has reached width k
            if (left >= 0)
                result.push_back(nums[dq.front()]);
        }
        return result;
    }
};

Trie树-字典树

用来快速存储字符串集合的数据结构

假设只存储小写字母，那么一个节点最多会有26个子节点，那么在初始化的时候先分配这26个空间，但是先置为nullptr

/// Prefix tree over the lowercase letters 'a'..'z'.
///
/// Characters outside that range are never used as child indices:
/// lookups simply report "not found", insert() throws invalid_argument
/// (declared in <stdexcept>).
class Trie
{
private:
    struct TrieNode
    {
        bool isEnd;                              // true when a stored word ends here
        vector<shared_ptr<TrieNode>> children;   // one slot per letter, nullptr = absent

        TrieNode() : isEnd(false), children(26) {}
    };

    shared_ptr<TrieNode> root;

    // True when c can be mapped to a child slot
    static bool isLowercaseLetter(char c)
    {
        return c >= 'a' && c <= 'z';
    }

    // Walks the trie along prefix; returns the node reached or nullptr when
    // the path does not exist (or prefix holds an unsupported character)
    shared_ptr<TrieNode> searchPrefix(const string& prefix) const
    {
        shared_ptr<TrieNode> current = root;
        for (const char c : prefix)
        {
            if (!isLowercaseLetter(c))
                return nullptr;
            const shared_ptr<TrieNode>& next = current->children[c - 'a'];
            if (next == nullptr)
                return nullptr;
            current = next;
        }
        return current;
    }

public:
    Trie() : root(make_shared<TrieNode>()) {}

    /// Stores word in the trie.
    /// @throws invalid_argument if word contains a character outside 'a'..'z';
    ///         the trie is left unchanged in that case
    void insert(const string& word)
    {
        // Validate first so a rejected word leaves no half-built path behind
        for (const char c : word)
        {
            if (!isLowercaseLetter(c))
                throw invalid_argument("Trie only stores lowercase letters a-z");
        }

        shared_ptr<TrieNode> current = root;
        for (const char c : word)
        {
            shared_ptr<TrieNode>& next = current->children[c - 'a'];
            if (next == nullptr)
                next = make_shared<TrieNode>();
            current = next;
        }
        current->isEnd = true;
    }

    /// True when exactly this word was inserted before.
    bool search(const string& word) const
    {
        const shared_ptr<TrieNode> p = searchPrefix(word);
        return p != nullptr && p->isEnd;
    }

    /// True when at least one stored word starts with prefix.
    bool startsWith(const string& prefix) const
    {
        return searchPrefix(prefix) != nullptr;
    }
};

并查集

简单的示例代码，不考虑合并或者查找的树不在集合中的情况

/// Disjoint-set (union-find) over int elements with path compression.
/// Every root carries the number of elements of its set.
class UnionFindSet
{
private:
    unordered_map<int, int> parent;   // element -> parent (roots point to themselves)
    unordered_map<int, int> size;     // root -> number of elements in its set
public:
    /// Registers x as a singleton set. Adding an element that is already
    /// known is a no-op, so existing unions are never undone.
    void add(int x)
    {
        // try_emplace leaves an existing entry untouched
        if (parent.try_emplace(x, x).second)
            size[x] = 1;
    }

    /// Returns the representative (root) of the set containing x.
    /// An element that was never added is registered as a singleton first.
    int find(int x)
    {
        add(x);
        // First pass: locate the root
        int root = x;
        while (parent[root] != root)
            root = parent[root];
        // Second pass: path compression, point every visited node at the root
        while (parent[x] != root)
        {
            const int next = parent[x];
            parent[x] = root;
            x = next;
        }
        return root;
    }

    /// Number of elements in the set containing x.
    int rootSize(int x)
    {
        return size[find(x)];
    }

    /// Unites the sets of x and y; the root of y becomes the root of the union.
    void merge(int x, int y)
    {
        const int xRoot = find(x);
        const int yRoot = find(y);
        // Nothing to do when both already share a set
        if (xRoot == yRoot)
            return;
        parent[xRoot] = yRoot;
        size[yRoot] += size[xRoot];
    }
};

二叉堆

堆是一个数据集合，是一颗完全二叉树。可以用来实现优先队列

小根堆

小根堆特点：父节点小于两个孩子节点
堆的构建：从最后一个非叶子节点开始到堆顶元素结束，执行下沉操作
up操作接受一个index，小根堆中直接将该位置和父节点比较，如果比父节点小，则说明优先级更高，和父节点互换，然后递归上述操作，称作上浮
down操作接受一个index，小根堆中先和两个孩子比较（如果有的话），然后选出一个最小的进行互换（除了是自己），最后递归上述操作，称作下沉

/// Binary min-heap stored in a vector.
///
/// Storage is 1-based: heap[0] is an unused placeholder so that the children
/// of index i live at 2*i and 2*i+1 and its parent at i/2. DataType therefore
/// has to be default constructible, and is ordered with operator< only.
/// Throws std::out_of_range (declared in <stdexcept>) on access to an empty heap.
template<typename DataType>
class Heap
{
private:
    std::vector<DataType> heap;
public:
    /// Creates an empty heap (only the placeholder slot exists).
    Heap() : heap(1) {}

    /// Builds a heap from vec in O(n): copy everything, then sift down every
    /// non-leaf node starting with the last one.
    Heap(const std::initializer_list<DataType>& vec) : heap()
    {
        heap.reserve(vec.size() + 1);
        heap.emplace_back();
        heap.insert(heap.end(), vec.begin(), vec.end());
        for (std::size_t index = vec.size() / 2; index > 0; index--)
            down(index);
    }

    /// Removes the smallest element.
    /// @throws std::out_of_range when the heap is empty
    void pop()
    {
        if (heap.size() <= 1)
            throw std::out_of_range("Heap is Empty");
        // Move the last leaf to the top (skipped when it already is the top)
        if (heap.size() > 2)
            heap[1] = std::move(heap.back());
        heap.pop_back();
        down(1);
    }

    /// Inserts newData.
    void push(const DataType& newData)
    {
        heap.push_back(newData);
        up(heap.size() - 1);
    }

    /// Returns the smallest element.
    /// @throws std::out_of_range when the heap is empty
    const DataType& front() const
    {
        if (heap.size() <= 1)
            throw std::out_of_range("Heap is Empty");
        return heap[1];
    }

    /// Number of stored elements (the placeholder slot is not counted).
    std::size_t size() const
    {
        return heap.size() - 1;
    }
private:
    // Sift down: swap the node with its smallest child until neither child is smaller
    void down(std::size_t index)
    {
        for (;;)
        {
            std::size_t minIndex = index;
            const std::size_t left = index * 2;
            const std::size_t right = left + 1;
            if (left < heap.size() && heap[left] < heap[minIndex])
                minIndex = left;
            if (right < heap.size() && heap[right] < heap[minIndex])
                minIndex = right;
            // The node is already the smallest of the three
            if (minIndex == index)
                return;
            std::swap(heap[minIndex], heap[index]);
            index = minIndex;
        }
    }

    // Sift up: swap the node with its parent while it is smaller than the parent
    void up(std::size_t index)
    {
        // index / 2 == 0 means the node has reached the top (slot 0 is the placeholder)
        for (std::size_t fatherIndex = index / 2;
             fatherIndex > 0 && heap[index] < heap[fatherIndex];
             fatherIndex = index / 2)
        {
            std::swap(heap[fatherIndex], heap[index]);
            index = fatherIndex;
        }
    }
};

int main()
{
    // Demo driver: heap-sort a small sample by repeatedly taking the minimum
    Heap<int> h({1, 2, 3, 4, 2, -1});
    std::size_t heapSize = h.size();
    for (std::size_t i = 0; i < heapSize; i++)
    {
        // Separate values with a real space; std::ends would emit a '\0' byte
        cout << h.front() << ' ';
        h.pop();
    }
}

大根堆的特点是：父节点大于两个孩子节点，实现略

利用堆查找无序数组中最小K个数

class Solution {
public:
    /// Returns the k smallest values of arr, largest of them first.
    ///
    /// @param arr unsorted input (not modified)
    /// @param k   how many values to return; clamped to arr.size(),
    ///            a non-positive k yields an empty result
    vector<int> smallestK(vector<int>& arr, int k) {
        const int arrSize = arr.size();
        // Never read past the end of arr when k exceeds its length
        const int keep = k < arrSize ? k : arrSize;
        if (keep <= 0)
            return {};

        // Max-heap holding the `keep` smallest values seen so far
        std::priority_queue<int> maxHeap(arr.begin(), arr.begin() + keep);

        // A value smaller than the current maximum replaces it
        for (int i = keep; i < arrSize; i++)
        {
            if (arr[i] < maxHeap.top())
            {
                maxHeap.pop();
                maxHeap.push(arr[i]);
            }
        }

        // Drain the heap into the result (comes out in descending order)
        std::vector<int> result;
        result.reserve(keep);
        while (!maxHeap.empty())
        {
            result.push_back(maxHeap.top());
            maxHeap.pop();
        }
        return result;
    }
};

哈希表

哈希表基类

/// Base of a separate-chaining hash set.
///
/// realType is the type handed in by callers, storeType the value that is
/// actually kept in the buckets. Derived classes provide the two mappings
/// and are responsible for sizing hashVec before the first insert/find.
template<typename realType, typename storeType = realType>
class BaseHashSet
{
protected:
    // Buckets; each bucket is the chain of stored (mapped) values
    vector<vector<storeType>> hashVec;

    // Bucket index for data; must lie inside [0, hashVec.size())
    inline virtual int get_hash_code(const realType& data) = 0;
    // Value that represents data inside the bucket
    inline virtual storeType get_mapping_value(const realType& data) = 0;

    // True when bucket already holds mapped
    static bool bucket_contains(const vector<storeType>& bucket, const storeType& mapped)
    {
        return any_of(bucket.begin(), bucket.end(), [&mapped](const storeType& stored)
        {
            return stored == mapped;
        });
    }
public:
    BaseHashSet() = default;
    // Polymorphic base: allow safe destruction through a base pointer
    virtual ~BaseHashSet() = default;

    /// Adds data; inserting a value that is already present is a no-op.
    void insert(const realType& data)
    {
        vector<storeType>& bucket = hashVec[get_hash_code(data)];
        storeType mapped = get_mapping_value(data);
        if (!bucket_contains(bucket, mapped))
            bucket.push_back(std::move(mapped));
    }

    /// True when data was inserted before.
    bool find(const realType& data)
    {
        // Map once instead of once per chain element
        const storeType mapped = get_mapping_value(data);
        return bucket_contains(hashVec[get_hash_code(data)], mapped);
    }
};

// Primary HashSet template. It inherits BaseHashSet<T> without overriding
// get_hash_code / get_mapping_value, which are pure virtual there, so this
// generic form stays abstract; the usable behaviour comes from the explicit
// specializations (HashSet<int>, HashSet<string>) defined further down.
template<typename T>
class HashSet : public BaseHashSet<T>
{
};

int类型的哈希表

说是哈希表，其实是简陋到家的unordered_set<int>，采用链地址法解决冲突。modValue的取值遵循以下几点

最好是质数，且离2^n越远越好
modValue的值不能过大也不能过小（大概是当前数量级的一半？例如10^9和10^5）
哈希数组的大小和modValue的大小一致

/// Hash set of int using separate chaining with modValue buckets.
template<>
class HashSet<int> : public BaseHashSet<int>
{
private:
    int modValue;   // bucket count, always > 0
protected:
    // Maps data into [0, modValue); the second modulo folds negative remainders
    inline virtual int get_hash_code(const int& data) override
    {
        return (data % modValue + modValue) % modValue;
    }

    // ints are stored as they are
    inline virtual int get_mapping_value(const int& data) override
    {
        return data;
    }

public:
    /// @param _modValue number of buckets; a non-positive value falls back to 13
    ///                  (it would otherwise cause a modulo by zero)
    HashSet(int _modValue = 13) : modValue(_modValue > 0 ? _modValue : 13)
    {
        // hashVec belongs to the base class, so it cannot appear in this
        // constructor's initializer list; size it here instead
        hashVec.resize(modValue);
    }
};

string类型的哈希表

先将string映射为unsigned long long，当种子取质数（常用131或13331；注意133 = 7 × 19并不是质数）时，可以近似认为映射得来的数是独一无二的。然后再将映射后的值取模求index，最后存入哈希数组中

/// Hash set of strings. Each string is reduced to a 64-bit polynomial hash
/// and only that hash is stored, so two strings with the same hash are
/// treated as the same element.
template<>
class HashSet<string> : public BaseHashSet<string, unsigned long long>
{
private:
    // Polynomial base. 131 is prime (the former value 133 = 7 * 19 is not).
    static constexpr unsigned long long seed = 131;
    int modValue;   // bucket count, always > 0
protected:
    // Bucket index: the 64-bit hash reduced modulo the bucket count
    inline virtual int get_hash_code(const string& data) override
    {
        const unsigned long long mappingValue = get_mapping_value(data);
        return static_cast<int>(mappingValue % static_cast<unsigned long long>(modValue));
    }

    // Polynomial rolling hash; overflow wraps modulo 2^64 by design
    inline virtual unsigned long long get_mapping_value(const string& data) override
    {
        unsigned long long result = 0;
        for (const char c : data)
        {
            // Go through unsigned char so bytes >= 0x80 are not sign-extended
            result = result * seed + static_cast<unsigned char>(c);
        }
        return result;
    }
public:
    /// @param _modValue number of buckets; a non-positive value falls back to 13
    ///                  (it would otherwise cause a modulo by zero)
    HashSet(int _modValue = 13) : modValue(_modValue > 0 ? _modValue : 13)
    {
        // hashVec belongs to the base class, so it cannot appear in this
        // constructor's initializer list; size it here instead
        hashVec.resize(modValue);
    }
};

树

树的专有名词解析（待完成）

度

节点的度表示它的孩子节点个数，比如二叉树中所有节点的度都不超过2

树的度是所有节点的度的最大值

高度与深度

平衡

如果树中所有的节点的左右子树的高度差的绝对值不超过1，那么它是平衡的

如下图中的树，明显是不平衡的

二叉树

前序中序后序遍历

二叉树的解题模板基本都是围绕三种遍历方式展开的，用的最多的是先序遍历和后序遍历。递归遍历的代码很简单，这里就不演示了，需要掌握的是各种遍历方式究竟能遍历个什么东西出来

前序遍历：C A B E F D H G （根左右）
中序遍历：B A F E C H D G （左根右）
后序遍历：B F E A H G D C （左右根）

层序遍历

解题思路：

“根节点”入队，队列的首部为当前应该处理的节点
将当前应该处理的节点的左右孩子节点入队，然后对当前应该处理的节点进行各种操作（根据题意），然后当前操作节点出队

class Solution {
public:
    std::vector<int> levelOrder(TreeNode* root) {
        std::queue<TreeNode*> nodesQueue;
        std::vector<int> result;
        nodesQueue.push(root);
        // 层序遍历
        while (!nodesQueue.empty())
        {
            TreeNode* currentNode = nodesQueue.front();
            if (currentNode != nullptr)
            {
                nodesQueue.push(currentNode->left);
                nodesQueue.push(currentNode->right);
                result.push_back(currentNode->val);
            }
            nodesQueue.pop();
        }
        return result;
    }
};

根据前中序遍历构建树

两个知识点

前序遍历的第一位是根节点，后序遍历的最后一位是根节点
中序遍历中某个节点将其左右两端分为左右子树

设有一棵树，其前序遍历为ACDEFHGB，中序遍历为DECAHFBG，将它构建出来

首先前序遍历的第一位是A，所以根节点是A
然后在中序中找到A，观察它的左右两边，分别为左右两个子树（左子树的元素有DEC，右子树的元素有HFBG，不分顺序）
在左右两个子树的元素中，找到最先在前序遍历中出现的，为子树的根（DEC三者中C最先在前序中出现，HFBG中F最先出现）
循环第二第三步骤

且后序遍历为：EDCHBGFA

class Solution {
public:
    // value -> position of that value in the inorder sequence
    unordered_map<int, int> inorderMap;

    /// Rebuilds a binary tree from its preorder and inorder traversals.
    /// NOTE(review): assumes both sequences describe the same tree and hold
    /// distinct values — confirm with callers.
    TreeNode* buildTree(vector<int>& preorder, vector<int>& inorder) {
        // Forget positions from a previous call: emplace/insert never
        // overwrite, so stale entries would otherwise win
        inorderMap.clear();
        const int inorderSize = inorder.size();
        inorderMap.reserve(inorderSize);
        for (int i = 0; i < inorderSize; i++)
            inorderMap.emplace(inorder[i], i);
        return build(preorder, 0, inorderSize - 1, inorder, 0, inorderSize - 1);
    }

    /// Builds the subtree whose preorder is preorder[preStart..preEnd] and
    /// whose inorder is inorder[inStart..inEnd] (both ranges inclusive).
    TreeNode* build(vector<int>& preorder, int preStart, int preEnd,
        vector<int>& inorder, int inStart, int inEnd)
    {
        if (preStart > preEnd)
            return nullptr;
        // The first preorder element is the root of this subtree
        const int rootValue = preorder[preStart];
        // Its inorder position splits the range into left and right subtree
        const int rootValueIndex = inorderMap[rootValue];
        const int leftChildNum = rootValueIndex - inStart;
        const int rightChildNum = inEnd - rootValueIndex;

        TreeNode* newNode = new TreeNode(rootValue);
        // Left subtree: the next leftChildNum preorder elements
        const int leftPreEnd = preStart + leftChildNum;
        newNode->left = build(preorder, preStart + 1, leftPreEnd, inorder, inStart, rootValueIndex - 1);
        // Right subtree: whatever follows in preorder
        newNode->right = build(preorder, leftPreEnd + 1, leftPreEnd + rightChildNum, inorder, rootValueIndex + 1, inEnd);
        return newNode;
    }
};

寻找重复的子树

框架规划很简单，分为两个步骤

分清需要使用哪种遍历方式。前序是不知道孩子节点的情况的；而后序是知道孩子节点的情况的。前序访问的第一个点为根节点
两行递归的前/后写出的代码是针对”根节点“的操作，具体是哪里的”根“，要看递归的深度

class Solution {
public:
    unordered_map<string, int> cachedMap;
    vector<TreeNode*> result;

    vector<TreeNode*> findDuplicateSubtrees(TreeNode* root) {
        Traverse(root);
        return result;        
    }

    string Traverse(TreeNode* node)
    {
        if (node == nullptr)
            return "#";
        string leftStr = Traverse(node->left);
        string rightStr = Traverse(node->right);

        string newTree = to_string(node->val) + "," + leftStr + "," + rightStr;
        auto it = cachedMap.find(newTree);
        if (it != cachedMap.end())
        {
            if (it->second == 1)
                result.push_back(node);
            it->second++;
        }
        else
            cachedMap.insert(make_pair(newTree, 1));
        return newTree;
    }
};

这种做法是将树转换为字符串的形式储存在哈希表中，记录的信息为：

优化解法为

class Solution {
public:
    int index = 1;
    unordered_map<string, int> str2Index;
    unordered_map<int, int> index2Num;
    vector<TreeNode*> result;

    vector<TreeNode*> findDuplicateSubtrees(TreeNode* root) {
        Traverse(root);
        return result;        
    }

    int Traverse(TreeNode* node)
    {
        if (node == nullptr)
            return 0;
        int leftIndex = Traverse(node->left);
        int rightIndex = Traverse(node->right);
        string str = to_string(node->val) + to_string(leftIndex) + to_string(rightIndex);
        // 树第一次出现
        if (str2Index.find(str) == str2Index.end())
            str2Index.insert(make_pair(str, index++));
        // 获取树的编号
        int oldIndex = str2Index[str];
        if (index2Num.find(oldIndex) == index2Num.end())
            index2Num.insert(make_pair(oldIndex, 0));
        if (++index2Num[oldIndex] == 2)
            result.push_back(node);
        return oldIndex;            
    }
};

假设有这么一颗树

经过遍历后，哈希表中存的key变成一个由“根节点值、左子树编号、右子树编号”三部分组成的字符串，同时value为该树的编号。比如二号树，它的根节点为值20，左孩子为编号为1的树，右孩子为编号为0的树，即没有右孩子

完全二叉树

完全二叉树：除最后一层外每一层都是满的，且最后一层的节点从左向右连续排列

满二叉树：每一层都是满的

如何求二叉树的节点

由于二叉树的节点排列没有顺序可言，所以只能暴力遍历，时间复杂度为O(n)

// Counts the nodes of an arbitrary binary tree by visiting every node: O(n).
int countNodes(TreeNode* node)
{
    // An empty subtree contributes nothing; otherwise count this node plus both subtrees
    return node == nullptr
        ? 0
        : 1 + countNodes(node->left) + countNodes(node->right);
}

如何求满二叉树的节点

由于满二叉树每一层的节点数都是满的，时间复杂度为O(logn)

// Counts the nodes of a perfect (full) binary tree in O(log n):
// a perfect tree of height h has 2^h - 1 nodes.
int countNodes(TreeNode* node)
{
    // Height = length of the leftmost path (every path has the same length
    // in a perfect tree, so left or right makes no difference)
    int height = 0;
    for (; node != nullptr; node = node->left)
        height++;
    // Integer shift instead of floating-point pow; exact for any node count
    // that fits in int (height <= 30)
    return (1 << height) - 1;
}

如何求完全二叉树的节点

不管是什么样的完全二叉树，它的左右子树中一定有一个是完全二叉树，一个是满二叉树。而对于完全二叉子树而言，一定又有完全二叉子子树和满二叉子子树。

其中的完全二叉树还可能是一颗满二叉树

// Counts the nodes of a complete binary tree. When the leftmost and
// rightmost paths have the same length the subtree is perfect and its size
// is 2^h - 1; otherwise recurse into both children.
int countNodes(TreeNode* root) 
{
    int leftHeight = 0, rightHeight = 0;
    for (TreeNode* leftNode = root; leftNode != nullptr; leftNode = leftNode->left)
        leftHeight++;
    for (TreeNode* rightNode = root; rightNode != nullptr; rightNode = rightNode->right)
        rightHeight++;
    // Perfect subtree (this also covers the empty tree: both heights are 0).
    // Integer shift instead of floating-point pow; exact for height <= 30.
    if (leftHeight == rightHeight)
        return (1 << leftHeight) - 1;
    return countNodes(root->left) + countNodes(root->right) + 1;
}

二叉搜索树（BST）

Binary-Search-Tree

对于任意一个节点来说，它的值必须大于左子树所有的节点，且必须小于右子树所有的节点。并且整棵树中没有相同的数据

拓展性质：

从某根节点沿着左下方一直延申，数据越来越小；沿着右下方一直延申，数据越来越大
任何子树都是BST
BST进行中序遍历将会得到有序数列（升序）

验证树是二叉搜索树

class Solution {
public:
    /// True when the tree is a valid binary search tree (strictly ordered,
    /// no duplicate values).
    bool isValidBST(TreeNode* root) {
        // Bounds wider than int so that nodes holding INT_MIN / INT_MAX pass.
        // long long is used because long is only 32 bits on some platforms.
        return isValid(root, LLONG_MIN, LLONG_MAX);
    }

    /// True when every value of the subtree lies strictly between min and max
    /// and the subtree itself is a valid BST.
    bool isValid(TreeNode* root, long long min, long long max)
    {
        if (root == nullptr)
            return true;
        if (root->val <= min || root->val >= max)
            return false;
        // Left values must stay below this node, right values above it
        return isValid(root->left, min, root->val) && isValid(root->right, root->val, max);
    }
};

删除操作

如果是叶子节点则直接删除
如果只有一个孩子节点则直接将该孩子节点替换上去
如果有两个孩子节点则通过中序遍历找到待删除节点的下一个节点（右子树中最小的节点），然后替换

class Solution {
public:
    /// Removes the node holding key from the BST rooted at root and returns
    /// the (possibly new) root of that subtree.
    /// NOTE(review): an unlinked node is not freed here; who owns the nodes
    /// is not visible from this snippet — confirm before adding a delete.
    TreeNode* deleteNode(TreeNode* root, int key) {
        if (root == nullptr)
            return nullptr;

        // Key lives in the left subtree
        if (key < root->val)
        {
            root->left = deleteNode(root->left, key);
            return root;
        }
        // Key lives in the right subtree
        if (key > root->val)
        {
            root->right = deleteNode(root->right, key);
            return root;
        }

        // root is the node to remove.
        // Zero or one child: the other side (possibly null) takes its place
        if (root->left == nullptr)
            return root->right;
        if (root->right == nullptr)
            return root->left;

        // Two children: take over the value of the inorder successor, then
        // remove the successor from the right subtree
        TreeNode* successor = getMin(root->right);
        root->val = successor->val;
        root->right = deleteNode(root->right, successor->val);
        return root;
    }

    /// Returns the leftmost (smallest) node of the subtree rooted at curNode.
    TreeNode* getMin(TreeNode* curNode)
    {
        for (; curNode->left != nullptr; curNode = curNode->left)
        {
        }
        return curNode;
    }
};

插入操作

class Solution {
public:
    /// Inserts val into the BST rooted at root and returns the root of the
    /// resulting subtree. A value that is already present is not inserted again.
    TreeNode* insertIntoBST(TreeNode* root, int val) {
        // Empty spot found: the new node becomes this subtree
        if (root == nullptr)
            return new TreeNode(val);

        if (val < root->val)
            root->left = insertIntoBST(root->left, val);
        else if (val > root->val)
            root->right = insertIntoBST(root->right, val);
        return root;
    }
};

查找操作

最好的情况：该树为二叉平衡搜索树，查找时间复杂度为O(logn)，与二分查找相同
最坏的情况：该树为链表（所有节点只有一个孩子），查找时间复杂度为O(n)

二叉平衡搜索树（AVL树）

由于二叉搜索树（BST）是不平衡的，极端情况下它的查找效率可能是 O(n)，因此我们需要一个更高效的数据结构，即AVL树

那么如何将BST树转变为AVL树，这就需要提到AVL树的自平衡操作，AVL树的自平衡操作会在插入节点和删除节点时进行。自平衡操作无外乎四种

Left-Rotation

如图这是一个AVL树

当插入新节点200时，平衡就被打破了，这个时候就需要进行自平衡操作

对于“根节点”50来说，左子树的高度是0，右子树的高度是2，右子树更“重”一些，因此需要对根节点做一次左旋

左旋的执行步骤是：

将“根节点”的右子树断开，以及右子树的左子树断开（如果有的话）
将“根节点”的右子树设置为右子树的左子树（把80设置为50的右子树），然后将右子树的左子树设置为根节点（把100的左子树设置为50），最后把右子树设置为“根节点”（把100当成新的“根节点”）

Right-Rotation

如图这是一个AVL树

当插入一个节点5后

平衡被打破，对于“根节点”100来说，左子树的高度是2，右子树的高度是0，因此左子树更“重”一些，需要对根节点做一次右旋

首先将“根节点”的左子树断开，将左子树的右子树断开（如果有的话）
将“根节点”的左子树设置为左子树的右子树（将100的左子树设置为80），然后将左子树的右子树设置为“根节点”（将50的右子树设置为100），最后把左节点设置为“根节点”（把50设置成新的“根节点”）

Left-Right-Rotation

上文中演示的两种都是单旋。Left-Right-Rotation是在Left-Rotation基础上做出的双旋转，其实就是先做Left-Rotation，再做Right-Rotation

如下图是添加了节点70之后的树。对于“根节点”而言，左子树的高度为2，右子树的高度为0，即左子树比较“重”，再看左子树（50），左子树的左子树（10）高度为0，左子树的右子树（80）的高度为1，即右边比较“重”。一左一右，“重的并不绝对”，因此需要对“根节点”进行Left-Right-Rotation

首先对“根节点”的左节点（50）进行左旋操作，结果如下

然后再对“根节点”（100）执行右旋

Right-Left-Rotation

在Right-Rotation基础上做出的旋转，其实就是先做Right-Rotation，此时还没平衡，接着再做Left-Rotation

假设有一颗AVL树

此时插入节点90，AVL树的平衡被打破，对于“根节点”来说，左子树的高度为0，右子树的高度为2，因此右子树比较“重”。再看右子树（100），右子树的左子树（80）高度为1，右子树的右子树（120）的高度为0，即左边（80）更“重”。一右一左，“重的并不绝对”，因此需要对“根节点”进行Right-Left-Rotation

首先对“根节点”的右节点节点（100）进行右旋，结果如下

然后对“根节点”进行左旋

了解应使用哪种旋转

上文中所说的对比左右子树的高度，观察是“重的彻底”还是“重的不彻底”的过程，实际上就是在寻找最小失衡树

最小失衡树：从新插入节点逐个向上查找，找到第一个不平衡的节点，以该节点为根的子树称作最小失衡树

下图中插入节点70，根节点100是第一个不平衡的节点，因此以100为根节点的子树称作LR型最小失衡树

根据插入节点和最小失衡树根节点的位置关系，可以将最小失衡树分为四类

LL型：在最小失衡树的左子树的左子树插入节点，即左边“重”，且重得彻底，使用Right-Rotation
RR型：在最小失衡树的右子树的右子树插入节点，即右边“重”，且重得彻底，使用Left-Rotation
LR型：在最小失衡树的左子树的右子树插入节点，即左边“重”，但重得并不彻底，使用Left-Right-Rotation
RL型：在最小失衡树的右子树的左子树插入节点，即右边“重”，但重得并不彻底，使用Right-Left-Rotation

得知根据不同的最小失衡树选择不同的旋转方式后，我们需要判断子树的高度来判定是哪种最小失衡树，因此我们需要在树的节点中额外记录高度信息

代码

/// Node of an AVL tree. Holds a copy of the payload plus the height of the
/// subtree rooted at this node (a leaf has height 0).
template<typename T>
struct Node
{
    static_assert(!std::is_pointer_v<T>, "Cant Store Pointer");

    // make_shared: one allocation, and no raw `new` that could leak
    explicit Node(const T& _data) : height(0), leftChild(nullptr),  rightChild(nullptr), data(std::make_shared<T>(_data)) {}

    std::size_t height;                  // height of the subtree rooted here
    std::shared_ptr<Node> leftChild;     // nullptr when absent
    std::shared_ptr<Node> rightChild;    // nullptr when absent
    std::shared_ptr<T> data;             // payload, never null after construction
};

template<typename T>
class AVLTree
{
public:
    AVLTree();

protected:
    std::shared_ptr<Node<T>> root;

    std::shared_ptr<Node<T>> Left_Rotation(const std::shared_ptr<Node<T>>& subRoot)
    {
        std::shared_ptr<Node<T>> subRootRight = subRoot->rightChild;
        subRoot->rightChild = subRootRight->leftChild;
        subRootRight->leftChild = subRoot;

        subRoot->height = std::max(GetHeight(subRoot->leftChild), GetHeight(subRoot->rightChild)) + 1;
        subRootRight->height = std::max(GetHeight(subRootRight->leftChild), GetHeight(subRootRight->rightChild)) + 1;
        // 返回新的根节点
        return subRootRight;
    }

    std::shared_ptr<Node<T>> Right_Rotation(const std::shared_ptr<Node<T>>& subRoot)
    {
        std::shared_ptr<Node<T>> subRootLeft = subRoot->leftChild;
        subRoot->leftChild = subRootLeft->rightChild;
        subRootLeft->rightChild = subRoot;

        subRoot->height = std::max(GetHeight(subRoot->leftChild), GetHeight(subRoot->rightChild)) + 1;
        subRootLeft->height = std::max(GetHeight(subRootLeft->leftChild), GetHeight(subRootLeft->rightChild)) + 1;
        // 返回新的根节点
        return subRootLeft;
    }

    void Left_Right_Rotation(const std::shared_ptr<Node<T>>& subRoot)
    {
        subRoot->leftChild = Left_Rotation(subRoot->leftChild);
        return Right_Rotation(subRoot);
    }

    void Right_Left_Rotation(const std::shared_ptr<Node<T>>& subRoot)
    {
        subRoot->rightChild = Right_Rotation(subRoot->rightChild);
        return Left_Rotation(subRoot);
    }


    inline std::int32_t GetHeight(const std::shared_ptr<Node<T>>& node)
    {
        return node == nullptr ? -1 : node->height;
    }
};

插入与删除的代码，下次一定

LRU结构

少壮不努力老大徒伤悲，LRU结构使用双向链表和哈希表来实现。通过维护节点顺序来列出最久未使用节点，通过哈希表加速链表的访问，以空间换时间

使用STL中的容器实现，不管是速度还是空间上表现都比较一般

/// LRU cache built from std::list (recency order, most recent at the front)
/// and std::unordered_map (key -> list position).
class LRUCache {
public:
    struct MyData
    {
        int key;
        int value;
        MyData() : key(0), value(0) {}
        MyData(int _key, int _value) : key(_key), value(_value) {}
    };

    list<MyData> cacheList;
    unordered_map<int, list<MyData>::iterator> key2ListMap;
    int cacheSize;

    /// @param capacity maximum number of entries; a cache with a
    ///                 non-positive capacity never stores anything
    LRUCache(int capacity) : cacheSize(capacity) {}

    /// Returns the value stored for key and marks it as most recently used,
    /// or -1 when the key is not cached.
    int get(int key)
    {
        const auto found = key2ListMap.find(key);
        if (found == key2ListMap.end())
            return -1;
        // splice relinks the node to the front: no allocation, and the
        // iterator kept in the map stays valid
        cacheList.splice(cacheList.begin(), cacheList, found->second);
        return found->second->value;
    }

    /// Inserts or updates key. The touched entry becomes the most recently
    /// used one; when the cache is full the least recently used entry is evicted.
    void put(int key, int value)
    {
        // Nothing can be stored; also keeps back() away from an empty list
        if (cacheSize <= 0)
            return;

        const auto found = key2ListMap.find(key);
        if (found != key2ListMap.end())
        {
            // Already cached: update the value and move it to the front
            found->second->value = value;
            cacheList.splice(cacheList.begin(), cacheList, found->second);
            return;
        }

        // Full: the back of the list is the least recently used entry
        if (static_cast<int>(cacheList.size()) >= cacheSize)
        {
            key2ListMap.erase(cacheList.back().key);
            cacheList.pop_back();
        }
        cacheList.emplace_front(key, value);
        key2ListMap[key] = cacheList.begin();
    }
};

使用自建的数据结构实现，哈希表的执行速率和占用空间都不如STL自带的。因此推荐自己重写双向链表，然后用STL的unordered_map

双向链表的设计使用了“虚拟”的头尾节点，在插入和删除的时候方便很多

/// Key/value pair stored in one LRU list node.
/// Plain value type: copy and move operations are the compiler-generated
/// ones (a hand-written move constructor would delete the copy operations).
struct LRUData
{
    int key = 0;
    int value = 0;
    LRUData() = default;
    LRUData(int _key, int _value) : key(_key), value(_value) {}
};

/// Node of a doubly linked list. A default-constructed node holds a
/// value-initialized payload and null links.
template<typename T>
struct DoubleLinkNode
{
    T data{};
    DoubleLinkNode* next = nullptr;   // following node, nullptr at the end
    DoubleLinkNode* pre = nullptr;    // preceding node, nullptr at the start
    DoubleLinkNode() = default;
    /// Takes over _data and links the node between _pre and _next
    /// (the neighbours themselves are not modified).
    explicit DoubleLinkNode(T&& _data, DoubleLinkNode* _next = nullptr, DoubleLinkNode* _pre = nullptr) :
        data(std::move(_data)), next(_next), pre(_pre) {}
};

template<typename T>
class DoubleLinkList
{
public:
    using Node = DoubleLinkNode<T>;
private:
    int listSize;
    Node* begin;
    Node* end;

public:
    DoubleLinkList() : listSize(0), begin(new Node()), end(new Node())
    {
        begin->next = end;
        end->pre = begin;
    }

    ~DoubleLinkList()
    {
        Node* pCurrent = begin;
        while (pCurrent->next != nullptr)
        {
            pCurrent = pCurrent->next;
            delete pCurrent->pre;
        }
        delete end;
    }

    inline int size() { return listSize; }

    void move_to_front(Node* node)
    {
        if (begin->next == node)
            return;
        node->pre->next = node->next;
        node->next->pre = node->pre;

        node->next = begin->next;
        node->pre = begin;

        begin->next->pre = node;
        begin->next = node;
    }

    Node* push_front(T&& data)
    {
        Node* pInsertNode = new Node(std::move(data), begin->next, begin);
        listSize++;
        pInsertNode->next->pre = pInsertNode;
        begin->next = pInsertNode;
        return pInsertNode;
    }


    void pop_back()
    {
        if (listSize == 0)
            return;
        Node* pDeleteNode = end->pre;
        // 上一节点指向结尾
        pDeleteNode->pre->next = end;
        // 结尾指向上一节点
        end->pre = pDeleteNode->pre;
        listSize--;
        delete pDeleteNode;
    }

    T& back()
    {
        // 判断节点数量
        if (listSize > 0)
            return end->pre->data;
        throw exception();
    }
};

/// Minimal int -> T hash map using separate chaining with a fixed number of
/// buckets (no rehashing). Every key is stored at most once.
template<typename T>
class HashMap
{
private:
    int modValue;                                   // bucket count, always > 0
    vector<forward_list<pair<int, T>>> hashVec;     // one chain per bucket

    // Maps key into [0, modValue); the second modulo folds negative remainders
    inline int get_hash_code(int key) const
    {
        return (key % modValue + modValue) % modValue;
    }

public:
    /// @param _modValue number of buckets; a non-positive value falls back to 53
    ///                  (it would otherwise cause a modulo by zero)
    explicit HashMap(int _modValue = 53) : modValue(_modValue > 0 ? _modValue : 53), hashVec(modValue) {}

    /// True when key is present.
    bool find(int key) const
    {
        const forward_list<pair<int, T>>& bucket = hashVec[get_hash_code(key)];
        return any_of(bucket.begin(), bucket.end(), [key](const pair<int, T>& dataPair)
        {
            return dataPair.first == key;
        });
    }

    /// Removes key; does nothing when it is absent.
    void erase(int key)
    {
        forward_list<pair<int, T>>& bucket = hashVec[get_hash_code(key)];
        // forward_list can only erase *after* a position, so trail one behind
        auto previous = bucket.before_begin();
        for (auto current = bucket.begin(); current != bucket.end(); ++current, ++previous)
        {
            if (current->first == key)
            {
                bucket.erase_after(previous);
                return;
            }
        }
    }

    /// Stores value under key, replacing the value of an existing key so that
    /// no stale duplicate can resurface after a later erase.
    void insert(int key, T value)
    {
        forward_list<pair<int, T>>& bucket = hashVec[get_hash_code(key)];
        for (pair<int, T>& dataPair : bucket)
        {
            if (dataPair.first == key)
            {
                dataPair.second = std::move(value);
                return;
            }
        }
        bucket.emplace_front(key, std::move(value));
    }

    /// Returns the value stored under key.
    /// @throws exception when key is absent (nothing is inserted)
    T& operator[](int key)
    {
        for (pair<int, T>& dataPair : hashVec[get_hash_code(key)])
        {
            if (dataPair.first == key)
                return dataPair.second;
        }
        throw exception();
    }

};

class LRUCache {
public:
    DoubleLinkList<LRUData> linkList;
    HashMap<DoubleLinkNode<LRUData>*> key2PointerMap;
    int maxCapacity;

    LRUCache(int capacity) : maxCapacity(capacity), linkList(), key2PointerMap(503) {}

    int get(int key)
    {
        if (key2PointerMap.find(key) == true)
        {
            // 将该数据更新为最近刚刚使用过
            linkList.move_to_front(key2PointerMap[key]);
            // 返回找到的数据
            return key2PointerMap[key]->data.value;
        }
        return -1;
    }

    void put(int key, int value)
    {
        if (key2PointerMap.find(key) == true)
        {
            // 数据更新
            key2PointerMap[key]->data.value = value;
            // 将该数据更新为最近刚刚使用过
            linkList.move_to_front(key2PointerMap[key]);
        }
        else
        {
            // 最新数据 插入到链表头
            key2PointerMap.insert(key, linkList.push_front(LRUData(key, value)));
            // 判断是否过长
            if (linkList.size() > maxCapacity)
            {
                // 删除标记
                key2PointerMap.erase(linkList.back().key);
                // 删除尾部元素
                linkList.pop_back();
            }
        }
    }
};

posted @ 2021-09-26 14:45 _FeiFei 阅读(121) 评论(1) 收藏举报

刷新页面返回顶部

Loading

_FeiFei