【LeetCode】004. Median of Two Sorted Arrays

题目:

There are two sorted arrays nums1 and nums2 of size m and n respectively.

Find the median of the two sorted arrays. The overall run time complexity should be O(log (m+n)).

Example 1:

nums1 = [1, 3]
nums2 = [2]

The median is 2.0

Example 2:

nums1 = [1, 2]
nums2 = [3, 4]

The median is (2 + 3)/2 = 2.5

题解:

  第一次解Hard级的题,果然不是现在的我能做得了的。暴力解肯定不行(即用分治法将两个数组排序,然后计算中位数),题目规定了时间复杂度,只能从网上看大神的了。一是将问题扩大成寻找第K大的元素,先简单的实现,不考虑题目规定的时间复杂度。(Two Pointers思想)

Solution 1(46ms)

 /**
  * Solution 1: median of two sorted arrays via a linear two-pointer merge walk.
  *
  * Both lookups walk the arrays from the front, so the cost is O(k) = O(m + n).
  * This is the simple baseline, not the O(log(m + n)) solution.
  */
 class Solution {
 public:
     /**
      * Returns the k-th smallest element (1-based) of the two sorted arrays.
      *
      * @param nums1 first array, sorted ascending
      * @param nums2 second array, sorted ascending
      * @param k     1-based rank; values below 1 behave like 1
      * @return the k-th smallest element of the merged sequence
      * @throws std::out_of_range (from vector::at) when both arrays are empty
      *         or k exceeds nums1.size() + nums2.size()
      */
     int findKth(const vector<int>& nums1, const vector<int>& nums2, int k) {
         const int m = static_cast<int>(nums1.size());
         const int n = static_cast<int>(nums2.size());
         int p = 0, q = 0;
         // Step over the k-1 smallest elements; ties are consumed from nums1 first.
         for (int skipped = 0; skipped < k - 1; ++skipped) {
             if (p >= m && q >= n) break;  // k is larger than m + n, nothing left to skip
             if (p < m && (q >= n || nums1[p] <= nums2[q])) ++p;
             else ++q;
         }
         // at() turns "ran past both arrays" into an exception instead of a wild read.
         if (p >= m) return nums2.at(q);
         if (q >= n) return nums1[p];
         return min(nums1[p], nums2[q]);
     }

     /**
      * Alternative k-th smallest lookup: merges until one array runs out, then
      * indexes the remaining array directly.
      *
      * @param nums1 first array, sorted ascending
      * @param nums2 second array, sorted ascending
      * @param k     1-based rank, expected in [1, nums1.size() + nums2.size()]
      * @return the k-th smallest element of the merged sequence
      * @throws std::out_of_range (from vector::at) when k is outside that range
      */
     int findKth2(const vector<int>& nums1, const vector<int>& nums2, int k) {
         const int m = static_cast<int>(nums1.size());
         const int n = static_cast<int>(nums2.size());
         int i = 0, j = 0;
         while (i < m && j < n) {
             // i + j elements are already consumed, so the next one has rank i + j + 1.
             const bool takeFirst = nums1[i] < nums2[j];
             const int value = takeFirst ? nums1[i] : nums2[j];
             if (i + j + 1 == k) return value;
             if (takeFirst) ++i;
             else ++j;
         }
         // One array is exhausted: the answer sits at a fixed offset in the other.
         // A negative or too-large offset is rejected by at() (it converts to a huge
         // unsigned index), so an invalid k no longer falls off the end of the function.
         return i < m ? nums1.at(k - 1 - j) : nums2.at(k - 1 - i);
     }

     /**
      * Returns the median of the two sorted arrays.
      *
      * @throws std::out_of_range (from findKth) when both arrays are empty
      */
     double findMedianSortedArrays(const vector<int>& nums1, const vector<int>& nums2) {
         const int total = static_cast<int>(nums1.size() + nums2.size());
         const int upper = findKth(nums1, nums2, total / 2 + 1);
         if (total % 2 != 0) return upper;
         const int lower = findKth(nums1, nums2, total / 2);
         // Add as double: lower + upper can overflow int (e.g. two INT_MAX values).
         return (static_cast<double>(lower) + upper) / 2.0;
     }
 };

  时间复杂度为O(k)=O(m+n),原因在于findKth函数。那么怎么进行优化呢?实际上与O(logn)有关的算法第一个想到的就是二分思想。

  如果想要时间复杂度降为O(log(m+n))。我们可以考虑从K入手。如果我们每次能够删除一个一定在第K个元素之前的元素,那么我们需要进行K次,但是如果每次我们都删除一半呢?由于两个数组都是有序的,我们应该充分利用这个信息。
  假设A B 两数组的元素个数都大于K/2,我们将A B两数组的第K/2个元素进行比较。比较的结果有三种情况。
    A[K/2] == B[K/2];     A[K/2] > B[K/2];    A[K/2] < B[K/2]
  如果 A[K/2] < B[K/2] 意味着 A[0] 到 A[K/2] 肯定在A∪B的前K个元素中。因此我们可以放心删除A数组的这K/2个元素。同理A[K/2] > B[K/2]。
  如果 A[K/2] == B[K/2] 说明已经找到了第K个元素,直接返回A[K/2]或者B[K/2]。(from here)

  另外,需要处理边界条件:

  如果A或者B为空,则直接返回B[k-1]或者A[k-1];如果k为1,我们只需要返回A[0]和B[0]中的较小值;如果A[k/2-1]=B[k/2-1],返回其中一个;(from here)

  上述的描述不太准确,知道这个思想就好了。

Solution 2 ()

/**
 * Solution 2: recursive k-th element search that compares the middle elements
 * of both arrays and discards one half of one array per step.
 */
class Solution {
public:
    /**
     * Returns the k-th smallest element (1-based) of two sorted arrays.
     *
     * Each recursive step builds a new sub-vector for the array that was cut.
     * Callers must pass 1 <= k <= nums1.size() + nums2.size(); nothing is
     * bounds-checked.
     */
    double findKth(vector<int> nums1, vector<int> nums2, int k) {
        const int len1 = nums1.size();
        const int len2 = nums2.size();
        if (len1 <= 0) return nums2[k - 1];
        if (len2 <= 0) return nums1[k - 1];
        if (k <= 1) return min(nums1[0], nums2[0]);

        const int half1 = len1 / 2;
        const int half2 = len2 / 2;
        // nums1[0..half1] plus nums2[0..half2) (or the mirrored set) holds
        // half1 + half2 + 1 elements; if k fits in there, the upper half of the
        // array with the larger middle element cannot contain the answer.
        const bool answerInLowerPart = k <= half1 + 1 + half2;

        if (nums1[half1] <= nums2[half2]) {
            return answerInLowerPart
                ? findKth(nums1, vector<int>(nums2.begin(), nums2.begin() + half2), k)
                : findKth(vector<int>(nums1.begin() + half1 + 1, nums1.end()), nums2,
                          k - (half1 + 1));
        }
        return answerInLowerPart
            ? findKth(vector<int>(nums1.begin(), nums1.begin() + half1), nums2, k)
            : findKth(nums1, vector<int>(nums2.begin() + half2 + 1, nums2.end()),
                      k - (half2 + 1));
    }

    /** Returns the median of the two sorted arrays (at least one must be non-empty). */
    double findMedianSortedArrays(vector<int> nums1, vector<int> nums2) {
        const int total = nums1.size() + nums2.size();
        const int upperRank = total / 2 + 1;
        if (total % 2 != 0)
            return findKth(nums1, nums2, upperRank);
        // Even count: average the two middle elements.
        return (findKth(nums1, nums2, upperRank - 1) + findKth(nums1, nums2, upperRank)) / 2.0;
    }
};

  Solution 2 代码简化

Solution 3 (82ms)

 /**
  * Solution 3: simplified recursive k-th element search. Each step discards up
  * to k/2 elements from the front of one array.
  */
 class Solution {
 public:
     /**
      * Returns the k-th smallest element (1-based) of two sorted arrays.
      *
      * The discarded prefix is dropped by building a new sub-vector, so every
      * recursion level still copies the remaining elements of one array.
      *
      * @param nums1 first array, sorted ascending
      * @param nums2 second array, sorted ascending
      * @param k     1-based rank, expected in [1, nums1.size() + nums2.size()]
      * @throws std::out_of_range (from vector::at) when k is outside that range
      */
     int findKth(const vector<int>& nums1, const vector<int>& nums2, int k) {
         const int m = static_cast<int>(nums1.size());
         const int n = static_cast<int>(nums2.size());
         // Ensure m <= n so that only the shorter array can run out first.
         if (m > n)
             return findKth(nums2, nums1, k);
         if (m == 0)
             return nums2.at(k - 1);
         if (k == 1)
             return min(nums1[0], nums2[0]);
         // Probe i elements of nums1 and j of nums2 with i + j == k.
         const int i = min(m, k / 2);
         const int j = k - i;
         // at() rejects i == 0 (k < 1) and j > n (k > m + n) instead of reading
         // outside the arrays.
         if (nums1.at(i - 1) > nums2.at(j - 1))
             // The first j elements of nums2 all rank below k: drop them.
             return findKth(nums1, vector<int>(nums2.begin() + j, nums2.end()), k - j);
         // Otherwise the first i elements of nums1 all rank below k: drop them.
         return findKth(vector<int>(nums1.begin() + i, nums1.end()), nums2, k - i);
     }

     /**
      * Returns the median of the two sorted arrays.
      *
      * @throws std::out_of_range (from findKth) when both arrays are empty
      */
     double findMedianSortedArrays(const vector<int>& nums1, const vector<int>& nums2) {
         const int total = static_cast<int>(nums1.size() + nums2.size());
         const int upper = findKth(nums1, nums2, total / 2 + 1);
         if (total % 2 != 0) return upper;
         const int lower = findKth(nums1, nums2, total / 2);
         // Add as double: lower + upper can overflow int (e.g. two INT_MAX values).
         return (static_cast<double>(lower) + upper) / 2.0;
     }
 };

    减少vector数组的复制开销。

Solution 4

 /**
  * Solution 4: k-th element search on index windows [begin, end) of the original
  * arrays, so no sub-vectors are built while recursing.
  */
 class Solution {
 public:
     /**
      * Returns the median of the two sorted arrays.
      *
      * The arrays are only read, so they are taken by const reference; const
      * vectors and temporaries are accepted as well.
      *
      * @throws std::out_of_range (from findKth) when both arrays are empty
      */
     double findMedianSortedArrays(const vector<int>& nums1, const vector<int>& nums2) {
         const int m = static_cast<int>(nums1.size());
         const int n = static_cast<int>(nums2.size());
         const int total = m + n;
         if (total % 2 != 0)
             return findKth(nums1, 0, m, nums2, 0, n, total / 2 + 1);
         // Even count: average the two middle elements (findKth already yields double).
         return (findKth(nums1, 0, m, nums2, 0, n, total / 2) +
                 findKth(nums1, 0, m, nums2, 0, n, total / 2 + 1)) / 2.0;
     }

     /**
      * Returns the K-th smallest element (1-based) among nums1[begin1, end1) and
      * nums2[begin2, end2).
      *
      * @param K 1-based rank within the two windows; values below 2 return the
      *          smaller of the two window fronts
      * @throws std::out_of_range (from vector::at) when a probe index lies outside
      *         the vector. This always catches an invalid K when each window ends
      *         at its vector's end, as in findMedianSortedArrays.
      */
     double findKth(const vector<int>& nums1, int begin1, int end1,
                    const vector<int>& nums2, int begin2, int end2, int K) {
         const int len1 = end1 - begin1;
         const int len2 = end2 - begin2;
         // Keep the shorter window first so only it can become empty below.
         if (len1 > len2)
             return findKth(nums2, begin2, end2, nums1, begin1, end1, K);
         if (len1 < 1)
             return nums2.at(begin2 + K - 1);
         // Here 1 <= len1 <= len2, so both windows have a front element.
         if (K < 2)
             return min(nums1[begin1], nums2[begin2]);

         // Probe mid1 elements of window 1 and mid2 of window 2, mid1 + mid2 == K.
         const int mid1 = min(len1, K / 2);
         const int mid2 = K - mid1;
         if (nums1[begin1 + mid1 - 1] > nums2.at(begin2 + mid2 - 1))
             // The first mid2 elements of window 2 all rank below K: skip them.
             return findKth(nums1, begin1, end1, nums2, begin2 + mid2, end2, K - mid2);
         // Otherwise the first mid1 elements of window 1 all rank below K: skip them.
         return findKth(nums1, begin1 + mid1, end1, nums2, begin2, end2, K - mid1);
     }
 };

 

 

Solution 5 (76ms) 

  这个就比较牛了,具体细节在这里。 

This problem is notoriously hard to implement due to all the corner cases. Most implementations consider odd-length and even-length arrays as two different cases and treat them separately. As a matter of fact, with a little mind twist, these two cases can be combined into one, leading to a very simple solution where (almost) no special treatment is needed.

First, let's see the concept of 'MEDIAN' in a slightly unconventional way. That is:

"if we cut the sorted array to two halves of EQUAL LENGTHS, then
median is the AVERAGE OF Max(lower_half) and Min(upper_half), i.e. the
two numbers immediately next to the cut".
For example, for [2 3 5 7], we make the cut between 3 and 5:

[2 3 / 5 7]
then the median = (3+5)/2. Note that I'll use '/' to represent a cut, and (number / number) to represent a cut made through a number in this article.

for [2 3 4 5 6], we make the cut right through 4 like this:

[2 3 (4/4) 5 6]

Since we split 4 into two halves, we say now both the lower and upper subarray contain 4. This notion also leads to the correct answer: (4 + 4) / 2 = 4;

For convenience, let's use L to represent the number immediately left to the cut, and R the right counterpart. In [2 3 5 7], for instance, we have L = 3 and R = 5, respectively.

We observe the index of L and R have the following relationship with the length of the array N:

N        Index of L / R
1               0 / 0
2               0 / 1
3               1 / 1  
4               1 / 2      
5               2 / 2
6               2 / 3
7               3 / 3
8               3 / 4
It is not hard to conclude that index of L = (N-1)/2, and R is at N/2. Thus, the median can be represented as

(L + R)/2 = (A[(N-1)/2] + A[N/2])/2
To get ready for the two array situation, let's add a few imaginary 'positions' (represented as #'s) in between numbers, and treat numbers as 'positions' as well.

[6 9 13 18]  ->   [# 6 # 9 # 13 # 18 #]    (N = 4)
position index     0 1 2 3 4 5  6 7  8     (N_Position = 9)
          
[6 9 11 13 18]->   [# 6 # 9 # 11 # 13 # 18 #]   (N = 5)
position index      0 1 2 3 4 5  6 7  8 9 10    (N_Position = 11)
As you can see, there are always exactly 2*N+1 'positions' regardless of length N. Therefore, the middle cut should always be made on the Nth position (0-based). Since index(L) = (N-1)/2 and index(R) = N/2 in this situation, we can infer that index(L) = (CutPosition-1)/2, index(R) = (CutPosition)/2.

Now for the two-array case:

A1: [# 1 # 2 # 3 # 4 # 5 #]    (N1 = 5, N1_positions = 11)

A2: [# 1 # 1 # 1 # 1 #]     (N2 = 4, N2_positions = 9)
Similar to the one-array problem, we need to find a cut that divides the two arrays each into two halves such that

"any number in the two left halves" <= "any number in the two right
halves".
We can also make the following observations:

There are 2N1 + 2N2 + 2 positions altogether. Therefore, there must be exactly N1 + N2 positions on each side of the cut, and 2 positions directly on the cut.

Therefore, when we cut at position C2 = K in A2, then the cut position in A1 must be C1 = N1 + N2 - K. For instance, if C2 = 2, then we must have C1 = 4 + 5 - C2 = 7.

 [# 1 # 2 # 3 # (4/4) # 5 #]    

 [# 1 / 1 # 1 # 1 #]   
When the cuts are made, we'd have two L's and two R's. They are

 L1 = A1[(C1-1)/2]; R1 = A1[C1/2];
 L2 = A2[(C2-1)/2]; R2 = A2[C2/2];
In the above example,

    L1 = A1[(7-1)/2] = A1[3] = 4; R1 = A1[7/2] = A1[3] = 4;
    L2 = A2[(2-1)/2] = A2[0] = 1; R2 = A2[2/2] = A2[1] = 1;
Now how do we decide if this cut is the cut we want? Because L1, L2 are the greatest numbers on the left halves and R1, R2 are the smallest numbers on the right, we only need

L1 <= R1 && L1 <= R2 && L2 <= R1 && L2 <= R2
to make sure that any number in lower halves <= any number in upper halves. As a matter of fact, since
L1 <= R1 and L2 <= R2 are naturally guaranteed because A1 and A2 are sorted, we only need to make sure:

L1 <= R2 and L2 <= R1.

Now we can use simple binary search to find out the result.

If we have L1 > R2, it means there are too many large numbers on the left half of A1, then we must move C1 to the left (i.e. move C2 to the right); 
If L2 > R1, then there are too many large numbers on the left half of A2, and we must move C2 to the left.
Otherwise, this cut is the right one. 
After we find the cut, the median can be computed as (max(L1, L2) + min(R1, R2)) / 2;
Two side notes:

A. since C1 and C2 can be mutually determined from each other, we might as well select the shorter array (say A2) and only move C2 around, and calculate C1 accordingly. That way we can achieve a run-time complexity of O(log(min(N1, N2)))

B. The only edge case is when a cut falls on the 0th (first) or the 2Nth (last) position. For instance, if C2 = 2N2, then R2 = A2[2*N2/2] = A2[N2], which exceeds the boundary of the array. To solve this problem, we can imagine that both A1 and A2 actually have two extra elements, INT_MIN at A[-1] and INT_MAX at A[N]. These additions don't change the result, but make the implementation easier: If any L falls out of the left boundary of the array, then L = INT_MIN, and if any R falls out of the right boundary, then R = INT_MAX.
View Code

 

 /**
  * Solution 5: binary search for the median "cut" over the 2*n + 1 cut positions
  * of the shorter array.
  */
 class Solution {
 public:
     /**
      * Returns the median of two sorted arrays.
      *
      * The arrays are taken by const reference so that neither the call nor the
      * swap recursion copies them; the search itself only touches O(log n) elements.
      *
      * @param nums1 first array, sorted ascending
      * @param nums2 second array, sorted ascending
      * @return the median; -1 only if the search exhausts every cut without a
      *         match, which should not happen for sorted input
      * @throws std::out_of_range (from vector::at) when both arrays are empty
      */
     double findMedianSortedArrays(const vector<int>& nums1, const vector<int>& nums2) {
         const int m = static_cast<int>(nums1.size());
         const int n = static_cast<int>(nums2.size());
         // Make nums2 the shorter array so the derived cut in nums1 stays in range.
         if (m < n) return findMedianSortedArrays(nums2, nums1);
         // Single-array case; at() rejects the "both empty" input.
         if (n == 0)
             return (static_cast<double>(nums1.at((m - 1) / 2)) + nums1.at(m / 2)) / 2;

         int lo = 0, hi = n * 2;
         while (lo <= hi) {
             const int cut2 = (lo + hi) / 2;   // cut position in nums2
             const int cut1 = m + n - cut2;    // matching cut position in nums1
             // A cut at either end has no neighbour on that side: use sentinels.
             const double L1 = (cut1 == 0) ? INT_MIN : nums1[(cut1 - 1) / 2];
             const double L2 = (cut2 == 0) ? INT_MIN : nums2[(cut2 - 1) / 2];
             const double R1 = (cut1 == m * 2) ? INT_MAX : nums1[cut1 / 2];
             const double R2 = (cut2 == n * 2) ? INT_MAX : nums2[cut2 / 2];

             if (L1 > R2) {
                 lo = cut2 + 1;   // nums1's left part is too large: move cut2 right
             } else if (L2 > R1) {
                 hi = cut2 - 1;   // nums2's left part is too large: move cut2 left
             } else {
                 return (max(L1, L2) + min(R1, R2)) / 2;
             }
         }
         return -1;
     }
 };

 

posted @ 2017-04-16 13:16  Vincent丶丶  阅读(258)  评论(0编辑  收藏  举报