算法图解笔记-二分查找 - 淡泊明志/宁静致远

一个简单的二分算法，竟然搞不定，惭愧。binary_search()函数返回待查找数的位置，找不到返回NA。需要首先对数据排好序，然后再用二分法进行查找，适应于检索等场景。

二分查找最多需要 $\log_{2}^{n}$ 次查找。用大O计数法，运行时间写为O(logn)。二分查找，性能非常强劲。如果列表包括40亿个数字，线性检索最多需要检索40亿次，二分查找最多只需要32次。

binary_search = function(list_v, item_s) {
  low_s = 1
  high_s = length(list_v)
  # 待查找数值，大于最大值、小于最小值、或者查找不到，
  # 都会产生low_s > high_s，导致循环退出。
  # 也可以理解为，即便是只有1个元素，也要进行检查。
  while (low_s <= high_s) {
    med_s = floor((low_s + high_s) / 2)
    guess_s = list_v[med_s]
    if (guess_s == item_s) {
      return(med_s)
    }
    # 如果中值大于item_s
    if (guess_s > item_s) {
      # 关键：如果中间数值大于给出的待查找值，最大位置等于中间数值位置-1
      high_s = med_s - 1
    }
    if (guess_s < item_s) {
      # 关键：如果中间数值小于给出的待查找值，最小位置等于中间数值位置+1
      low_s = med_s + 1
    }
  }
  return(NA)
}

# 最简单的线性查找函数
linear_search = function(list_v,item_s) {
  low_s = 1
  high_s = length(list_v)
  # 待查找数值，大于最大值、小于最小值、或者查找不到，
  # 都会产生low_s > high_s，导致循环退出。
  # 也可以理解为，即便是只有1个元素，也要进行检查。
  while (low_s <= high_s) {
    guess_s = list_v[low_s]
    if (guess_s == item_s) {
      return(low_s)
    }
    low_s = low_s + 1
  }
  return(NA)
}

binary_search(1:100, -1)

## [1] NA

binary_search(1:100, 1)

## [1] 1

binary_search(1:100, 55)

## [1] 55

binary_search(1:100, 55.5)

## [1] NA

binary_search(1:100, 100)

## [1] 100

binary_search(1:100, 101)

## [1] NA

linear_search(1:100, -1)

## [1] NA

linear_search(1:100, 1)

## [1] 1

linear_search(1:100, 55)

## [1] 55

linear_search(1:100, 55.5)

## [1] NA

linear_search(1:100, 100)

## [1] 100

linear_search(1:100, 101)

## [1] NA

测试一下函数的性能。从结果中可以看到，二分法查找40亿位数字，比线性查找第104万位数字还要快几万倍。二者所花费的时间，一个是微秒级，一个是毫秒级。注：1秒=1000毫秒；1毫秒=1000微秒。

x1 <- 1:2^33
x2 <- 1:2^20
library(microbenchmark)
microbenchmark(linear_search(x2,1040000))

## Unit: milliseconds
##                        expr     min      lq     mean   median       uq      max
##  linear_search(x2, 1040000) 83.9305 98.6428 110.2421 110.1595 119.9042 188.2421
##  neval
##    100

microbenchmark(binary_search(x1,4000000000))

## Unit: microseconds
##                      expr min  lq  mean median  uq  max neval
##  binary_search(x1, 4e+09) 4.4 4.6 5.088   4.65 4.7 24.8   100