js前端搜索组件flexsearch使用

说明文档往往有搜索框,可以根据关键字检索文档内容。我有时疑惑这种检索是后端DB检索还是其它的呢?

拿bootstrap-table的说明文档看下:是由algolia提供的检索服务api

那简单的内容页面如果不想用检索服务,不用Elasticsearch、Apache Solr、Sphinx这些全文搜索引擎,也不用MySQL全文索引、MongoDB、PostgreSQL这些有全文搜索能力的DB,有没有可能实现前端检索呢?

查资料,找到了flexsearch这个js检索组件,试用下:

说明及常用方法

有三种类型的索引:
Index是一个扁平的高性能索引,用于存储id-内容对。
Worker/WorkerIndex也是一个扁平索引,同样存储id-内容对,但它在后台作为专用的工作线程运行。
Document是一个多字段索引,可以存储复杂的JSON文档(其内部也可以由Worker索引组成)。

Worker继承自Index类型,而不是Document类型,因此WorkerIndex的工作方式与标准的FlexSearch Index基本相同。如果要在Document中使用Worker,需要在创建时传入相应的选项{ worker: true }来启用。
在Worker索引上调用的每个方法都是异步的:调用后会得到一个Promise,也可以把回调函数作为最后一个参数传入。

查看代码
     // Create one instance of each of the three index types.
     const index = new FlexSearch.Index();
    // NOTE(review): `document` is also the name of the browser's DOM global; consider renaming when pasting this into a page script — TODO confirm.
    const document = new FlexSearch.Document();
    const worker = new FlexSearch.Worker();

    // Index: register an id/text pair, then search by text with an optional limit and/or options object.
    index.add(id, text);
    index.search(text);
    index.search(text, limit);
    index.search(text, options);
    index.search(text, limit, options);
    index.search(options);

    // Document: add a document (optionally with an explicit id); search accepts the same argument shapes as Index.
    document.add(doc);
    document.add(id, doc);
    document.search(text);
    document.search(text, limit);
    document.search(text, options);
    document.search(text, limit, options);
    document.search(options);

    // Worker: same call shapes as Index, plus an optional callback as the last argument.
    worker.add(id, text);
    worker.search(text);
    worker.search(text, limit);
    worker.search(text, options);
    worker.search(text, limit, options);
    worker.search(text, limit, options, callback);
    worker.search(options);

简单使用

<script src="../plugins/flexsearch.bundle.min.js"></script>
<script>
  // English search demo: an index created with the default options.
  const movie_en = new FlexSearch.Index();
  [
    [1, "明天"],
    [2, "this today"],
    [3, "tomorow"],
    [4, "hello world"],
  ].forEach(([id, text]) => {
    movie_en.add(id, text);
  });

  // Exercise the mutating calls: extend entry 2, replace entry 3, drop entry 4.
  movie_en.append(2, "some appended content");
  movie_en.update(3, "tomorow is anthor day");
  movie_en.remove(4);

  // With this setup, neither a partial match on letters inside an English word
  // nor a Chinese search is supported.
  const arr = ["today", "天", "row", "content", "world"];
  arr.forEach((keyword) => {
    console.log(keyword, movie_en.search(keyword));
  });
  console.log("-----------------");
</script>

中文检索

查看代码
  // Chinese-only search demo: a custom encoder strips every ASCII character and
  // emits each remaining character as its own token, so English text is not searchable.
  const movie_cn = new FlexSearch.Index({
    // Array.from splits by code point, so characters outside the BMP stay whole
    // instead of being cut into two surrogate halves as split("") would do.
    encode: (str) => Array.from(str.replace(/[\x00-\x7F]/g, "")),
  });
  movie_cn.add(1, "明天是新的一天,出去游玩");
  movie_cn.add(2, "西游记");
  movie_cn.add(3, "红楼梦");
  movie_cn.add(4, "hello world");

  // Print the matching ids for each keyword; "world" is pure ASCII and therefore encodes to no tokens.
  const arr1 = ["天", "游记", "world"];
  for (const str of arr1) {
    console.log(str, movie_cn.search(str));
  }
  console.log("-----------------");

 

 中英文混合检索

分词算法来自于  https://liaoxuefeng.com/blogs/all/2024-01-05-js-full-text-search/index.html 

查看代码
  // Inclusive [first, last] character-code ranges treated as "alphabet" characters.
  const ALPHABETS = [
    [0x0030, 0x0039], // ASCII digits 0-9
    [0x0041, 0x005a], // ASCII upper-case A-Z
    [0x0061, 0x007a], // ASCII lower-case a-z
    [0x00c0, 0x02af], // part of Latin-1 supplement, Latin extended A/B, IPA
    [0x0370, 0x052f], // Greek, Cyrillic, Cyrillic supplement
  ];

  // Inclusive [first, last] character-code ranges whose characters each count as a token of their own.
  const SINGLE_CHARS = [
    [0x0e00, 0x0e5b], // Thai
    [0x3040, 0x309f], // Hiragana
    [0x4e00, 0x9fff], // CJK
    [0xac00, 0xd7af], // Hangul syllables
  ];

  /**
   * Tells whether a character code falls inside one of the ALPHABETS ranges.
   * @param {number} n - Character code to classify.
   * @returns {boolean} true when some range [low, high] satisfies low <= n <= high.
   */
  function isAlphabet(n) {
    return ALPHABETS.some(([low, high]) => low <= n && n <= high);
  }

  /**
   * Tells whether a character code falls inside one of the SINGLE_CHARS ranges.
   * @param {number} n - Character code to classify.
   * @returns {boolean} true when some range [low, high] satisfies low <= n <= high.
   */
  function isSingleChar(n) {
    return SINGLE_CHARS.some(([low, high]) => low <= n && n <= high);
  }

  /**
   * Splits text into search tokens for mixed single-character / alphabetic content.
   *
   * - A character accepted by isSingleChar becomes a token on its own, unchanged.
   * - Consecutive characters accepted by isAlphabet form one word, emitted in
   *   lower case; words of a single character are discarded.
   * - Any other character only terminates the current word.
   *
   * @param {string} str - Text to tokenize.
   * @returns {string[]} Tokens in order of appearance.
   */
  function tokenizer(str) {
    const result = [];
    let pending = "";

    // Emit the word collected so far (if longer than one character) and start over.
    const flushPending = () => {
      if (pending.length > 1) {
        result.push(pending.toLowerCase());
      }
      pending = "";
    };

    for (let pos = 0; pos < str.length; pos += 1) {
      const ch = str[pos];
      const code = str.charCodeAt(pos);

      if (isSingleChar(code)) {
        flushPending();
        result.push(ch);
        continue;
      }
      if (isAlphabet(code)) {
        pending += ch;
        continue;
      }
      // Separator: close the current word, emit nothing for the character itself.
      flushPending();
    }

    flushPending();
    return result;
  }

  // Mixed Chinese/English demo: the index encodes text with the custom tokenizer.
  const movie = new FlexSearch.Index({ encode: tokenizer });
  [
    [1, "明天,又是新的一天"],
    [2, "The Lock Artist"],
    [3, "明天,The Lock Artist"],
    [4, "天空很蓝"],
  ].forEach(([id, text]) => {
    movie.add(id, text);
  });

  // Print the matching ids for a single character, a two-character word and an English word.
  const arr2 = ["天", "明天", "artist"];
  arr2.forEach((keyword) => {
    console.log(keyword, movie.search(keyword));
  });

 

 使用全唐诗测试  

查看代码
 <?php
// Connect to the local MySQL `test` database over a persistent PDO connection.
// Rows are fetched as associative arrays and PDO errors are raised as exceptions;
// any failure while connecting aborts the page.
try {
  $dsn = "mysql:host=127.0.0.1;port=3306;dbname=test;charset=utf8";
  $user = "root";
  $password = "";
  $pdo = new PDO($dsn, $user, $password, [
    PDO::ATTR_PERSISTENT => true,
    PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
    PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
  ]);
} catch (Exception $error) {
  die('connect error');
}

// Flatten every poem into one "title - author - content" line with line breaks removed.
$sql = "select title,author,content from tb_chinese_poems";
$stmt = $pdo->query($sql);
$data = [];
foreach ($stmt as $row) {
  $line = implode(' - ', [$row['title'], $row['author'], $row['content']]);
  $data[] = str_replace(["\n", "\r"], '', $line);
}

?>
<script type="text/javascript" src="../plugins/alpine.min.js"></script>
<script src="../plugins/flexsearch.bundle.min.js"></script>
<script>
  // Character classifier: two tables of inclusive [first, last] character-code
  // ranges plus the membership tests that read them.
  const word = {
    ALPHABETS: [
      [0x0030, 0x0039], // ASCII digits 0-9
      [0x0041, 0x005a], // ASCII upper-case A-Z
      [0x0061, 0x007a], // ASCII lower-case a-z
      [0x00c0, 0x02af], // part of Latin-1 supplement, Latin extended A/B, IPA
      [0x0370, 0x052f], // Greek, Cyrillic, Cyrillic supplement
    ],
    SINGLE_CHARS: [
      [0x0e00, 0x0e5b], // Thai
      [0x3040, 0x309f], // Hiragana
      [0x4e00, 0x9fff], // CJK
      [0xac00, 0xd7af], // Hangul syllables
    ],
    // true when n lies inside any ALPHABETS range.
    isAlphabet(n) {
      return this.ALPHABETS.some(([low, high]) => low <= n && n <= high);
    },
    // true when n lies inside any SINGLE_CHARS range.
    isSingleChar(n) {
      return this.SINGLE_CHARS.some(([low, high]) => low <= n && n <= high);
    },
  };

  /**
   * Splits text into search tokens using the `word` classifier.
   *
   * - A character accepted by word.isSingleChar becomes a token on its own, unchanged.
   * - Consecutive characters accepted by word.isAlphabet form one term, emitted
   *   in lower case; terms of a single character are discarded.
   * - Any other character only terminates the current term.
   *
   * @param {string} str - Text to tokenize.
   * @returns {string[]} Tokens in order of appearance.
   */
  function tokenizer(str) {
    const result = [];
    let pending = "";

    // Emit the term collected so far (if longer than one character) and start over.
    const flushPending = () => {
      if (pending.length > 1) {
        result.push(pending.toLowerCase());
      }
      pending = "";
    };

    for (let pos = 0; pos < str.length; pos += 1) {
      const ch = str[pos];
      const code = str.charCodeAt(pos);

      if (word.isSingleChar(code)) {
        flushPending();
        result.push(ch);
        continue;
      }
      if (word.isAlphabet(code)) {
        pending += ch;
        continue;
      }
      // Separator: close the current term, emit nothing for the character itself.
      flushPending();
    }

    flushPending();
    return result;
  }

  // Decode the poem list rendered by PHP. The JSON text sits inside a JS string
  // literal, so addslashes() doubles its backslashes and escapes its quotes;
  // without that, JSON escapes such as \" or \t are consumed by the literal (or a
  // quote ends it early) and JSON.parse fails. JSON_HEX_TAG / JSON_HEX_AMP keep
  // "<", ">" and "&" out of the inline script.
  const documents = JSON.parse('<?= addslashes(json_encode($data, JSON_HEX_TAG | JSON_HEX_AMP)) ?>');

  // Full-text index over the poems, encoded with the mixed-language tokenizer.
  const doc_index = new FlexSearch.Index({
    encode: tokenizer,
  });

  // The array position doubles as the document id, so a search hit can be mapped
  // straight back to documents[id].
  documents.forEach((text, id) => {
    doc_index.add(id, text);
  });
</script>
<script>
  /**
   * Factory for the state object used by the `x-data="search()"` element.
   *
   * State:
   * - keyword: the text bound to the search input.
   * - items: the full list of documents.
   * - num: number of ids returned by the last search.
   * - filteredItems (getter): the entries of `items` whose position is among the
   *   ids returned by doc_index.search(keyword), in their original list order.
   *
   * @returns {{keyword: string, items: string[], num: number, filteredItems: string[]}}
   */
  function search() {
    return {
      keyword: "",
      items: documents,
      num: 0,
      get filteredItems() {
        // Keep the result local; assigning to an undeclared name would leak a
        // global and throws a ReferenceError in strict mode.
        const res = doc_index.search(this.keyword);
        this.num = res.length;
        // A Set gives constant-time membership tests while scanning every item.
        const hitIds = new Set(res);
        return this.items.filter((item, i) => hitIds.has(i));
      },
    };
  }
</script>
<!-- Search UI: the state object comes from search(); the input is bound to `keyword`. -->
<div x-data="search()">
  <input x-model="keyword" placeholder="Search..." />

  <!-- Hit counter: shows `num`, which the filteredItems getter updates on every search. -->
  <p>共检索到<span x-text="num"></span>条</p>
  <ul>
    <!-- One list entry per item returned by filteredItems. -->
    <template x-for="(item,index) in filteredItems" :key="index">
      <li x-text="item"></li>
    </template>
  </ul>
</div>

全唐诗数据表有14.5k数据,前端检索的速度很快,结果几乎是秒出。

没有更多或者更长文本的数据可供测试,因此不知道这两种情况下的表现怎么样。

posted @ 2024-12-05 17:15  carol2014  阅读(61)  评论(0)  编辑  收藏  举报