es 分词器介绍
standard 分词器(默认分词器):按照单词边界切分,并将单词转为小写,不去除停用词
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | GET _analyze { "analyzer" : "standard" , "text" : "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "2" , "start_offset" : 0, "end_offset" : 1, "type" : "<NUM>" , "position" : 0 }, { "token" : "running" , "start_offset" : 2, "end_offset" : 9, "type" : "<ALPHANUM>" , "position" : 1 }, { "token" : "quick" , "start_offset" : 10, "end_offset" : 15, "type" : "<ALPHANUM>" , "position" : 2 }, { "token" : "brawn" , "start_offset" : 16, "end_offset" : 21, "type" : "<ALPHANUM>" , "position" : 3 }, { "token" : "foxes" , "start_offset" : 22, "end_offset" : 27, "type" : "<ALPHANUM>" , "position" : 4 }, { "token" : "leap" , "start_offset" : 28, "end_offset" : 32, "type" : "<ALPHANUM>" , "position" : 5 }, { "token" : "over" , "start_offset" : 33, "end_offset" : 37, "type" : "<ALPHANUM>" , "position" : 6 }, { "token" : "lazy" , "start_offset" : 38, "end_offset" : 42, "type" : "<ALPHANUM>" , "position" : 7 }, { "token" : "dogs" , "start_offset" : 43, "end_offset" : 47, "type" : "<ALPHANUM>" , "position" : 8 }, { "token" : "in" , "start_offset" : 48, "end_offset" : 50, "type" : "<ALPHANUM>" , "position" : 9 }, { "token" : "the" , "start_offset" : 51, "end_offset" : 54, "type" : "<ALPHANUM>" , "position" : 10 }, { "token" : "summer" , "start_offset" : 55, "end_offset" : 61, "type" : "<ALPHANUM>" , "position" : 11 }, { "token" : "evening" , "start_offset" : 62, "end_offset" : 69, "type" : "<ALPHANUM>" , "position" : 12 } ] } |
simple 分词器:按照非字母的字符切分,非字母字符(如数字 2)会被丢弃,并将单词转为小写
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | GET _analyze { "analyzer" : "simple" , "text" : "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "running" , "start_offset" : 2, "end_offset" : 9, "type" : "word" , "position" : 0 }, { "token" : "quick" , "start_offset" : 10, "end_offset" : 15, "type" : "word" , "position" : 1 }, { "token" : "brawn" , "start_offset" : 16, "end_offset" : 21, "type" : "word" , "position" : 2 }, { "token" : "foxes" , "start_offset" : 22, "end_offset" : 27, "type" : "word" , "position" : 3 }, { "token" : "leap" , "start_offset" : 28, "end_offset" : 32, "type" : "word" , "position" : 4 }, { "token" : "over" , "start_offset" : 33, "end_offset" : 37, "type" : "word" , "position" : 5 }, { "token" : "lazy" , "start_offset" : 38, "end_offset" : 42, "type" : "word" , "position" : 6 }, { "token" : "dogs" , "start_offset" : 43, "end_offset" : 47, "type" : "word" , "position" : 7 }, { "token" : "in" , "start_offset" : 48, "end_offset" : 50, "type" : "word" , "position" : 8 }, { "token" : "the" , "start_offset" : 51, "end_offset" : 54, "type" : "word" , "position" : 9 }, { "token" : "summer" , "start_offset" : 55, "end_offset" : 61, "type" : "word" , "position" : 10 }, { "token" : "evening" , "start_offset" : 62, "end_offset" : 69, "type" : "word" , "position" : 11 } ] } |
按照空格切分不做任何处理
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | GET _analyze { "analyzer" : "whitespace" , "text" : "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "2" , "start_offset" : 0, "end_offset" : 1, "type" : "word" , "position" : 0 }, { "token" : "running" , "start_offset" : 2, "end_offset" : 9, "type" : "word" , "position" : 1 }, { "token" : "Quick" , "start_offset" : 10, "end_offset" : 15, "type" : "word" , "position" : 2 }, { "token" : "brawn-foxes" , "start_offset" : 16, "end_offset" : 27, "type" : "word" , "position" : 3 }, { "token" : "leap" , "start_offset" : 28, "end_offset" : 32, "type" : "word" , "position" : 4 }, { "token" : "over" , "start_offset" : 33, "end_offset" : 37, "type" : "word" , "position" : 5 }, { "token" : "lazy" , "start_offset" : 38, "end_offset" : 42, "type" : "word" , "position" : 6 }, { "token" : "dogs" , "start_offset" : 43, "end_offset" : 47, "type" : "word" , "position" : 7 }, { "token" : "in" , "start_offset" : 48, "end_offset" : 50, "type" : "word" , "position" : 8 }, { "token" : "the" , "start_offset" : 51, "end_offset" : 54, "type" : "word" , "position" : 9 }, { "token" : "summer" , "start_offset" : 55, "end_offset" : 61, "type" : "word" , "position" : 10 }, { "token" : "evening." , "start_offset" : 62, "end_offset" : 70, "type" : "word" , "position" : 11 } ] } |
stop 分词器:在 simple 分词器的基础上去掉停用词(如 in、the)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | GET _analyze { "analyzer" : "stop" , "text" : "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "running" , "start_offset" : 2, "end_offset" : 9, "type" : "word" , "position" : 0 }, { "token" : "quick" , "start_offset" : 10, "end_offset" : 15, "type" : "word" , "position" : 1 }, { "token" : "brawn" , "start_offset" : 16, "end_offset" : 21, "type" : "word" , "position" : 2 }, { "token" : "foxes" , "start_offset" : 22, "end_offset" : 27, "type" : "word" , "position" : 3 }, { "token" : "leap" , "start_offset" : 28, "end_offset" : 32, "type" : "word" , "position" : 4 }, { "token" : "over" , "start_offset" : 33, "end_offset" : 37, "type" : "word" , "position" : 5 }, { "token" : "lazy" , "start_offset" : 38, "end_offset" : 42, "type" : "word" , "position" : 6 }, { "token" : "dogs" , "start_offset" : 43, "end_offset" : 47, "type" : "word" , "position" : 7 }, { "token" : "summer" , "start_offset" : 55, "end_offset" : 61, "type" : "word" , "position" : 10 }, { "token" : "evening" , "start_offset" : 62, "end_offset" : 69, "type" : "word" , "position" : 11 } ] } |
不进行切分直接输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | GET _analyze { "analyzer" : "keyword" , "text" : "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." , "start_offset" : 0, "end_offset" : 70, "type" : "word" , "position" : 0 } ] } |
pattern 分词器:通过正则表达式进行切分,默认按非单词字符(\W+)切分,并将单词转为小写
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | GET _analyze { "analyzer" : "pattern" , "text" : "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "2" , "start_offset" : 0, "end_offset" : 1, "type" : "word" , "position" : 0 }, { "token" : "running" , "start_offset" : 2, "end_offset" : 9, "type" : "word" , "position" : 1 }, { "token" : "quick" , "start_offset" : 10, "end_offset" : 15, "type" : "word" , "position" : 2 }, { "token" : "brawn" , "start_offset" : 16, "end_offset" : 21, "type" : "word" , "position" : 3 }, { "token" : "foxes" , "start_offset" : 22, "end_offset" : 27, "type" : "word" , "position" : 4 }, { "token" : "leap" , "start_offset" : 28, "end_offset" : 32, "type" : "word" , "position" : 5 }, { "token" : "over" , "start_offset" : 33, "end_offset" : 37, "type" : "word" , "position" : 6 }, { "token" : "lazy" , "start_offset" : 38, "end_offset" : 42, "type" : "word" , "position" : 7 }, { "token" : "dogs" , "start_offset" : 43, "end_offset" : 47, "type" : "word" , "position" : 8 }, { "token" : "in" , "start_offset" : 48, "end_offset" : 50, "type" : "word" , "position" : 9 }, { "token" : "the" , "start_offset" : 51, "end_offset" : 54, "type" : "word" , "position" : 10 }, { "token" : "summer" , "start_offset" : 55, "end_offset" : 61, "type" : "word" , "position" : 11 }, { "token" : "evening" , "start_offset" : 62, "end_offset" : 69, "type" : "word" , "position" : 12 } ] } |
english 分词器:英语分词器,会去掉停用词(如 in、the),并做词干提取(如 running → run、foxes → fox)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | GET _analyze { "analyzer" : "english" , "text" : "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "2" , "start_offset" : 0, "end_offset" : 1, "type" : "<NUM>" , "position" : 0 }, { "token" : "run" , "start_offset" : 2, "end_offset" : 9, "type" : "<ALPHANUM>" , "position" : 1 }, { "token" : "quick" , "start_offset" : 10, "end_offset" : 15, "type" : "<ALPHANUM>" , "position" : 2 }, { "token" : "brawn" , "start_offset" : 16, "end_offset" : 21, "type" : "<ALPHANUM>" , "position" : 3 }, { "token" : "fox" , "start_offset" : 22, "end_offset" : 27, "type" : "<ALPHANUM>" , "position" : 4 }, { "token" : "leap" , "start_offset" : 28, "end_offset" : 32, "type" : "<ALPHANUM>" , "position" : 5 }, { "token" : "over" , "start_offset" : 33, "end_offset" : 37, "type" : "<ALPHANUM>" , "position" : 6 }, { "token" : "lazi" , "start_offset" : 38, "end_offset" : 42, "type" : "<ALPHANUM>" , "position" : 7 }, { "token" : "dog" , "start_offset" : 43, "end_offset" : 47, "type" : "<ALPHANUM>" , "position" : 8 }, { "token" : "summer" , "start_offset" : 55, "end_offset" : 61, "type" : "<ALPHANUM>" , "position" : 11 }, { "token" : "even" , "start_offset" : 62, "end_offset" : 69, "type" : "<ALPHANUM>" , "position" : 12 } ] } |
standard 分词器处理中文:没有中文词典,只能一个字一个字地切分
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | POST _analyze { "analyzer" : "standard" , "text" : "他说的确实在理" } { "tokens" : [ { "token" : "他" , "start_offset" : 0, "end_offset" : 1, "type" : "<IDEOGRAPHIC>" , "position" : 0 }, { "token" : "说" , "start_offset" : 1, "end_offset" : 2, "type" : "<IDEOGRAPHIC>" , "position" : 1 }, { "token" : "的" , "start_offset" : 2, "end_offset" : 3, "type" : "<IDEOGRAPHIC>" , "position" : 2 }, { "token" : "确" , "start_offset" : 3, "end_offset" : 4, "type" : "<IDEOGRAPHIC>" , "position" : 3 }, { "token" : "实" , "start_offset" : 4, "end_offset" : 5, "type" : "<IDEOGRAPHIC>" , "position" : 4 }, { "token" : "在" , "start_offset" : 5, "end_offset" : 6, "type" : "<IDEOGRAPHIC>" , "position" : 5 }, { "token" : "理" , "start_offset" : 6, "end_offset" : 7, "type" : "<IDEOGRAPHIC>" , "position" : 6 } ] } |
草都可以从石头缝隙中长出来,更何况你呢
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· 葡萄城 AI 搜索升级:DeepSeek 加持,客户体验更智能
· 什么是nginx的强缓存和协商缓存
· 一文读懂知识蒸馏