Dottext关键字控制
最近使用 Dottext(.Text)的关键字功能时,我设置了中文关键字,却发现不起作用;只有在关键字前后加上空格才会生效。这种设计对英语环境来说是为了防止关键字误匹配到单词内部:比如设置了关键字 eat,如果不做单词位置判断,就会把 heat 中的 eat 也当作关键字转换,这显然是个错误。但是中文的词语之间并不加空格,所以对于中文 blog 来说这个设定就变得累赘了。为此我分析了 .Text 中处理关键字的这段代码。
关键字扫描scan函数
/// <summary>
/// Walks <paramref name="source"/> one character at a time and replaces occurrences of
/// <paramref name="oldValue"/> with <paramref name="newValue"/>. Replacement is only attempted
/// while the scanner is in the <c>ScanState.Replace</c> state; characters seen while inside a
/// tag or inside an anchor element are copied through unchanged.
/// </summary>
/// <param name="source">Text to scan. A single space is appended to it before scanning.</param>
/// <param name="oldValue">Keyword to search for.</param>
/// <param name="newValue">
/// Replacement text. When <paramref name="isFormat"/> is true it is used as a composite format
/// string that receives <paramref name="oldValue"/> as argument <c>{0}</c>.
/// </param>
/// <param name="isFormat">True to format <paramref name="newValue"/>; false to append it verbatim.</param>
/// <param name="onlyFirstMatch">True to stop after the first replacement and return immediately.</param>
/// <param name="CaseSensitive">False to compare ignoring case; true for a case-sensitive compare.</param>
/// <returns>
/// The rewritten text. A full scan returns the buffer trimmed of surrounding whitespace; the
/// <paramref name="onlyFirstMatch"/> path returns the buffer untrimmed, with the appended space
/// left out of the copied remainder.
/// </returns>
/// <remarks>
/// NOTE(review): a null <paramref name="oldValue"/> fails on the first <c>oldValue.Length</c>;
/// an empty one makes <c>i += oldValue.Length - 1</c> step backwards after a match. Callers are
/// presumably expected to pass a non-empty keyword - confirm.
/// </remarks>
private static string Scan(string source, string oldValue, string newValue, bool isFormat, bool onlyFirstMatch, bool CaseSensitive)
{
    const char tagOpen = '<';
    const char tagClose = '>';
    const string anchorOpen = "<a ";
    const string anchorClose = "</a";

    // Trailing sentinel so that a keyword at the very end of the text still has a
    // following character to inspect (see nextAfterMatch below).
    source += " ";

    // Set when the current iteration emitted a replacement; suppresses the normal
    // copy of nextChar at the bottom of the loop for that iteration only.
    bool lastIterMatched = false;

    ScanState state = ScanState.Replace;
    StringBuilder outputBuffer = new StringBuilder(source.Length);

    // Holds the most recent characters, sized to the longer of the two anchor markers,
    // so the markers can be recognised as they stream past.
    CharQueue tagstack =
        new CharQueue(anchorOpen.Length >= anchorClose.Length ? anchorOpen.Length : anchorClose.Length);

    for (int i = 0; i < source.Length; i++)
    {
        char nextChar = source[i];
        tagstack.Enqueue(nextChar);

        switch (state)
        {
            case ScanState.Replace:
                if (anchorOpen == tagstack.ToString(anchorOpen.Length))
                {
                    state = ScanState.InAnchor;
                    break;
                }
                else
                {
                    if (tagOpen == nextChar)
                    {
                        state = ScanState.InTag;
                        break;
                    }
                    else
                    {
                        string matchTarget;
                        if (source.Length - (i + tagstack.Length + oldValue.Length) > 0)
                        {
                            // Peek ahead at a chunk the size of the keyword.
                            // NOTE(review): the peek starts at i + tagstack.Length, while the
                            // skip-ahead and the onlyFirstMatch remainder below are computed from
                            // i + oldValue.Length. Whether these agree depends on what
                            // CharQueue.Length returns - confirm against CharQueue.
                            matchTarget = source.Substring(i + tagstack.Length, oldValue.Length);

                            // The third argument of string.Compare is ignoreCase.
                            if (string.Compare(matchTarget, oldValue, !CaseSensitive) == 0)
                            {
                                int index = tagstack.Length - i;
                                if (index != 0) // Skip if we are at the start of the block
                                {
                                    char prevBeforeMatch = source[(i + tagstack.Length) - 1];

                                    // The whitespace test on the preceding character is disabled
                                    // on purpose (text without spaces between words, e.g. Chinese):
                                    //   && !Char.IsWhiteSpace(prevBeforeMatch)
                                    if (prevBeforeMatch != '>' && prevBeforeMatch != '"')
                                    {
                                        break;
                                    }
                                }

                                // Character right after the candidate match. The word-boundary
                                // test that used it is disabled on purpose for the same reason:
                                //   if (!CharIsWordBoundary(nextAfterMatch))
                                //       break;
                                char nextAfterMatch = source[i + tagstack.Length + oldValue.Length];

                                // Format old with specifier, else it's a straight replace.
                                if (isFormat)
                                    outputBuffer.AppendFormat(newValue, oldValue);
                                else
                                    outputBuffer.Append(newValue);

                                // If we're onlyFirstMatch, tack on the remainder of source and return.
                                if (onlyFirstMatch)
                                {
                                    // Append, not AppendFormat: the remainder is literal post
                                    // text, and any '{' or '}' in it would otherwise be parsed
                                    // as a format item and throw FormatException.
                                    // The "+ 1" leaves out the sentinel space appended above.
                                    outputBuffer.Append(source.Substring(i + oldValue.Length,
                                        source.Length - (i + oldValue.Length + 1)));
                                    return outputBuffer.ToString();
                                }
                                else // pop index ahead to end of match and continue
                                    i += oldValue.Length - 1;

                                lastIterMatched = true;
                                break;
                            }
                        }
                    }
                }

                break;

            case ScanState.InAnchor:
                // Stay in the anchor until its closing marker has streamed past.
                if (anchorClose == tagstack.ToString(anchorClose.Length))
                    state = ScanState.Replace;
                break;

            case ScanState.InTag:
                // A tag that turns out to be an anchor switches to InAnchor;
                // any other tag ends at its closing '>'.
                if (anchorOpen == tagstack.ToString(anchorOpen.Length))
                    state = ScanState.InAnchor;
                else if (tagClose == nextChar)
                    state = ScanState.Replace;
                break;

            default:
                break;
        }

        if (!lastIterMatched)
        {
            outputBuffer.Append(nextChar);
        }
        else
            lastIterMatched = false;
    }

    return outputBuffer.ToString().Trim();
}
/// <summary>
/// Walks <paramref name="source"/> one character at a time and replaces occurrences of
/// <paramref name="oldValue"/> with <paramref name="newValue"/>. Replacement is only attempted
/// while the scanner is in the <c>ScanState.Replace</c> state; characters seen while inside a
/// tag or inside an anchor element are copied through unchanged.
/// </summary>
/// <param name="source">Text to scan. A single space is appended to it before scanning.</param>
/// <param name="oldValue">Keyword to search for.</param>
/// <param name="newValue">
/// Replacement text. When <paramref name="isFormat"/> is true it is used as a composite format
/// string that receives <paramref name="oldValue"/> as argument <c>{0}</c>.
/// </param>
/// <param name="isFormat">True to format <paramref name="newValue"/>; false to append it verbatim.</param>
/// <param name="onlyFirstMatch">True to stop after the first replacement and return immediately.</param>
/// <param name="CaseSensitive">False to compare ignoring case; true for a case-sensitive compare.</param>
/// <returns>
/// The rewritten text. A full scan returns the buffer trimmed of surrounding whitespace; the
/// <paramref name="onlyFirstMatch"/> path returns the buffer untrimmed, with the appended space
/// left out of the copied remainder.
/// </returns>
/// <remarks>
/// NOTE(review): a null <paramref name="oldValue"/> fails on the first <c>oldValue.Length</c>;
/// an empty one makes <c>i += oldValue.Length - 1</c> step backwards after a match. Callers are
/// presumably expected to pass a non-empty keyword - confirm.
/// </remarks>
private static string Scan(string source, string oldValue, string newValue, bool isFormat, bool onlyFirstMatch, bool CaseSensitive)
{
    const char tagOpen = '<';
    const char tagClose = '>';
    const string anchorOpen = "<a ";
    const string anchorClose = "</a";

    // Trailing sentinel so that a keyword at the very end of the text still has a
    // following character to inspect (see nextAfterMatch below).
    source += " ";

    // Set when the current iteration emitted a replacement; suppresses the normal
    // copy of nextChar at the bottom of the loop for that iteration only.
    bool lastIterMatched = false;

    ScanState state = ScanState.Replace;
    StringBuilder outputBuffer = new StringBuilder(source.Length);

    // Holds the most recent characters, sized to the longer of the two anchor markers,
    // so the markers can be recognised as they stream past.
    CharQueue tagstack =
        new CharQueue(anchorOpen.Length >= anchorClose.Length ? anchorOpen.Length : anchorClose.Length);

    for (int i = 0; i < source.Length; i++)
    {
        char nextChar = source[i];
        tagstack.Enqueue(nextChar);

        switch (state)
        {
            case ScanState.Replace:
                if (anchorOpen == tagstack.ToString(anchorOpen.Length))
                {
                    state = ScanState.InAnchor;
                    break;
                }
                else
                {
                    if (tagOpen == nextChar)
                    {
                        state = ScanState.InTag;
                        break;
                    }
                    else
                    {
                        string matchTarget;
                        if (source.Length - (i + tagstack.Length + oldValue.Length) > 0)
                        {
                            // Peek ahead at a chunk the size of the keyword.
                            // NOTE(review): the peek starts at i + tagstack.Length, while the
                            // skip-ahead and the onlyFirstMatch remainder below are computed from
                            // i + oldValue.Length. Whether these agree depends on what
                            // CharQueue.Length returns - confirm against CharQueue.
                            matchTarget = source.Substring(i + tagstack.Length, oldValue.Length);

                            // The third argument of string.Compare is ignoreCase.
                            if (string.Compare(matchTarget, oldValue, !CaseSensitive) == 0)
                            {
                                int index = tagstack.Length - i;
                                if (index != 0) // Skip if we are at the start of the block
                                {
                                    char prevBeforeMatch = source[(i + tagstack.Length) - 1];

                                    // The whitespace test on the preceding character is disabled
                                    // on purpose (text without spaces between words, e.g. Chinese):
                                    //   && !Char.IsWhiteSpace(prevBeforeMatch)
                                    if (prevBeforeMatch != '>' && prevBeforeMatch != '"')
                                    {
                                        break;
                                    }
                                }

                                // Character right after the candidate match. The word-boundary
                                // test that used it is disabled on purpose for the same reason:
                                //   if (!CharIsWordBoundary(nextAfterMatch))
                                //       break;
                                char nextAfterMatch = source[i + tagstack.Length + oldValue.Length];

                                // Format old with specifier, else it's a straight replace.
                                if (isFormat)
                                    outputBuffer.AppendFormat(newValue, oldValue);
                                else
                                    outputBuffer.Append(newValue);

                                // If we're onlyFirstMatch, tack on the remainder of source and return.
                                if (onlyFirstMatch)
                                {
                                    // Append, not AppendFormat: the remainder is literal post
                                    // text, and any '{' or '}' in it would otherwise be parsed
                                    // as a format item and throw FormatException.
                                    // The "+ 1" leaves out the sentinel space appended above.
                                    outputBuffer.Append(source.Substring(i + oldValue.Length,
                                        source.Length - (i + oldValue.Length + 1)));
                                    return outputBuffer.ToString();
                                }
                                else // pop index ahead to end of match and continue
                                    i += oldValue.Length - 1;

                                lastIterMatched = true;
                                break;
                            }
                        }
                    }
                }

                break;

            case ScanState.InAnchor:
                // Stay in the anchor until its closing marker has streamed past.
                if (anchorClose == tagstack.ToString(anchorClose.Length))
                    state = ScanState.Replace;
                break;

            case ScanState.InTag:
                // A tag that turns out to be an anchor switches to InAnchor;
                // any other tag ends at its closing '>'.
                if (anchorOpen == tagstack.ToString(anchorOpen.Length))
                    state = ScanState.InAnchor;
                else if (tagClose == nextChar)
                    state = ScanState.Replace;
                break;

            default:
                break;
        }

        if (!lastIterMatched)
        {
            outputBuffer.Append(nextChar);
        }
        else
            lastIterMatched = false;
    }

    return outputBuffer.ToString().Trim();
}
这是 Dottext.Framework.Util 下的 Scan 函数。刚才说的限制就是由上面清单中第53行和第61行的代码设定的:Char.IsWhiteSpace(prevBeforeMatch) 用来判断匹配到的关键字前面的空格,CharIsWordBoundary(nextAfterMatch) 用来判断匹配到的关键字后面的空格。当我把这两个条件判断注释掉以后,关键字功能就能在中文文章上正常起效了。为了避免英文下的误匹配问题,我考虑在后台增加一个选项,用来指定用户设定的关键字是否需要做英文单词位置隔离的判断,这样就可以更好地解决中文与英文的关键字问题了。