利用 RegEx 可以很方便地做字符串处理。若我们需要从一篇文章中撷取出关键词片段(含前后文)，可以利用下列的 FindKeywordParts 函式来处理；搜寻到的关键词片段会以 List(Of String) 型别的字符串集合传回。
''' <summary>
''' Finds every occurrence of a keyword in a string and returns each occurrence
''' together with a limited amount of text before and after it.
''' </summary>
''' <param name="Text">The string to search. Nothing or an empty string yields an empty list.</param>
''' <param name="Keyword">The keyword to look for. It is matched literally; regex metacharacters in it have no special meaning. Nothing or an empty string yields an empty list.</param>
''' <param name="BLength">Maximum number of characters to include before the keyword. Negative values are treated as 0.</param>
''' <param name="ALength">Maximum number of characters to include after the keyword. Negative values are treated as 0.</param>
''' <returns>The matched fragments (leading context + keyword + trailing context), in order of appearance.</returns>
Public Shared Function FindKeywordParts( _
ByVal Text As String, _
ByVal Keyword As String, _
ByVal BLength As Integer, _
ByVal ALength As Integer) As List(Of String)
    Dim oList As New List(Of String)

    ' Nothing to search, or nothing to search for.
    If String.IsNullOrEmpty(Text) OrElse String.IsNullOrEmpty(Keyword) Then
        Return oList
    End If

    ' A negative bound would not form a valid {0,n} quantifier.
    Dim iBefore As Integer = BLength
    If iBefore < 0 Then iBefore = 0
    Dim iAfter As Integer = ALength
    If iAfter < 0 Then iAfter = 0

    ' Matching pattern: up to iBefore characters, the keyword, up to iAfter characters.
    ' The keyword is escaped so that characters such as "+", "." or "(" are
    ' matched literally instead of being interpreted as regex syntax.
    Dim sPattern As String = String.Format(".{{0,{1}}}{0}.{{0,{2}}}", Regex.Escape(Keyword), iBefore, iAfter)

    For Each oMatch As Match In Regex.Matches(Text, sPattern)
        oList.Add(oMatch.Value)
    Next
    Return oList
End Function
''' <summary>
''' Finds every occurrence of a keyword in a string and returns each occurrence
''' together with a limited amount of text before and after it.
''' </summary>
''' <param name="Text">The string to search. Nothing or an empty string yields an empty list.</param>
''' <param name="Keyword">The keyword to look for. It is matched literally; regex metacharacters in it have no special meaning. Nothing or an empty string yields an empty list.</param>
''' <param name="BLength">Maximum number of characters to include before the keyword. Negative values are treated as 0.</param>
''' <param name="ALength">Maximum number of characters to include after the keyword. Negative values are treated as 0.</param>
''' <returns>The matched fragments (leading context + keyword + trailing context), in order of appearance.</returns>
Public Shared Function FindKeywordParts( _
ByVal Text As String, _
ByVal Keyword As String, _
ByVal BLength As Integer, _
ByVal ALength As Integer) As List(Of String)
    Dim oList As New List(Of String)

    ' Nothing to search, or nothing to search for.
    If String.IsNullOrEmpty(Text) OrElse String.IsNullOrEmpty(Keyword) Then
        Return oList
    End If

    ' A negative bound would not form a valid {0,n} quantifier.
    Dim iBefore As Integer = BLength
    If iBefore < 0 Then iBefore = 0
    Dim iAfter As Integer = ALength
    If iAfter < 0 Then iAfter = 0

    ' Matching pattern: up to iBefore characters, the keyword, up to iAfter characters.
    ' The keyword is escaped so that characters such as "+", "." or "(" are
    ' matched literally instead of being interpreted as regex syntax.
    Dim sPattern As String = String.Format(".{{0,{1}}}{0}.{{0,{2}}}", Regex.Escape(Keyword), iBefore, iAfter)

    For Each oMatch As Match In Regex.Matches(Text, sPattern)
        oList.Add(oMatch.Value)
    Next
    Return oList
End Function
假设我们要找出一篇文章中包含「连胡会」这个关键词的片段，并包含关键词前后文各 10 个字符，程序代码如下：
' Sample article text to search.
Dim sText As String = "宋胡会可望谈两岸政治定位【吴燕玲╱台北报导】连胡会之后国民党单独召开记者会,以新闻公报的形式发布五大共识,亲民党中央昨天不愿对连胡会发表看法"
' Collect each occurrence of the keyword with up to 10 characters of context on both sides.
Dim oList As List(Of String) = FindKeywordParts(sText, "连胡会", 10, 10)
' Collect each occurrence of the keyword with up to 10 characters of context on both sides.
Dim oList As List(Of String) = FindKeywordParts(sText, "连胡会", 10, 10)