20170601xlVBA正则表达式提取体检数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
Public Sub GetFirst()
    GetDataFromWord "初检"
End Sub
 
Public Sub GetDataFromWord(ByVal SheetName As String)
    AppSettings
    'On Error GoTo ErrHandler
    Dim StartTime, UsedTime As Variant
    StartTime = VBA.Timer
    'Input code here
 
    Dim Wb As Workbook
    Dim Sht As Worksheet
    Dim oSht As Worksheet
    Dim Rng As Range
    Dim Arr As Variant
 
    Dim wdApp As Word.Application
    Dim wdDoc As Word.Document
    Dim wdRng As Word.Range
 
 
    'Const SHEET_NAME As String = "提取信息"
    Set Wb = Application.ThisWorkbook
    Set Sht = Wb.Worksheets(SheetName)
 
    Dim FilePath As String
    With Application.FileDialog(msoFileDialogFilePicker)
        .AllowMultiSelect = False
        .InitialFileName = Wb.Path
        .Title = "提取" & SheetName & "数据"
        .Filters.Clear
        .Filters.Add "Word文档", "*.rtf*"
        If .Show = -1 Then
            FilePath = .SelectedItems(1)
        Else
            MsgBox "您没有选中任何文件夹,本次汇总中断!"
            Exit Sub
        End If
    End With
 
    Debug.Print FilePath
 
 
 
    Set wdApp = New Word.Application
    Set wdDoc = wdApp.Documents.Open(FilePath)
    Application.StatusBar = ">>>>>>>>Positioning & Replacing >>>>>>>>"
    PositioningClear wdDoc, 5    '定位删除英文行 避免正则提取造成干扰
 
 
    Application.StatusBar = ">>>>>>>>Regexpress Getting array >>>>>>>>"
    Arr = RegGetArray(wdDoc.Content.Text)    '正则从全文提取内容 存入数组
    wdDoc.Close False    '关闭doc
    wdApp.Quit    '退出app
    Set wdApp = Nothing
    Set wdDoc = Nothing
 
 
    With Sht
        .Cells.Clear
        .Range("A1:D1").Value = Array("大项", "小项", "D值", "E值")
        Set Rng = .Range("A2").Resize(UBound(Arr, 2), UBound(Arr))
        Rng.Value = Application.WorksheetFunction.Transpose(Arr)
        Sort2003 .UsedRange
    End With
 
 
    UsedTime = VBA.Timer - StartTime
    Debug.Print "UsedTime:" & Format(UsedTime, "0.000 Seconds")
    'MsgBox "UsedTime:" & Format(UsedTime, "0.000 Seconds"), vbOKOnly, "NextSeven  QQ "
ErrorExit:
    Set Wb = Nothing
    Set Sht = Nothing
    Set Rng = Nothing
    AppSettings False
 
    On Error Resume Next
    wdApp.Quit
 
    Exit Sub
ErrHandler:
    If Err.Number <> 0 Then
        MsgBox Err.Description & "!", vbCritical, "NextSeven QQ "
        Debug.Print Err.Description
        Err.Clear
        Resume ErrorExit
    End If
End Sub
Public Sub AppSettings(Optional IsStart As Boolean = True)
    If IsStart Then
        Application.ScreenUpdating = False
        Application.DisplayAlerts = False
        Application.Calculation = xlCalculationManual
        Application.StatusBar = ">>>>>>>>Macro Is Running>>>>>>>>"
    Else
        Application.ScreenUpdating = True
        Application.DisplayAlerts = True
        Application.Calculation = xlCalculationAutomatic
        Application.StatusBar = False
    End If
End Sub
Function RegGetArray(ByVal OrgText As String) As String()
    Dim Reg As Object, Mh As Object, OneMh As Object
    Dim Reg2 As Object
 
    Dim Arr() As String, Index As Long
    Dim Elm As String
    Set Reg = CreateObject("Vbscript.Regexp")
    Set Reg2 = CreateObject("Vbscript.Regexp")
 
    Reg2.Global = True
 
 
    With Reg
        'OrgText = Application.ActiveDocument.Content
        .MultiLine = True
        .Global = True
        .Ignorecase = False
        '可用
        '.Pattern = "(?:\s)?(\S*?)?\s? *" & "(?:[ ])([^ ][^\r\n\v]*?)\s+?(D=[\d\.]+)\s+(E=[\d\.]+)[\s]+?"
        .Pattern = "(?:\s+?)([一-龥;,,]*?)?\s? *" & "(?:[ ])([^ ][^\r\n\v]*?)\s+?(D=[\d\.]+)\s+(E=[\d\.]+)[\s]+?"
        Set Mh = .Execute(OrgText)
        Index = 0
        ReDim Arr(1 To 4, 1 To 1)
        For Each OneMh In Mh
            Index = Index + 1
            ReDim Preserve Arr(1 To 4, 1 To Index)
            If OneMh.submatches(0) <> "" Then Elm = OneMh.submatches(0)
 
            Reg2.Pattern = "[;,,]?(左视图|前视图|纵切面)+[;,,]?"
            Arr(1, Index) = Reg2.Replace(Elm, "")
 
 
            Reg2.Pattern = "[\s#G]"
            Arr(2, Index) = Reg2.Replace(OneMh.submatches(1), "")
            'Debug.Print OneMh.submatches(2)
            Arr(3, Index) = Split(OneMh.submatches(2), "=")(1)
            'Debug.Print OneMh.submatches(3)
            Arr(4, Index) = Split(OneMh.submatches(3), "=")(1)
        Next OneMh
    End With
    RegGetArray = Arr
    Set Reg = Nothing: Set Mh = Nothing
    Set Reg2 = Nothing
End Function
 
Public Sub PositioningClear(ByVal OpenDoc As Word.Document, ByVal Times As Long)
    Dim wdRng As Word.Range
    Dim lngStart As Long
    Dim lngEnd As Long
    Dim lngTime As Long
    For lngTime = 1 To Times
        lngEnd = OpenDoc.Content.End
        With OpenDoc.Content.Find
            .ClearFormatting
            .Replacement.ClearFormatting
            .Text = "ALIMENTARY SYSTEM"
            .Replacement.Text = ""
            If .Execute Then
                lngStart = .Parent.Start
                Set wdRng = OpenDoc.Range(lngStart, lngEnd)
            End If
        End With
 
        If Not wdRng Is Nothing Then
            With wdRng.Find
                .ClearFormatting
                .Replacement.ClearFormatting
                .Text = "[^l^13][A-Za-z0-9\- ,;:.]@[^l^13]"
                .MatchWildcards = True
                .Wrap = wdFindStop
                .Forward = True
                .Replacement.Text = "^l"
                'n = 0
                .Execute Replace:=wdReplaceAll
                'Do While .Execute
                '   n = n + 1
                '   Debug.Print n; "____________"; .Parent.Text
                '    If n > 1000 Then Exit Do
                'Loop
            End With
        End If
        Set wdRng = Nothing
    Next lngTime
 
End Sub
 
Sub Sort2003(ByVal RngWithTitle As Range, Optional SortColumnNo As Long = 1)
'key1代表第一个排序的列的关键字
'Order1表示第一字段的排序方式,赋值为xlAscending表示升序,改为xlDescending表示降序。
'Header表示是否包含标题,赋值为xlYes表示标题不参与排序,赋值为xlNo表示标题也参数排序
'MatchCase表示排序时是否区分大小写,赋值为False表示不区分大小写
'Orientation表示排序方向,赋值为xlTopToBottom或者xlSortColumns表示按列排序,赋值为xlSortRows 表示排行排序
'SortMethod用于限制对汉字排序时的排序方式,赋值为xlPinYin表示按拼音排序,赋值为xlStroke表示按笔划排序
    With RngWithTitle
        .Sort Key1:=RngWithTitle.Cells(1, SortColumnNo), Order1:=xlAscending, Header:=xlYes, _
              MatchCase:=False, Orientation:=xlTopToBottom, SortMethod:=xlPinYin
    End With
End Sub

  

posted @   wangway  阅读(204)  评论(0编辑  收藏  举报
编辑推荐:
· .NET制作智能桌面机器人:结合BotSharp智能体框架开发语音交互
· 软件产品开发中常见的10个问题及处理方法
· .NET 原生驾驭 AI 新基建实战系列:向量数据库的应用与畅想
· 从问题排查到源码分析:ActiveMQ消费端频繁日志刷屏的秘密
· 一次Java后端服务间歇性响应慢的问题排查记录
阅读排行:
· 互联网不景气了那就玩玩嵌入式吧,用纯.NET开发并制作一个智能桌面机器人(四):结合BotSharp
· 一个基于 .NET 开源免费的异地组网和内网穿透工具
· 《HelloGitHub》第 108 期
· Windows桌面应用自动更新解决方案SharpUpdater5发布
· 我的家庭实验室服务器集群硬件清单
点击右上角即可分享
微信分享提示