20170601xlVBA正则表达式提取体检数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | Public Sub GetFirst() GetDataFromWord "初检" End Sub Public Sub GetDataFromWord( ByVal SheetName As String ) AppSettings 'On Error GoTo ErrHandler Dim StartTime, UsedTime As Variant StartTime = VBA.Timer 'Input code here Dim Wb As Workbook Dim Sht As Worksheet Dim oSht As Worksheet Dim Rng As Range Dim Arr As Variant Dim wdApp As Word.Application Dim wdDoc As Word.Document Dim wdRng As Word.Range 'Const SHEET_NAME As String = "提取信息" Set Wb = Application.ThisWorkbook Set Sht = Wb.Worksheets(SheetName) Dim FilePath As String With Application.FileDialog(msoFileDialogFilePicker) .AllowMultiSelect = False .InitialFileName = Wb.Path .Title = "提取" & SheetName & "数据" .Filters.Clear .Filters.Add "Word文档" , "*.rtf*" If .Show = -1 Then FilePath = .SelectedItems(1) Else MsgBox "您没有选中任何文件夹,本次汇总中断!" Exit Sub End If End With Debug.Print FilePath Set wdApp = New Word.Application Set wdDoc = wdApp.Documents.Open(FilePath) Application.StatusBar = ">>>>>>>>Positioning & Replacing >>>>>>>>" PositioningClear wdDoc, 5 '定位删除英文行 避免正则提取造成干扰 Application.StatusBar = ">>>>>>>>Regexpress Getting array >>>>>>>>" Arr = RegGetArray(wdDoc.Content.Text) '正则从全文提取内容 存入数组 wdDoc.Close False '关闭doc wdApp.Quit '退出app Set wdApp = Nothing Set wdDoc = Nothing With Sht .Cells.Clear .Range( "A1:D1" ).Value = Array( "大项" , "小项" , "D值" , "E值" ) Set Rng = .Range( "A2" ).Resize(UBound(Arr, 2), UBound(Arr)) Rng.Value = Application.WorksheetFunction.Transpose(Arr) Sort2003 .UsedRange End With UsedTime = VBA.Timer - StartTime Debug.Print "UsedTime:" & Format(UsedTime, "0.000 Seconds" ) 'MsgBox "UsedTime:" & Format(UsedTime, "0.000 Seconds"), vbOKOnly, "NextSeven QQ " ErrorExit: Set Wb = Nothing Set Sht = Nothing Set Rng = Nothing AppSettings False On Error Resume Next wdApp.Quit Exit Sub ErrHandler: If Err.Number <> 0 Then MsgBox Err.Description & "!" , vbCritical, "NextSeven QQ " Debug.Print Err.Description Err.Clear Resume ErrorExit End If End Sub Public Sub AppSettings( Optional IsStart As Boolean = True ) If IsStart Then Application.ScreenUpdating = False Application.DisplayAlerts = False Application.Calculation = xlCalculationManual Application.StatusBar = ">>>>>>>>Macro Is Running>>>>>>>>" Else Application.ScreenUpdating = True Application.DisplayAlerts = True Application.Calculation = xlCalculationAutomatic Application.StatusBar = False End If End Sub Function RegGetArray( ByVal OrgText As String ) As String () Dim Reg As Object , Mh As Object , OneMh As Object Dim Reg2 As Object Dim Arr() As String , Index As Long Dim Elm As String Set Reg = CreateObject( "Vbscript.Regexp" ) Set Reg2 = CreateObject( "Vbscript.Regexp" ) Reg2.Global = True With Reg 'OrgText = Application.ActiveDocument.Content .MultiLine = True .Global = True .Ignorecase = False '可用 '.Pattern = "(?:\s)?(\S*?)?\s? *" & "(?:[ ])([^ ][^\r\n\v]*?)\s+?(D=[\d\.]+)\s+(E=[\d\.]+)[\s]+?" .Pattern = "(?:\s+?)([一-龥;,,]*?)?\s? *" & "(?:[ ])([^ ][^\r\n\v]*?)\s+?(D=[\d\.]+)\s+(E=[\d\.]+)[\s]+?" Set Mh = .Execute(OrgText) Index = 0 ReDim Arr(1 To 4, 1 To 1) For Each OneMh In Mh Index = Index + 1 ReDim Preserve Arr(1 To 4, 1 To Index) If OneMh.submatches(0) <> "" Then Elm = OneMh.submatches(0) Reg2.Pattern = "[;,,]?(左视图|前视图|纵切面)+[;,,]?" Arr(1, Index) = Reg2.Replace(Elm, "" ) Reg2.Pattern = "[\s#G]" Arr(2, Index) = Reg2.Replace(OneMh.submatches(1), "" ) 'Debug.Print OneMh.submatches(2) Arr(3, Index) = Split(OneMh.submatches(2), "=" )(1) 'Debug.Print OneMh.submatches(3) Arr(4, Index) = Split(OneMh.submatches(3), "=" )(1) Next OneMh End With RegGetArray = Arr Set Reg = Nothing : Set Mh = Nothing Set Reg2 = Nothing End Function Public Sub PositioningClear( ByVal OpenDoc As Word.Document, ByVal Times As Long ) Dim wdRng As Word.Range Dim lngStart As Long Dim lngEnd As Long Dim lngTime As Long For lngTime = 1 To Times lngEnd = OpenDoc.Content. End With OpenDoc.Content.Find .ClearFormatting .Replacement.ClearFormatting .Text = "ALIMENTARY SYSTEM" .Replacement.Text = "" If .Execute Then lngStart = .Parent.Start Set wdRng = OpenDoc.Range(lngStart, lngEnd) End If End With If Not wdRng Is Nothing Then With wdRng.Find .ClearFormatting .Replacement.ClearFormatting .Text = "[^l^13][A-Za-z0-9\- ,;:.]@[^l^13]" .MatchWildcards = True .Wrap = wdFindStop .Forward = True .Replacement.Text = "^l" 'n = 0 .Execute Replace:=wdReplaceAll 'Do While .Execute ' n = n + 1 ' Debug.Print n; "____________"; .Parent.Text ' If n > 1000 Then Exit Do 'Loop End With End If Set wdRng = Nothing Next lngTime End Sub Sub Sort2003( ByVal RngWithTitle As Range, Optional SortColumnNo As Long = 1) 'key1代表第一个排序的列的关键字 'Order1表示第一字段的排序方式,赋值为xlAscending表示升序,改为xlDescending表示降序。 'Header表示是否包含标题,赋值为xlYes表示标题不参与排序,赋值为xlNo表示标题也参数排序 'MatchCase表示排序时是否区分大小写,赋值为False表示不区分大小写 'Orientation表示排序方向,赋值为xlTopToBottom或者xlSortColumns表示按列排序,赋值为xlSortRows 表示排行排序 'SortMethod用于限制对汉字排序时的排序方式,赋值为xlPinYin表示按拼音排序,赋值为xlStroke表示按笔划排序 With RngWithTitle .Sort Key1:=RngWithTitle.Cells(1, SortColumnNo), Order1:=xlAscending, Header:=xlYes, _ MatchCase:= False , Orientation:=xlTopToBottom, SortMethod:=xlPinYin End With End Sub |
【推荐】还在用 ECharts 开发大屏?试试这款永久免费的开源 BI 工具!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· .NET制作智能桌面机器人:结合BotSharp智能体框架开发语音交互
· 软件产品开发中常见的10个问题及处理方法
· .NET 原生驾驭 AI 新基建实战系列:向量数据库的应用与畅想
· 从问题排查到源码分析:ActiveMQ消费端频繁日志刷屏的秘密
· 一次Java后端服务间歇性响应慢的问题排查记录
· 互联网不景气了那就玩玩嵌入式吧,用纯.NET开发并制作一个智能桌面机器人(四):结合BotSharp
· 一个基于 .NET 开源免费的异地组网和内网穿透工具
· 《HelloGitHub》第 108 期
· Windows桌面应用自动更新解决方案SharpUpdater5发布
· 我的家庭实验室服务器集群硬件清单