20170601xlVBA正则表达式提取体检数据

' Macro entry point: runs the extraction with the worksheet name "初检"
' passed as the target sheet.
Public Sub GetFirst()
    Call GetDataFromWord("初检")
End Sub

' Extracts examination values from a user-selected document into the worksheet
' named SheetName of this workbook.
'
' Steps: ask the user for a file, open it in a private Word instance, strip the
' English-only lines (PositioningClear), pull the records out of the full text
' with a regular expression (RegGetArray), write them below a header row and
' sort the sheet by its first column.
'
' Parameters:
'   SheetName - name of the worksheet in ThisWorkbook that receives the data;
'               the sheet is cleared before writing.
'
' Errors are reported with a message box. In every exit path (success, cancel,
' error) the application settings are restored and the Word instance is closed.
Public Sub GetDataFromWord(ByVal SheetName As String)
    Dim StartTime As Single, UsedTime As Single
    Dim Wb As Workbook
    Dim Sht As Worksheet
    Dim Rng As Range
    Dim Arr As Variant
    Dim wdApp As Word.Application
    Dim wdDoc As Word.Document
    Dim FilePath As String

    ' Install the handler before anything can fail so cleanup always runs.
    On Error GoTo ErrHandler
    AppSettings
    StartTime = VBA.Timer

    Set Wb = Application.ThisWorkbook
    Set Sht = Wb.Worksheets(SheetName)

    ' Let the user pick exactly one source file.
    With Application.FileDialog(msoFileDialogFilePicker)
        .AllowMultiSelect = False
        .InitialFileName = Wb.Path
        .Title = "提取" & SheetName & "数据"
        .Filters.Clear
        .Filters.Add "Word文档", "*.rtf*"
        If .Show = -1 Then FilePath = .SelectedItems(1)
    End With

    If Len(FilePath) = 0 Then
        ' Dialog cancelled: leave through the common cleanup so that
        ' screen updating, calculation mode and the status bar are restored.
        MsgBox "您没有选中任何文件夹,本次汇总中断!"
        GoTo ErrorExit
    End If

    Debug.Print FilePath

    Set wdApp = New Word.Application
    Set wdDoc = wdApp.Documents.Open(FilePath)

    Application.StatusBar = ">>>>>>>>Positioning & Replacing >>>>>>>>"
    ' Remove English-only lines so they do not disturb the regex extraction.
    PositioningClear wdDoc, 5

    Application.StatusBar = ">>>>>>>>Regexpress Getting array >>>>>>>>"
    ' Extract the records from the whole document text into a 4 x N array.
    Arr = RegGetArray(wdDoc.Content.Text)

    ' Word is no longer needed: close without saving and release it now.
    ' The references are cleared so the cleanup below does not close twice.
    wdDoc.Close False
    Set wdDoc = Nothing
    wdApp.Quit
    Set wdApp = Nothing

    With Sht
        .Cells.Clear
        .Range("A1:D1").Value = Array("大项", "小项", "D值", "E值")
        ' Arr is (field, record); the sheet wants one record per row.
        Set Rng = .Range("A2").Resize(UBound(Arr, 2), UBound(Arr))
        Rng.Value = Application.WorksheetFunction.Transpose(Arr)
        Sort2003 .UsedRange
    End With

    UsedTime = VBA.Timer - StartTime
    Debug.Print "UsedTime:" & Format(UsedTime, "0.000 Seconds")

ErrorExit:
    ' Best-effort cleanup: nothing here may raise a second error.
    On Error Resume Next
    If Not wdDoc Is Nothing Then wdDoc.Close False
    If Not wdApp Is Nothing Then wdApp.Quit
    Set wdDoc = Nothing
    Set wdApp = Nothing
    Set Rng = Nothing
    Set Sht = Nothing
    Set Wb = Nothing
    AppSettings False
    Exit Sub

ErrHandler:
    MsgBox Err.Description & "!", vbCritical, "NextSeven QQ "
    Debug.Print Err.Description
    ' Resume clears the error state and continues with the shared cleanup.
    Resume ErrorExit
End Sub
' Switches Excel between "macro running" and "normal" application settings.
'
' Parameters:
'   IsStart - True (default): turn off screen updating and alerts, set manual
'             calculation and show a progress text in the status bar.
'             False: turn screen updating and alerts back on, set automatic
'             calculation and hand the status bar back to Excel.
Public Sub AppSettings(Optional IsStart As Boolean = True)
    With Application
        ' Both flags are simply the inverse of IsStart.
        .ScreenUpdating = Not IsStart
        .DisplayAlerts = Not IsStart
        If IsStart Then
            .Calculation = xlCalculationManual
            .StatusBar = ">>>>>>>>Macro Is Running>>>>>>>>"
        Else
            .Calculation = xlCalculationAutomatic
            .StatusBar = False
        End If
    End With
End Sub
' Extracts examination records from OrgText with a regular expression.
'
' Parameters:
'   OrgText - the text to scan.
'
' Returns:
'   A String array dimensioned (1 To 4, 1 To N), one column per match:
'     (1, i) category text with the view names (左视图/前视图/纵切面) and
'            their adjacent separators removed; when a match has no category
'            of its own, the most recent non-empty category is reused
'     (2, i) item text with whitespace, "#" and "G" characters removed
'     (3, i) the text after "=" in the "D=..." part
'     (4, i) the text after "=" in the "E=..." part
'   When nothing matches, a (1 To 4, 1 To 1) array of empty strings is returned.
Function RegGetArray(ByVal OrgText As String) As String()
    Dim Reg As Object, Mh As Object, OneMh As Object
    Dim RegView As Object, RegNoise As Object
    Dim Arr() As String, Index As Long
    Dim Elm As String

    ' Main record pattern: optional category, item text, D value, E value.
    Set Reg = CreateObject("Vbscript.Regexp")
    With Reg
        .MultiLine = True
        .Global = True
        .Ignorecase = False
        .Pattern = "(?:\s+?)([一-龥;,,]*?)?\s? *" & "(?:[ ])([^ ][^\r\n\v]*?)\s+?(D=[\d\.]+)\s+(E=[\d\.]+)[\s]+?"
    End With

    ' The two cleanup patterns never change, so they are configured once
    ' here instead of being reassigned on every match.
    Set RegView = CreateObject("Vbscript.Regexp")
    RegView.Global = True
    RegView.Pattern = "[;,,]?(左视图|前视图|纵切面)+[;,,]?"

    Set RegNoise = CreateObject("Vbscript.Regexp")
    RegNoise.Global = True
    RegNoise.Pattern = "[\s#G]"

    Set Mh = Reg.Execute(OrgText)

    ' Size the result once from the match count; growing it with
    ' ReDim Preserve per match would copy the whole array each time.
    If Mh.Count = 0 Then
        ReDim Arr(1 To 4, 1 To 1)
    Else
        ReDim Arr(1 To 4, 1 To Mh.Count)
    End If

    Index = 0
    For Each OneMh In Mh
        Index = Index + 1

        ' Elm intentionally keeps its previous value when this match has no
        ' category, so following items inherit the last category seen.
        If OneMh.submatches(0) <> "" Then Elm = OneMh.submatches(0)

        Arr(1, Index) = RegView.Replace(Elm, "")
        Arr(2, Index) = RegNoise.Replace(OneMh.submatches(1), "")
        Arr(3, Index) = Split(OneMh.submatches(2), "=")(1)
        Arr(4, Index) = Split(OneMh.submatches(3), "=")(1)
    Next OneMh

    RegGetArray = Arr

    Set OneMh = Nothing
    Set Mh = Nothing
    Set Reg = Nothing
    Set RegView = Nothing
    Set RegNoise = Nothing
End Function

' Repeatedly removes lines made up only of ASCII letters, digits, spaces and
' the punctuation - , ; : . from the part of the document that starts at the
' first occurrence of "ALIMENTARY SYSTEM" and runs to the document end.
'
' Parameters:
'   OpenDoc - the open Word document to modify in place.
'   Times   - number of passes; each pass searches for the marker again and
'             runs one replace-all over the range from the marker to the end.
Public Sub PositioningClear(ByVal OpenDoc As Word.Document, ByVal Times As Long)
    Dim tailRng As Word.Range
    Dim markerPos As Long
    Dim docEnd As Long
    Dim passNo As Long

    passNo = 1
    Do While passNo <= Times
        ' Remember the document end before searching.
        docEnd = OpenDoc.Content.End

        ' Locate the marker; on success the searched range is the match.
        With OpenDoc.Content.Find
            .ClearFormatting
            .Replacement.ClearFormatting
            .Text = "ALIMENTARY SYSTEM"
            .Replacement.Text = ""
            If .Execute Then
                markerPos = .Parent.Start
                Set tailRng = OpenDoc.Range(markerPos, docEnd)
            End If
        End With

        ' Only when the marker was found: collapse every matching line
        ' (break, allowed characters, break) into a single line break.
        If Not tailRng Is Nothing Then
            With tailRng.Find
                .ClearFormatting
                .Replacement.ClearFormatting
                .Text = "[^l^13][A-Za-z0-9\- ,;:.]@[^l^13]"
                .MatchWildcards = True
                .Wrap = wdFindStop
                .Forward = True
                .Replacement.Text = "^l"
                .Execute Replace:=wdReplaceAll
            End With
        End If

        ' Start the next pass without a stale range.
        Set tailRng = Nothing
        passNo = passNo + 1
    Loop
End Sub

' Sorts a range that includes a header row, ascending, by one of its columns.
'
' Parameters:
'   RngWithTitle - range to sort; its first row is treated as a header.
'   SortColumnNo - 1-based column (within the range) used as the sort key;
'                  defaults to the first column.
'
' Sort arguments used:
'   Key1        - the key cell of the first sort field
'   Order1      - xlAscending sorts ascending (xlDescending would sort descending)
'   Header      - xlYes keeps the header row out of the sort
'   MatchCase   - False makes the sort case-insensitive
'   Orientation - xlTopToBottom sorts rows by column values
'   SortMethod  - xlPinYin orders Chinese text by pinyin (xlStroke would use
'                 stroke count)
Sub Sort2003(ByVal RngWithTitle As Range, Optional SortColumnNo As Long = 1)
    Dim sortKey As Range

    ' Key cell: header row of the requested column inside the range.
    Set sortKey = RngWithTitle.Cells(1, SortColumnNo)

    RngWithTitle.Sort Key1:=sortKey, _
                      Order1:=xlAscending, _
                      Header:=xlYes, _
                      MatchCase:=False, _
                      Orientation:=xlTopToBottom, _
                      SortMethod:=xlPinYin
End Sub

  

posted @ 2017-07-07 00:18  wangway  阅读(202)  评论(0编辑  收藏  举报