VERSION 5.00
Begin VB.Form frmTextEx 
   Caption         =   "Simple Text Extract"
   ClientHeight    =   3195
   ClientLeft      =   60
   ClientTop       =   345
   ClientWidth     =   7230
   LinkTopic       =   "Form1"
   ScaleHeight     =   3195
   ScaleWidth      =   7230
   StartUpPosition =   1  'Fenstermitte
   Begin VB.TextBox txtData 
      Height          =   1935
      Left            =   120
      Locked          =   -1  'True
      MultiLine       =   -1  'True
      ScrollBars      =   3  'Beides
      TabIndex        =   4
      ToolTipText     =   "Extracted text"
      Top             =   1080
      Width           =   6975
   End
   Begin VB.CommandButton cmdExtract 
      Caption         =   "&Extract"
      Default         =   -1  'True
      Height          =   375
      Left            =   6000
      TabIndex        =   2
      ToolTipText     =   "Start extract"
      Top             =   240
      Width           =   1095
   End
   Begin VB.TextBox txtFileName 
      Height          =   285
      Left            =   840
      OLEDropMode     =   1  'Manuell
      TabIndex        =   1
      ToolTipText     =   "Enter a file name or drag one from Windows"
      Top             =   360
      Width           =   4335
   End
   Begin VB.Label Label3 
      Caption         =   "Drag and Drop enabled"
      BeginProperty Font 
         Name            =   "Arial"
         Size            =   6.75
         Charset         =   0
         Weight          =   400
         Underline       =   0   'False
         Italic          =   0   'False
         Strikethrough   =   0   'False
      EndProperty
      Height          =   255
      Left            =   3600
      TabIndex        =   5
      Top             =   720
      Width           =   1575
   End
   Begin VB.Label Label2 
      Caption         =   "&Text"
      Height          =   255
      Left            =   120
      TabIndex        =   3
      Top             =   840
      Width           =   495
   End
   Begin VB.Label Label1 
      Caption         =   "&File:"
      Height          =   255
      Left            =   120
      TabIndex        =   0
      Top             =   360
      Width           =   495
   End
End
Attribute VB_Name = "frmTextEx"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Option Explicit

Dim oAnalyzer As New HTMLDocAnalyzer


Private Sub cmdExtract_Click()

    Dim sText As String
    Dim nRes As Long
    Dim oObj As HTMLAObject
    Dim i As Long
    Dim sTmp As String
    
    txtData.Text = ""

    
    
    If Len(txtFileName) = 0 Then
        Exit Sub
    End If
    
    
    oAnalyzer.ConvertCharRefs = True
    oAnalyzer.UpperCaseTagNames = True
    
    nRes = oAnalyzer.Analyze(txtFileName)
    If nRes <> 0 Then
        MsgBox "Error : " & Str(nRes) & " File: " & txtFileName
    End If
    
    For i = 1 To oAnalyzer.Count
    
        Set oObj = oAnalyzer.GetObject(i)
        
        Select Case oObj.Type
        
            Case SHTAObjectTypeText
                    sText = sText & oObj.Data
                
            Case SHTAObjectTypeEol
                    sText = sText & " "
                
            Case SHTAObjectTypeTagStart
                If IsBreakTag(oObj.Data) Then
                        sText = sText & Chr(13)
                End If
            
        End Select
        
    Next
    
    txtData.Text = CompactWhiteSpaces(sText)


End Sub


Private Sub txtFileName_OLEDragDrop(Data As DataObject, Effect As Long, Button As Integer, Shift As Integer, X As Single, Y As Single)

    If Data.GetFormat(vbCFFiles) Then
        If Data.Files.Count = 1 Then
            txtFileName = Data.Files.Item(1)
        End If
    End If

End Sub

Private Sub txtFileName_OLEDragOver(Data As DataObject, Effect As Long, Button As Integer, Shift As Integer, X As Single, Y As Single, State As Integer)

    If Data.GetFormat(vbCFFiles) Then
        Effect = vbDropEffectCopy And Effect
    Else
        Effect = vbDropEffectNone
    End If
    
End Sub

Function RemoveTabs(sData As String) As String

    Dim sTmp As String
    Dim i As Long
    Dim c As String
    
    For i = 1 To Len(sData)
        c = Mid(sData, i, 1)
        If c <> Chr(9) Then
            sTmp = sTmp + c
        End If
    Next
    
    RemoveTabs = sTmp

End Function

Function IsBreakTag(ByVal sName As String) As Boolean

    Static oDict As New Collection
    Dim vItem As Variant
    
    
    'add further names as needed
    If oDict.Count = 0 Then
        oDict.Add "A", "A"
        oDict.Add "B", "B"
        oDict.Add "BLOCKQUOTE", "BLOCKQUOTE"
        oDict.Add "BODY", "BODY"
        oDict.Add "EM", "EM"
        oDict.Add "FONT", "FONT"
        oDict.Add "HEAD", "HEAD"
        oDict.Add "HTML", "HTML"
        oDict.Add "STROKE", "STROKE"
        oDict.Add "TABLE", "TABLE"
        oDict.Add "TR", "TR"
    End If
    
On Error GoTo ErrHandler
    
    vItem = oDict.Item(sName)
    If Len(vItem) Then
        IsBreakTag = False
        Exit Function
    End If
    
ErrHandler:
       
    IsBreakTag = True

End Function

Private Function CompactWhiteSpaces(ByVal sText As String) As String

    Dim nState As Long
    Dim c As String
    Dim sOut As String
    Dim sLine As String
    Dim i As Long

    If Len(sText) = 0 Then
       Exit Function
    End If
    
    i = 1
    nState = 0
    
    While i <= Len(sText)

        c = Mid(sText, i, 1)
        
        Select Case nState
            Case 0
                If Not (c = " " Or c = Chr(9) Or c = Chr(160) Or c = Chr(13)) Then
                    sLine = sLine + c
                    nState = 1
                ElseIf c = Chr(13) Then '
                    sOut = sOut + sLine + vbCrLf '
                    sLine = ""   '
                End If
                i = i + 1
                   
            Case 1
                
                If c = " " Or c = Chr(9) Or c = Chr(160) Then
                    sLine = sLine + " "
                    nState = 0
                ElseIf c = Chr(13) Then
                    If Len(sLine) Then
                        sOut = sOut + sLine + vbCrLf
                        sLine = ""
                    End If
                    nState = 0
                Else
                    sLine = sLine + c
                End If
                i = i + 1
                
        End Select

    Wend
    
    If Len(sLine) Then
        sOut = sOut + sLine
    End If
    
    CompactWhiteSpaces = sOut

End Function
