fb:porticula NoPaste
Wortzaehler
Uploader: | grindstone |
Datum/Zeit: | 19.08.2014 12:52:09 |
#define NULL 0
#define recordLen SizeOf(tNode)
Const skipChars = 32
Const treeSubNodeCount = 256 - skipChars
Const printInterval = 750
Enum EState
undefined
word
End Enum
Type tNode
subNodes(1 To treeSubNodeCount) As tNode Ptr
count As UInteger = 0
End Type
Declare Function isSplittingChar (char As UByte) As Integer
Declare Sub putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer)
Declare Sub traverseTree (node As tNode Ptr, index As Ubyte, path As String)
Declare Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer
Declare Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer
Declare Sub deallocateTree (tree() As tNode Ptr)
Declare Sub deallocateNodeAndSubNodes (node As tNode Ptr)
Dim As UByte char, c
Dim As EState state = EState.undefined
Dim As UInteger numWords = 0
Dim As UInteger currentWordCharCount = 0, totalCharCount = 0
Dim As String buffer = ""
Dim As Double tStart, tEnd
Dim As String text
Dim As Integer anfang, ende
Dim tree(1 To treeSubNodeCount) As tNode Ptr
For i As Integer = LBound(tree) To UBound(tree)
tree(i) = CAllocate(recordLen)
Next i
tStart = TIMER
'Open ExePath + "/debian-reference.de.txt" For Binary As #1
Open ExePath + "/debianreferenz.txt" For Binary As #1
text = Input (Lof(1),1) 'datei einlesen
Close 1
Do 'text abarbeiten
Do 'nächsten wortanfang suchen
anfang += 1
Loop Until (isSplittingChar(text[anfang]) = 0) Or (anfang > Len(text))
ende = anfang
Do 'nächstes wortende suchen
ende += 1
Loop Until (isSplittingChar(text[ende]) <> 0) Or (ende >= Len(text))
buffer = Mid(text,anfang + 1, ende - anfang) 'wort aus text holen
totalCharCount += ende - anfang
If buffer = "" Then
Exit Do
EndIf
anfang = ende 'zeiger für nächste suche setzen
numWords += 1
putCharsIntoTree(tree(buffer[0]-skipChars), buffer, 0)
Loop Until ende >= Len(text)
text = "" 'speicher freigeben
tEnd = TIMER
Locate 1,1
Print "Datei eingelesen in ";
Print Using "##.###"; (tEnd-tStart);
Print " Sekunden."
Print
Dim As Double averageLength = 0
If (numWords > 0) Then
averageLength = (totalCharCount / CDbl(numWords))
End If
Print "Es wurden " & numWords & " Woerter gezaehlt. "
Print "Die durchschnittliche Wortlaenge betrug ";
Print Using "##.### "; averageLength;
Print "Buchstaben."
Print
'Ganzen Baum ausgeben:
'Print "Haeufigkeiten:"
'For i As Integer = 1 To treeSubNodeCount
' traverseTree(tree(i), i, Chr(skipChars+i))
'Next i
'Print
Print "Gezielter Lookup von Woertern:"
Dim lookupWords(1 To ...) As String = { "Shell", "Editor", "der", "die", "das" }
tStart = Timer
For i As Integer = LBound(lookupWords) To UBound(lookupWords)
Print "Wie oft kommt " & chr(34) & lookupWords(i) & chr(34) & " vor? " & lookupWordCount(tree(), lookupWords(i)) & "x"
Next i
tEnd = Timer
Print
Print "Das Nachschlagen der Haeufigkeiten dauerte ";
Print Using "##.###"; (tEnd-tStart);
Print " Sekunden."
Print
Print "Abbau des Baums aus dem Speicher... ";
deallocateTree(tree())
Print "Fertig."
Print
Print "Druecken Sie eine beliebige Taste zum Beenden."
GetKey
End 0
'Ermittelt, ob das uebergebene Zeichen eines ist, das das Ende
'eines Wortes kennzeichnet (zum Beispiel ein Leerzeichen, ein
'Zeilenumbruch oder ein Komma).
Function isSplittingChar (char As UByte) As Integer
' , . : ; /
Return ((char < 33) Or (char = 44) Or (char = 46) Or (char = 58) Or (char = 59) Or (char = 47))
End Function
Sub putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer)
Dim As UByte c = buffer[index]
If ((index+1) >= Len(buffer)) Then
' Wort endet hier in diesem Knoten
node->count += 1
Else
c = buffer[index+1]
If (node->subNodes(c-skipChars) = NULL) Then
node->subNodes(c-skipChars) = CAllocate(recordLen)
End if
putCharsIntoTree(node->subNodes(c-skipChars), buffer, index+1)
End If
End Sub
Sub traverseTree (node As tNode Ptr, index As Ubyte, path As String)
If (node = NULL) Then Return
If (node->count > 0) Then
Print path & " => " & node->count & " x"
End If
For i As Integer = 1 To treeSubNodeCount
If (node->subNodes(i) <> NULL) Then
traverseTree (node->subNodes(i), i, path + Chr(i+skipChars))
End If
Next i
End Sub
Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer
If (Len(word) < 1) Then Return 0
Return lookupCharacterCount(tree(word[0]-skipChars), word, 0)
End Function
Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer
If ((index+1) >= Len(word)) Then
Return node->count
Else
Dim As UByte nextChar = word[index+1]
Return lookupCharacterCount (node->subNodes(nextChar-skipChars), word, index+1)
End If
End Function
Sub deallocateTree (tree() As tNode Ptr)
For i As Integer = 1 To treeSubNodeCount
deallocateNodeAndSubNodes(tree(i))
DeAllocate tree(i)
Next i
End Sub
Sub deallocateNodeAndSubNodes (node As tNode Ptr)
If (node = NULL) Then Return
deallocateTree(node->subNodes())
End Sub