Buchempfehlung
Windows-Programmierung. Das Entwicklerhandbuch zur WIN32-API
Windows-Programmierung. Das Entwicklerhandbuch zur WIN32-API
"Der" Petzold, das über 1000 Seiten starke Standardwerk zum Win32-API - besonders nützlich u. a. bei der GUI-Programmierung in FreeBASIC! [Mehr Infos...]
FreeBASIC-Chat
Es sind Benutzer im FreeBASIC-Chat online.
(Stand:  )
FreeBASIC bei Twitter
Twitter FreeBASIC-Nachrichten jetzt auch über Twitter erhalten. Follow us!

Code-Beispiel

Code-Beispiele » Suchen und Sortieren

Schneller Wortzähler (Tree)

Lizenz:Erster Autor:Letzte Bearbeitung:
GPLMitgliedgrindstone 02.09.2014

Dieses Programm liest einen oder mehrere Texte ein und erzeugt aus den Wörtern eine Baumstruktur, die entsprechend ausgewertet werden kann (siehe dazu die Beispiele im Programm).

Die Entstehungsgeschichte des Programms und die Diskussion dazu kann Externer Link!hier nachgelesen werden.

In der hier vorgestellten Ausführung stellt das Programm eine Verbindung zur deutschen Wikipedia-Site her und nutzt die dortige Funktion "Zufälliger Artikel", um laufend Texte herunterzuladen und zu verarbeiten.

Zur Bedienung: Mit den Zifferntasten 1 - 8 wird die Art der Darstellung gewählt, mit + und - lässt sich zwischen 640 x 480 und 1200 x 800 Pixeln umschalten, ein Druck auf "d" schreibt die komplette Wörterliste in eine Datei im Programmverzeichnis und Esc beendet das Programm.

'Die Routinen zum Erzeugen und Durchsuchen des Baumes wurden von Sebastian geschrieben
' (http://users.freebasic-portal.de/sebastian/fb/wordcount/wordcount_tree.bas), die
' Programmteile zur Verbindung mit dem Internet stammen aus dem Codebeispiel
' "Websites selbst verarbeiten" von PMedia
' (http://www.freebasic-portal.de/code-beispiele/internet-netzwerke/websites-selbst-verarbeiten-105.html)

#define NULL 0
#define recordLen SizeOf(tNode)
#Ifndef recvbufflen
#define RECVBUFFLEN 16384
#EndIf
#Ifndef newline
#define newline Chr(13,10)
#EndIf

#Include Once "win/winsock2.bi"

Const skipChars = 32
Const treeSubNodeCount = 256 - skipChars
Const printInterval = 750

Const As Integer schwarz = RGB(0,0,0), rot = RGB(255,0,0), gruen = RGB(0,255,0), _
                 blau = RGB(0,0,255), cyan = RGB(0,255,255), magenta = RGB(255,0,255), _
                 gelb = RGB(255,255,0), weiss = RGB(255,255,255)

Type tNode
    subNodes(1 To treeSubNodeCount) As tNode Ptr
    count As UInteger = 0
End Type

Type tSample
    ratio As Single
    words As UInteger
    nodes As UInteger
End Type

Dim As tSample sample
Dim As tSample Ptr samplePtr

Declare Function isSplittingChar (char As UByte) As Integer
Declare Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer
Declare Sub traverseTree (node As tNode Ptr, index As UByte, path As String, pluginPointer As Any Ptr = 0)
Declare Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer
Declare Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer
Declare Sub deallocateTree (tree() As tNode Ptr)
Declare Sub deallocateNodeAndSubNodes (node As tNode Ptr)
Declare Sub auswertung1(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub auswertung2(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub auswertung3(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub auswertung4(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub auswertung5(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub auswertung6(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub InitWinsock
Declare Sub ExitWinsock
Declare Function httpget(server As String, path As String, hadd As String = "") As String
Declare Function httppost(server As String, path As String, toPost As String, hadd As String = "") As String
Declare Function httperror(text As String) As Integer
Declare Function timeFormat (sekunden As Double, stellen As Integer = 0) As String
Declare Sub ausgabe(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)

Declare Function extractPlainText(text As String) As String
Declare Function utf8ToAnsi(text As String) As String


Dim As UByte char, c
Dim As UInteger numWords = 0, currentWordCharCount = 0, totalCharCount = 0, newWords, _
                words, wordsMax, nodes, nodesMax, wScale, nScale, rScale, siteCount, _
                totalNodeCount = treeSubNodeCount, newWordCount, newNodes, wordBegin, wordEnd
Dim Shared As Integer scrWidth, scrHeight
Dim As ULongInt totalInputSize, sampleTrigger
Dim As String buffer = "", domain, address, g, exposition = "1", text, textm
Dim As String samples = String(SizeOf(tSample),Chr(0))
Dim As Double tStart, tEnd, totalInputTime, startTime, averageLength
Dim As Single xwert, ywert, ratioMax, ratioMaxTemp, ratioAct

Dim tree(1 To treeSubNodeCount) As tNode Ptr


'******* URLDownloadToFile einbinden ****************************
Dim URLDownloadToFile As Function (_
ByVal pCaller As Long, _
ByVal szURL As ZString Ptr, _
ByVal szFileName As ZString Ptr, _
ByVal dwReserved As Long, _
ByVal lpfnCB As Long) As Long

Dim library As Any Ptr

library = DylibLoad( "urlmon.dll" )
URLDownloadToFile = DyLibSymbol( library, "URLDownloadToFileA" )
'*****************************************************************

For i As Integer = LBound(tree) To UBound(tree)
    tree(i) = Callocate(recordLen)
Next i


Dim As String inputFiles(1 To ...) = { "d:\internet\bibel\altes_testament_luther_revidiert.txt", _
                                       "d:\internet\bibel\neues_testament_luther_revidiert.txt", _
                                        ExePath + "\debianreferenz.txt" }

'Dim As String inputFiles(1 To ...) = { ExePath + "\debianreferenz.txt", _
'                                       "d:\internet\bibel\neues_testament_luther_revidiert.txt", _
'                                       "d:\internet\bibel\altes_testament_luther_revidiert.txt"  }

'domain = "de.wikipedia.org"
'address = "wiki/Spezial:Zuf%C3%A4llige_Seite"
'domain = "en.wikipedia.org"
'address = "wiki/Special:Random"
'textm = httpget(domain,address) + " "
'Open ExePath + "\wiki.txt" For Output As #1
'Print #1, textm
'Close
'Sleep
'End

startTime = Timer

'ScreenRes 1200,800,16
ScreenRes 640,480,16
ScreenInfo scrWidth,scrHeight
Width scrWidth/8,scrHeight/8

Do
    'eingangsdaten von wikipedia holen
    domain = "de.wikipedia.org"
    address = "wiki/Spezial:Zuf%C3%A4llige_Seite"
    g = "Location: http://de.wikipedia.org/wiki/" 'kennung

    'domain = "en.wikipedia.org"
    'address = "wiki/Special:Random"
    'g = "Location: http://en.wikipedia.org/wiki/" 'kennung

    text = httpget(domain,address) 'zufällige seite holen

    'link auf zufällige seite isolieren
    wordBegin = InStr(text,g) 'pointer auf anfang
    wordEnd = InStr(wordBegin + Len(g),text,Chr(13,10)) 'pointer auf ende
    address = Mid(text,wordBegin,wordEnd-wordBegin) 'link holen
    address = Mid(address,35) ' präfix ("Location: http://de.wikipedia.org/") abtrennen
    text = httpget(domain,address) ' <zufällige> website herunterladen
    siteCount += 1

    tStart = Timer
    text = extractPlainText(text)
    If text = "" Then 'fehler
        Continue Do 'nächste site
    EndIf

    text = LCase(text) 'umwandeln in kleinbuchstaben

    ''themenbereich eingrenzn
    'Dim themeWords(1 To ...) As String = { "computer", "freebasic", "website", "internet" }
    '
    'For x As Integer = 1 To UBound(themeWords)
    '   If InStr(text,themeWords(x)) Then
    '       Exit For
    '   EndIf
    '   Continue Do
    'Next

    ''website in datei schreiben
    'Open ExePath + "\wiki.txt" For Output As #1
    'Print #1, text
    'Close

    wordBegin = -1
    Do 'text abarbeiten
        Do 'nächsten wortanfang suchen
            wordBegin += 1
        Loop Until (isSplittingChar(text[wordBegin]) = 0) Or (wordBegin > Len(text))
        wordEnd = wordBegin
        Do 'nächstes wortende suchen
            wordEnd += 1
        Loop Until (isSplittingChar(text[wordEnd]) <> 0)
        buffer = Mid(text,wordBegin + 1, wordEnd - wordBegin) 'wort aus text holen
        'buffer = LCase(buffer)
        totalCharCount += wordEnd - wordBegin 'buchstabenzähler aktualisieren
        wordBegin = wordEnd 'zeiger für nächste suche setzen

        If buffer = "" Then 'nächstes wort
            Continue Do
        EndIf

        ' wörter ausfiltern
        'If (InStr(buffer, Any "0123456789""")) Or (Len(buffer) > 50) Then 'wort auslassen
        '   Continue Do
        'EndIf
        'Select Case buffer[0]
        '   Case Asc("a") To Asc("z"),Asc("A") To Asc("Z"),Asc("ä"),Asc("ö"),Asc("ü"),Asc("Ä"),Asc("Ö"),Asc("Ü") 'nur wörter, die mit buchstaben beginnen
        '
        '   Case Else
        '       Continue Do
        'End Select

        totalCharCount += wordEnd - wordBegin 'buchstabenzähler aktualisieren

        numWords += 1 'wortzähler erhöhen
        newNodes = putCharsIntoTree(tree(buffer[0]-skipChars), buffer, 0)
        totalNodeCount += newNodes 'knotenzähler aktualisieren
        If newNodes Then 'neues Wort gefunden
            newWordCount += 1 'zähler für "unterschiedliche wörter" aktualisieren
        EndIf

        'für graph
        If totalInputSize + wordBegin >= sampleTrigger + 1000 Then 'schnappschuss alle 1000 zeichen
            samples += Mks(CSng(newWordCount/numWords)) + Mki(numWords) + Mki(totalNodeCount)
            sampleTrigger = totalInputSize + wordBegin
        EndIf
    Loop Until wordEnd >= Len(text) 'bis text abgearbeitet ist

    totalInputSize += Len(text) 'zähler für insgesamt eingelesene daten aktualisieren
    tEnd = Timer
    totalInputTime += (tEnd - tStart)

    'darstellung
    ScreenLock
    View Print
    Cls

    'textblock
    Print "                       Gesamtzeit: ";timeFormat(Timer - startTime,2)
    Print "Gesamtzeit zum Einlesen der Daten: ";timeFormat(totalInputTime,2)
    Print
    Print "                Anzahl der Knoten: ";totalNodeCount
    g = Str((totalNodeCount * recordLen)/1024^2)
    g = Left(g,InStr(g,".")+1)
    Print "                Belegter Speicher: ";g;" MB"
    Print

    If (numWords > 0) Then
        averageLength = (totalCharCount / CDbl(numWords))
    End If
    Print "                Woerter insgesamt: " & numWords
    Print "         Unterschiedliche Woerter: " & newWordCount
    Print "     Durchschnittliche Wortlaenge:";
    Print Using "##.### "; averageLength;
    Print "Buchstaben."
    Print
    Print "Eingelesene Website (";siteCount;"): ";address
    Locate 14,1

    Select Case exposition 'gewählte art der auswertung
        Case "1" 'graphische darstellung
            Color cyan,schwarz
            Print " 1) Graphische Darstellung:"
            Color weiss,schwarz
            Line (10,scrHeight-10)-(10,170)
            Line (10,scrHeight-10)-(scrWidth-10,scrHeight-10)
            'darstellung vorbereiten
            'maximalwerte
            samplePtr = Cast(tSample Ptr,StrPtr(samples) + Len(samples) - SizeOf(tSample)) 'pointer auf letzten samplewert
            wordsMax = samplePtr->words
            nodesMax = samplePtr->nodes
            ratioAct = samplePtr->ratio

            wScale = wordsMax / (scrHeight - 200) 'maßstab für wörter
            nScale = nodesMax / (scrHeight - 200) 'maßstab für speicherbelegung
            rScale = (scrHeight - 200) / ratioMax 'maßstab für wortverhältnis

            'graphen auf bildschirm schreiben
            wordBegin = 0
            ratioMaxTemp = 0
            For y As Integer = 0 To Len(samples) - 1 Step SizeOf(tSample) 'alle samples
                With *Cast(tSample Ptr,StrPtr(samples) + y) 'pointer auf sample
                    If .ratio > ratioMaxTemp Then 'größtes verhältnis ermitteln
                        ratioMaxTemp = .ratio
                    EndIf
                    xwert = 10 + (scrWidth - 20) * y / Len(samples)
                    ywert = (scrHeight - 10) - .nodes / nScale

                    ' graphen schreiben
                    PSet (xwert,ywert) 'speicherbelegung
                    PSet (xwert,scrHeight - 10 - .words / wScale),gelb 'wörter gesamt
                    PSet (xwert,scrHeight - 10 - Int(.ratio * rScale)),cyan 'unterschiedliche wörter
                End With
            Next
            ratioMax = ratioMaxTemp 'größtes verhältnis merken

            'beschriftung
            Line (10,ywert)-(scrWidth - 10,ywert),weiss,,&b0000000000000011
            g = Str((totalNodeCount * recordLen) / 1024^2)
            g = Left(g,InStr(g,".") + 1)
            Draw String (30,ywert - 10),g + " " + "MB" 'speicherbedarf
            Draw String (30,ywert + 5),Str(wordsMax),gelb 'wörter gesamt
            g = Str(100 * ratioAct )
            g = Left(g,InStr(g,".") + 1) + "%"
            Draw String (scrWidth - 50,scrHeight - 30 - ratioAct * rScale),g,cyan 'wortverhältnis

            'legende
            Draw String (scrWidth / 2,scrHeight - 50),"_____ Speicher",weiss
            Draw String (scrWidth / 2,scrHeight - 40),"_____ Woerter gesamt",gelb
            Draw String (scrWidth / 2,scrHeight - 30),"_____ Rel. verschiedene",cyan

        Case "2" 'gezielt nach wörtern suchen
            Color cyan,schwarz
            Print " 2) Gezielter Lookup von Woertern:"
            Color weiss,schwarz
            Locate CsrLin + 3,1

            Dim lookupWords(1 To ...) As String = { "Computernerd", "freeBasic", "der", "die", "das", _
                                                    "und", "in", "Klingone", "Rhabarber", "Haus" }

            tStart = Timer
            For i As Integer = LBound(lookupWords) To UBound(lookupWords)
                Locate CsrLin,5
                Print "Wie oft kommt ";
                Color gelb,schwarz
                Print lookupWords(i);
                Color weiss,schwarz
                Print " vor? ";
                Color rot,schwarz
                Print lookupWordCount(tree(), LCase(lookupWords(i)));
                Color weiss,schwarz
                Print "x"
            Next i
            tEnd = Timer
            Locate CsrLin + 1,5
            Print "Das Nachschlagen der Haeufigkeiten dauerte ";
            Print Using "##.######"; (tEnd-tStart);
            Print " Sekunden."

        Case "3" 'häufigste und längste wörter
            Color cyan,schwarz
            Print " 3) Die 20 haeufigsten Woerter:"
            Color weiss,schwarz
            View Print CsrLin + 3 To scrHeight/8

            tStart = Timer 'zeitmessung starten
            For i As Integer = 1 To treeSubNodeCount 'alle knoten durchgehen
                traverseTree(tree(i), i, Chr(skipChars + i),@auswertung1)
            Next i
            tEnd = Timer 'zeitmessung stoppen
            Locate CsrLin + 3,5
            Print "Die Auswertung der ";newWordCount;" Woerter dauerte ";
            Print Using "##.###"; (tEnd-tStart);
            Print " Sekunden."

        Case "4" '3. buchstabe "a"
            Color cyan,schwarz
            Print " 4) Alle Woerter, deren 3. Buchstabe ein ""a"" ist:"
            Color weiss,schwarz
            View Print CsrLin + 3 To scrHeight/8

            tStart = Timer
            For i As Integer = 1 To treeSubNodeCount
                traverseTree(tree(i), i, Chr(skipChars + i),@auswertung2)
            Next i
            tEnd = Timer
            Locate CsrLin + 1,5
            Print "Die Auswertung der ";newWordCount;" Woerter dauerte ";
            Print Using "##.###"; (tEnd-tStart);
            Print " Sekunden."

        Case "5" 'alle wörter mit "auto"
            Color cyan,schwarz
            Print " 5) Alle Woerter, in denen die Zeichenfolge ""auto"" vorkommt:"
            Color weiss,schwarz
            View Print CsrLin + 3 To scrHeight/8

            tStart = Timer
            For i As Integer = 1 To treeSubNodeCount
                traverseTree(tree(i), i, Chr(skipChars + i),@auswertung3)
            Next i
            tEnd = Timer
            Locate CsrLin + 1,5
            Print "Die Auswertung der ";newWordCount;" Woerter dauerte ";
            Print Using "##.###"; (tEnd-tStart);
            Print " Sekunden."

        Case "6" 'visualisierung --> baum als punkte
            Color cyan,schwarz
            Print " 6) Visualisierung des kompletten Baums als Punktefeld (1 Knoten = 1 Punkt)"
            Color weiss,schwarz

            For i As Integer = 1 To treeSubNodeCount
                traverseTree(tree(i), i, Chr(skipChars + i),@auswertung4)
            Next i

        Case "7" 'visualisierung --> anfangsbuchstaben
            Color cyan,schwarz
            Print " 7) Visualisierung des kompletten Baums als Punktefeld mit Anfangsbuchstaben"
            Color weiss,schwarz

            For i As Integer = 1 To treeSubNodeCount
                traverseTree(tree(i), i, Chr(skipChars + i),@auswertung5)
            Next i

        Case "8" 'visualisierung --> anfangsbuchstaben als weisse punkte / baum als punkte
            Color cyan,schwarz
            Print " 8) Baum als Punktefeld mit weissen Punkten am Wortanfang"
            Color weiss,schwarz

            For i As Integer = 1 To treeSubNodeCount
                traverseTree(tree(i), i, Chr(skipChars + i),@auswertung6)
            Next i

    End Select
    ScreenUnLock

    'tastaturabfrage
    g = InKey
    Select Case g
        Case "1","2","3","4","5","6","7","8" 'darstellung auswählen
            exposition = g
        Case "d" 'gesamten baum in datei schreiben
            Open ExePath + "\wiki.txt" For Output As #1
            For i As Integer = 1 To treeSubNodeCount
                traverseTree(tree(i), i, Chr(skipChars+i),@ausgabe)
            Next i
            Close 1
        Case "+" 'grosses fenster
            ScreenRes 1200,800,16
            ScreenInfo scrWidth,scrHeight
            Width scrWidth/8,scrHeight/8
        Case "-" 'kleines fenster
            ScreenRes 640,480,16
            ScreenInfo scrWidth,scrHeight
            Width scrWidth/8,scrHeight/8
        Case Chr(27) 'esc
            Exit Do 'programm beenden
    End Select
Loop

View Print 13 To scrHeight/8
Cls 2

Print
Print "Abbau des Baums aus dem Speicher...  ";
deallocateTree(tree())
Print "Fertig."

End 0


'Ermittelt, ob das uebergebene Zeichen eines ist, das das Ende
'eines Wortes kennzeichnet (zum Beispiel ein Leerzeichen, ein
'Zeilenumbruch oder ein Komma).
Function isSplittingChar (char As UByte) As Integer
    '                               ,              .              :              ;              /
    Return ((char < 33) Or (char = 44) Or (char = 46) Or (char = 58) Or (char = 59) Or _
            (char = 47) Or (char = Asc("-") Or (char = Asc("+"))))
End Function

Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer
    Dim As UByte c = buffer[index]
    Dim As Integer newNodes = 0
    If ((index+1) >= Len(buffer)) Then
        ' Wort endet hier in diesem Knoten
        node->count += 1
    Else
        c = buffer[index+1]
        If (node->subNodes(c-skipChars) = NULL) Then
            node->subNodes(c-skipChars) = Callocate(recordLen)
            newNodes += 1
        End If
        newNodes += putCharsIntoTree(node->subNodes(c-skipChars), buffer, index+1)
    End If
    Return newNodes
End Function

Sub traverseTree (node As tNode Ptr, index As UByte, path As String, pluginPointer As Any Ptr = 0)
    'ruft nacheinander alle knoten des baums auf
    Dim Plugin As Sub (text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)

    'Print #1, node;" ";index;" ";path;" ";pluginPointer
    If (node = NULL) And (index <> 0) Then Return
    If pluginPointer Then 'plugin vorhanden
        Plugin = pluginPointer
        Plugin(path,node->count,node,index) 'plugin aufrufen
    Else
        If (node->count > 0) Then
        Print path & " => " & node->count & " x"
        End If
    End If

    For i As Integer = 1 To treeSubNodeCount 'rekursiver aufruf aller knoten, auf die der aktuelle knoten zeigt
        If (node->subNodes(i) <> NULL) Then
            traverseTree (node->subNodes(i), i, path + Chr(i+skipChars),pluginPointer)
        End If
    Next i
End Sub

Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer
    If (Len(word) < 1) Then Return 0
    Return lookupCharacterCount(tree(word[0]-skipChars), word, 0) 'pointer auf 1. knoten
End Function

Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer
    Dim As String wd = word

    If ((index+1) >= Len(wd)) Then
        Return node->count
    Else
      Dim As UByte nextChar = wd[index+1] 'nächster buchstabe
      Dim As tNode Ptr nextNode = node->subNodes(nextChar-skipChars) 'knoten für nächsten buchstaben
      If (nextNode = NULL) Then Return 0 'Der Baum geht hier nicht mehr weiter, aber das Wort wurde bisher nicht gefunden.
      Return lookupCharacterCount (nextNode, wd, index+1)
    End If
End Function

Sub deallocateTree (tree() As tNode Ptr)
    For i As Integer = 1 To treeSubNodeCount
        deallocateNodeAndSubNodes(tree(i))
        DeAllocate tree(i)
    Next i
End Sub

Sub deallocateNodeAndSubNodes (node As tNode Ptr)
    If (node = NULL) Then Return
    deallocateTree(node->subNodes())
End Sub

Sub InitWinsock Constructor
    'Autor: PMedia http://www.freebasic-portal.de/code-beispiele/internet-netzwerke/websites-selbst-verarbeiten-105.html
    ' init winsock
    Dim wsaData As WSAData
    If( WSAStartup( MAKEWORD( 1, 1 ), @wsaData ) <> 0 ) Then
        Print "Error: WSAStartup failed"
        End 1
    End If

End Sub

Sub ExitWinsock Destructor

    WSACleanup

End Sub

Function httpget(server As String, path As String, hadd As String = "") As String
    Dim IP As Integer
    Dim ia As in_addr
    Dim s As SOCKET
    Dim hostentry As hostent Ptr
    Dim sendbuffer As String
    Dim recvbuffer As ZString * RECVBUFFLEN+1
    Dim bytes As Integer
    Dim sa As sockaddr_in
    Dim in As String
    ia.S_addr = inet_addr( server )
    If ( ia.S_addr = INADDR_NONE ) Then
        hostentry = gethostbyname( server )
        If ( hostentry = 0 ) Then
            Return "Error: IP couldn't be resolved!"
        End If
        IP = *Cast( Integer Ptr, *hostentry->h_addr_list )
    Else
        IP = ia.S_addr
    End If
    s = OpenSocket( AF_INET, SOCK_STREAM, IPPROTO_TCP )
    If( s = 0 ) Then
        Return "Error: Socket couldn't be opened."
    End If
    sa.sin_port         = htons( 80 )
    sa.sin_family       = AF_INET
    sa.sin_addr.S_addr  = ip
    If ( connect( s, Cast( PSOCKADDR, @sa ), Len( sa )) = SOCKET_ERROR ) Then
        closesocket( s )
        Return "Error: Couldn't connect to host"
    End If
    sendBuffer = "GET /" + path + " HTTP/1.0" + NEWLINE + _
    "Host: " + server + NEWLINE + _
    "Connection: close" + NEWLINE + _
    hadd + _
    NEWLINE
    If( send( s, sendBuffer, Len( sendBuffer ), 0 ) = SOCKET_ERROR ) Then
        closesocket( s )
        Return "Error: Couldn't send request"
    End If
    Do
        bytes = recv( s, recvBuffer, RECVBUFFLEN, 0 )
        If( bytes <= 0 ) Then
            Exit Do
        End If
        recvbuffer[bytes] = 0
        in += recvbuffer
    Loop
    shutdown( s, 2 )
    closesocket( s )
    Return in
End Function

Function httperror(text As String) As Integer
    If Left(text,7) = "Error: " Then
        Return -1
    Else
        Return 0
    EndIf
End Function

Function httppost(server As String, path As String, toPost As String, hadd As String = "") As String
    Dim IP As Integer
    Dim ia As in_addr
    Dim s As SOCKET
    Dim hostentry As hostent Ptr
    Dim sendbuffer As String
    Dim recvbuffer As ZString * RECVBUFFLEN+1
    Dim bytes As Integer
    Dim sa As sockaddr_in
    Dim in As String

    ia.S_addr = inet_addr( server )
    If ( ia.S_addr = INADDR_NONE ) Then
        hostentry = gethostbyname( server )
        If ( hostentry = 0 ) Then
            Return "IP couldn't be resolved!"
        End If
        IP = *Cast( Integer Ptr, *hostentry->h_addr_list )
    Else
        IP = ia.S_addr
    End If
    s = OpenSocket( AF_INET, SOCK_STREAM, IPPROTO_TCP )
    If( s = 0 ) Then
        Return "Socket couldn't be opened."
    End If
    sa.sin_port         = htons( 80 )
    sa.sin_family       = AF_INET
    sa.sin_addr.S_addr  = ip
    If ( connect( s, Cast( PSOCKADDR, @sa ), Len( sa )) = SOCKET_ERROR ) Then
        closesocket( s )
        Return "Couldn't connect to host"
    End If
    sendBuffer = "POST /" + path + " HTTP/1.0" + NEWLINE + _
    "Host: " + server + NEWLINE + _
    "Content-Type: application/x-www-form-urlencoded" + NEWLINE + _
    "Content-Length: " + Str(Len(toPost)) + NEWLINE + _
    "Connection: close" + NEWLINE + _
    hadd + _
    NEWLINE + _
    toPost + NEWLINE
    If( send( s, sendBuffer, Len( sendBuffer ), 0 ) = SOCKET_ERROR ) Then
        closesocket( s )
        Return "Couldn't send request"
    End If
    Do
        bytes = recv( s, recvBuffer, RECVBUFFLEN, 0 )
        If( bytes <= 0 ) Then
            Exit Do
        End If
        recvbuffer[bytes] = 0
        in += recvbuffer
    Loop
    shutdown( s, 2 )
    closesocket( s )
    Return in
End Function

Function timeFormat (sekunden As Double, stellen As Integer = 0) As String
  Dim As Integer minuten, stunden
  Dim As String zeit

  stunden = Int(sekunden / 3600)
  sekunden -= (stunden * 3600)
  minuten = Int(sekunden / 60)
  sekunden -= (minuten * 60)

  zeit = Str(stunden) + ":" + Right("0" + Str(minuten),2) + ":" + Mid("0" + Str(sekunden),InStr("0" + Str(sekunden),".") - 2)
  If stellen Then
    zeit = Left(zeit, InStr(zeit,".") + stellen)
  EndIf
  Return zeit

End Function

Sub auswertung1(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
    'häufigste wörter und längstes wort suchen

    Static As Integer vorkommen(21), laenge
    Static As String wort(21), lwort

    If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
        For x As Integer = 1 To UBound(wort)
            vorkommen(x) = 0
            wort (x) = ""
        Next
        laenge = 0
        lwort = ""
    EndIf

    If Len(text) > 1 Then
        vorkommen(UBound(wort)) = count
        wort(UBound(wort)) = text

        For x As Integer = UBound(wort) To 2 Step -1
            If vorkommen(x) > vorkommen(x - 1) Then
                Swap vorkommen(x),vorkommen(x - 1)
                Swap wort(x),wort(x - 1)
            Else
                Exit For
            EndIf
        Next

        If Len(text) > laenge Then
            lwort = text
            laenge = Len(text)
        EndIf
    EndIf

    If index = treeSubnodeCount Then 'letzter aufruf --> ergebnis ausgeben
        For x As Integer = 1 To UBound(wort) - 1
            Locate CsrLin,5
            Print x;" ";wort(x);
            Locate CsrLin,(Pos + 5)-(Pos + 10)Mod 5
            Print vorkommen(x);" x                                  "
        Next
        Print
        Print
        Print
        Print "     Das laengste Wort (";laenge;" Zeichen ) ist: "
        Print
        Print lwort
    EndIf

End Sub

Sub auswertung2(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
    'alle wörter mit 3. buchstaben "a"

    Static As Integer zaehler

    If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
        zaehler = 0
    EndIf

    If (count > 0) And (Len(text) >= 3) And (text[2] = Asc("a")) Then
        zaehler += 1
        Print zaehler;" ";text
    EndIf

End Sub

Sub auswertung3(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
    'alle wörter, in denen "auto" vorkommt

    Static As Integer zaehler

    If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
        zaehler = 0
    EndIf

    If (count > 0) And InStr(text,"auto") Then
        zaehler += 1
        Print zaehler;" ";text
    EndIf

End Sub

Sub auswertung4(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
    'baum als punktefeld darstellen

    Static As Integer nodenr
    Dim As Integer h, b, pc
    Dim As UShort c

    If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
        nodenr = 0
    EndIf

    ScreenInfo b,h
    nodenr += 1
    For x As Integer = 1 To treeSubNodeCount
        If node->subNodes(x) <> 0 Then
            pc += 1
        EndIf
    Next
    c = LoWord(node->count)
    c = c * c
    PSet((nodenr Mod b)+1,h-Int(nodenr/b)-1),RGB(HiByte(c),LoByte(c),LoByte(pc*pc))

End Sub


Sub auswertung5(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
    'baum als punktefeld mit anfangsbuchstaben darstellen

    Static As Integer nodenr
    Dim As Integer h, b, pc
    Dim As UShort c

    If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
        nodenr = 0
    EndIf

    ScreenInfo b,h
    nodenr += 1
    For x As Integer = 1 To treeSubNodeCount
        If node->subNodes(x) <> 0 Then
            pc += 1
        EndIf
    Next
    c = LoWord(node->count)
    c = c * c
    If Len(text) = 1 Then
        Draw String((nodenr Mod b)+1,h-Int(nodenr/b)-10),Chr(text[0] + skipChars),RGB(255,255,255)
    Else
        PSet((nodenr Mod b)+1,h-Int(nodenr/b)-10),RGB(HiByte(c),LoByte(c),LoByte(pc*pc))
    EndIf

End Sub

Sub auswertung6(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
    'baum als punktefeld mit weissen punkten am wortanfang

    Static As Integer nodenr
    Dim As Integer h, b, pc
    Dim As UShort c

    If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
        nodenr = 0
    EndIf

    ScreenInfo b,h
    nodenr += 1
    For x As Integer = 1 To treeSubNodeCount
        If node->subNodes(x) <> 0 Then
            pc += 1
        EndIf
    Next
    c = LoWord(node->count)
    c = c * c
    If Len(text) = 1 Then
        PSet((nodenr Mod b)+1,h-Int(nodenr/b)-1),RGB(255,255,255)
        PSet((nodenr Mod b)+0,h-Int(nodenr/b)-1),RGB(255,255,255)
        PSet((nodenr Mod b)+1,h-Int(nodenr/b)-0),RGB(255,255,255)
        PSet((nodenr Mod b)+0,h-Int(nodenr/b)-0),RGB(255,255,255)
    Else
        PSet((nodenr Mod b)+1,h-Int(nodenr/b)-1),RGB(HiByte(c),LoByte(c),LoByte(pc*pc))
    EndIf

End Sub

Sub ausgabe(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
    'wörterliste in datei schreiben

    Print #1, text;" ";count

End Sub

Function extractPlainText(text As String) As String
    'normalen text aus website extrahieren
    Dim As Integer anfang, ende, rptr, flag, x
    Dim As String umlaut

    ende = 1
    Do
        anfang = InStr(ende,text,"<p>") + 3 'beginn des textes
        If anfang = 3 Then
            Exit Do
        EndIf
        ende = InStr(anfang,text,"</p>") - 1 'ende des textes
        flag = 1
        For x As Integer = anfang - 1 To ende - 1
            If text[x] = Asc("<") Then 'beginn eines tags --> folgenden text ignorieren
                flag = 0
            ElseIf text[x] = Asc(">") Then 'ende eines tags --> folgenden text übertragen
                flag = 1
            Else
                If flag Then 'folgendes zeichen an den anfang des strings verschieben
                    text[rptr] = text[x]
                    rptr += 1
                    If rptr >= Len(text) Then 'fehler
                        Return ""
                    EndIf
                EndIf
            EndIf
        Next

        text[rptr] = Asc(" ") 'leerzeichen einfügen
        rptr += 1
    Loop Until ende >= Len(text)

    If (rptr > 0) And (rptr < Len(text)) Then
        text = utf8ToAnsi(Left(text,rptr)) 'string kürzen und UTF-8-zeichen nach ANSI konvertieren
    Else
        text = "" 'fehler
    EndIf

    Return text

End Function

Function utf8ToAnsi(text As String) As String
    Dim As Integer x
    Dim As String umlaut

    Do While InStr(text,Chr(195))
        x = InStr(text,Chr(195))
        Select Case text[x]
            Case 164
                umlaut = "ä"
            Case 132
                umlaut = "Ä"
            Case 182
                umlaut = "ö"
            Case 150
                umlaut = "Ö"
            Case 178
                umlaut = "ò"
            Case 188
                umlaut = "ü"
            Case 156
                umlaut = "Ü"
            Case 186
                umlaut = "ú"
            Case 159
                umlaut = "ß"
            Case 169
                umlaut = "é"
            Case 168
                umlaut = "è"
            Case 160
                umlaut = "à"
            Case 161
                umlaut = "á"
            Case 167
                umlaut = "c" 'eigentlich c mit apostroph unten
            Case Else
                'Print "*** unbekanntes Sonderzeichen ***"
                'Print text
                'Print x;" ";Mid(text,x+1,1);" ";Asc(Mid(text,x+1,1))
                'Sleep
                umlaut = "?"
        End Select
        text = Left(text,x - 1) + umlaut + Mid(text,x + 2) 'utf-8 durch ansi-zeichen ersetzen
    Loop

    Return text

End Function

Zusätzliche Informationen und Funktionen
  • Das Code-Beispiel wurde am 01.09.2014 von Mitgliedgrindstone angelegt.
  • Die aktuellste Version wurde am 02.09.2014 von Mitgliedgrindstone gespeichert.
  Bearbeiten Bearbeiten  

  Versionen Versionen