Code-Beispiel
Schneller Wortzähler (Tree)
Lizenz: | Erster Autor: | Letzte Bearbeitung: |
GPL | grindstone | 02.09.2014 |
Dieses Programm liest einen oder mehrere Texte ein und erzeugt aus den Wörtern eine Baumstruktur, die entsprechend ausgewertet werden kann (siehe dazu die Beispiele im Programm).
Die Entstehungsgeschichte des Programms und die Diskussion dazu kann hier nachgelesen werden.
In der hier vorgestellten Ausführung stellt das Programm eine Verbindung zur deutschen Wikipedia-Site her und nutzt die dortige Funktion "Zufälliger Artikel", um laufend Texte herunterzuladen und zu verarbeiten.
Zur Bedienung: Mit den Zifferntasten 1 - 8 wird die Art der Darstellung gewählt, mit + und - lässt sich zwischen 640 x 480 und 1200 x 800 Pixeln umschalten, ein Druck auf "d" schreibt die komplette Wörterliste in eine Datei im Programmverzeichnis und Esc beendet das Programm.
'Die Routinen zum Erzeugen und Durchsuchen des Baumes wurden von Sebastian geschrieben
' (http://users.freebasic-portal.de/sebastian/fb/wordcount/wordcount_tree.bas), die
' Programmteile zur Verbindung mit dem Internet stammen aus dem Codebeispiel
' "Websites selbst verarbeiten" von PMedia
' (http://www.freebasic-portal.de/code-beispiele/internet-netzwerke/websites-selbst-verarbeiten-105.html)
#define NULL 0
#define recordLen SizeOf(tNode)
#Ifndef recvbufflen
#define RECVBUFFLEN 16384
#EndIf
#Ifndef newline
#define newline Chr(13,10)
#EndIf
#Include Once "win/winsock2.bi"
Const skipChars = 32
Const treeSubNodeCount = 256 - skipChars
Const printInterval = 750
Const As Integer schwarz = RGB(0,0,0), rot = RGB(255,0,0), gruen = RGB(0,255,0), _
blau = RGB(0,0,255), cyan = RGB(0,255,255), magenta = RGB(255,0,255), _
gelb = RGB(255,255,0), weiss = RGB(255,255,255)
Type tNode
subNodes(1 To treeSubNodeCount) As tNode Ptr
count As UInteger = 0
End Type
Type tSample
ratio As Single
words As UInteger
nodes As UInteger
End Type
Dim As tSample sample
Dim As tSample Ptr samplePtr
Declare Function isSplittingChar (char As UByte) As Integer
Declare Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer
Declare Sub traverseTree (node As tNode Ptr, index As UByte, path As String, pluginPointer As Any Ptr = 0)
Declare Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer
Declare Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer
Declare Sub deallocateTree (tree() As tNode Ptr)
Declare Sub deallocateNodeAndSubNodes (node As tNode Ptr)
Declare Sub auswertung1(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub auswertung2(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub auswertung3(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub auswertung4(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub auswertung5(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub auswertung6(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Sub InitWinsock
Declare Sub ExitWinsock
Declare Function httpget(server As String, path As String, hadd As String = "") As String
Declare Function httppost(server As String, path As String, toPost As String, hadd As String = "") As String
Declare Function httperror(text As String) As Integer
Declare Function timeFormat (sekunden As Double, stellen As Integer = 0) As String
Declare Sub ausgabe(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
Declare Function extractPlainText(text As String) As String
Declare Function utf8ToAnsi(text As String) As String
Dim As UByte char, c
Dim As UInteger numWords = 0, currentWordCharCount = 0, totalCharCount = 0, newWords, _
words, wordsMax, nodes, nodesMax, wScale, nScale, rScale, siteCount, _
totalNodeCount = treeSubNodeCount, newWordCount, newNodes, wordBegin, wordEnd
Dim Shared As Integer scrWidth, scrHeight
Dim As ULongInt totalInputSize, sampleTrigger
Dim As String buffer = "", domain, address, g, exposition = "1", text, textm
Dim As String samples = String(SizeOf(tSample),Chr(0))
Dim As Double tStart, tEnd, totalInputTime, startTime, averageLength
Dim As Single xwert, ywert, ratioMax, ratioMaxTemp, ratioAct
Dim tree(1 To treeSubNodeCount) As tNode Ptr
'******* URLDownloadToFile einbinden ****************************
Dim URLDownloadToFile As Function (_
ByVal pCaller As Long, _
ByVal szURL As ZString Ptr, _
ByVal szFileName As ZString Ptr, _
ByVal dwReserved As Long, _
ByVal lpfnCB As Long) As Long
Dim library As Any Ptr
library = DylibLoad( "urlmon.dll" )
URLDownloadToFile = DyLibSymbol( library, "URLDownloadToFileA" )
'*****************************************************************
For i As Integer = LBound(tree) To UBound(tree)
tree(i) = Callocate(recordLen)
Next i
Dim As String inputFiles(1 To ...) = { "d:\internet\bibel\altes_testament_luther_revidiert.txt", _
"d:\internet\bibel\neues_testament_luther_revidiert.txt", _
ExePath + "\debianreferenz.txt" }
'Dim As String inputFiles(1 To ...) = { ExePath + "\debianreferenz.txt", _
' "d:\internet\bibel\neues_testament_luther_revidiert.txt", _
' "d:\internet\bibel\altes_testament_luther_revidiert.txt" }
'domain = "de.wikipedia.org"
'address = "wiki/Spezial:Zuf%C3%A4llige_Seite"
'domain = "en.wikipedia.org"
'address = "wiki/Special:Random"
'textm = httpget(domain,address) + " "
'Open ExePath + "\wiki.txt" For Output As #1
'Print #1, textm
'Close
'Sleep
'End
startTime = Timer
'ScreenRes 1200,800,16
ScreenRes 640,480,16
ScreenInfo scrWidth,scrHeight
Width scrWidth/8,scrHeight/8
Do
'eingangsdaten von wikipedia holen
domain = "de.wikipedia.org"
address = "wiki/Spezial:Zuf%C3%A4llige_Seite"
g = "Location: http://de.wikipedia.org/wiki/" 'kennung
'domain = "en.wikipedia.org"
'address = "wiki/Special:Random"
'g = "Location: http://en.wikipedia.org/wiki/" 'kennung
text = httpget(domain,address) 'zufällige seite holen
'link auf zufällige seite isolieren
wordBegin = InStr(text,g) 'pointer auf anfang
wordEnd = InStr(wordBegin + Len(g),text,Chr(13,10)) 'pointer auf ende
address = Mid(text,wordBegin,wordEnd-wordBegin) 'link holen
address = Mid(address,35) ' präfix ("Location: http://de.wikipedia.org/") abtrennen
text = httpget(domain,address) ' <zufällige> website herunterladen
siteCount += 1
tStart = Timer
text = extractPlainText(text)
If text = "" Then 'fehler
Continue Do 'nächste site
EndIf
text = LCase(text) 'umwandeln in kleinbuchstaben
''themenbereich eingrenzn
'Dim themeWords(1 To ...) As String = { "computer", "freebasic", "website", "internet" }
'
'For x As Integer = 1 To UBound(themeWords)
' If InStr(text,themeWords(x)) Then
' Exit For
' EndIf
' Continue Do
'Next
''website in datei schreiben
'Open ExePath + "\wiki.txt" For Output As #1
'Print #1, text
'Close
wordBegin = -1
Do 'text abarbeiten
Do 'nächsten wortanfang suchen
wordBegin += 1
Loop Until (isSplittingChar(text[wordBegin]) = 0) Or (wordBegin > Len(text))
wordEnd = wordBegin
Do 'nächstes wortende suchen
wordEnd += 1
Loop Until (isSplittingChar(text[wordEnd]) <> 0)
buffer = Mid(text,wordBegin + 1, wordEnd - wordBegin) 'wort aus text holen
'buffer = LCase(buffer)
totalCharCount += wordEnd - wordBegin 'buchstabenzähler aktualisieren
wordBegin = wordEnd 'zeiger für nächste suche setzen
If buffer = "" Then 'nächstes wort
Continue Do
EndIf
' wörter ausfiltern
'If (InStr(buffer, Any "0123456789""")) Or (Len(buffer) > 50) Then 'wort auslassen
' Continue Do
'EndIf
'Select Case buffer[0]
' Case Asc("a") To Asc("z"),Asc("A") To Asc("Z"),Asc("ä"),Asc("ö"),Asc("ü"),Asc("Ä"),Asc("Ö"),Asc("Ü") 'nur wörter, die mit buchstaben beginnen
'
' Case Else
' Continue Do
'End Select
totalCharCount += wordEnd - wordBegin 'buchstabenzähler aktualisieren
numWords += 1 'wortzähler erhöhen
newNodes = putCharsIntoTree(tree(buffer[0]-skipChars), buffer, 0)
totalNodeCount += newNodes 'knotenzähler aktualisieren
If newNodes Then 'neues Wort gefunden
newWordCount += 1 'zähler für "unterschiedliche wörter" aktualisieren
EndIf
'für graph
If totalInputSize + wordBegin >= sampleTrigger + 1000 Then 'schnappschuss alle 1000 zeichen
samples += Mks(CSng(newWordCount/numWords)) + Mki(numWords) + Mki(totalNodeCount)
sampleTrigger = totalInputSize + wordBegin
EndIf
Loop Until wordEnd >= Len(text) 'bis text abgearbeitet ist
totalInputSize += Len(text) 'zähler für insgesamt eingelesene daten aktualisieren
tEnd = Timer
totalInputTime += (tEnd - tStart)
'darstellung
ScreenLock
View Print
Cls
'textblock
Print " Gesamtzeit: ";timeFormat(Timer - startTime,2)
Print "Gesamtzeit zum Einlesen der Daten: ";timeFormat(totalInputTime,2)
Print
Print " Anzahl der Knoten: ";totalNodeCount
g = Str((totalNodeCount * recordLen)/1024^2)
g = Left(g,InStr(g,".")+1)
Print " Belegter Speicher: ";g;" MB"
Print
If (numWords > 0) Then
averageLength = (totalCharCount / CDbl(numWords))
End If
Print " Woerter insgesamt: " & numWords
Print " Unterschiedliche Woerter: " & newWordCount
Print " Durchschnittliche Wortlaenge:";
Print Using "##.### "; averageLength;
Print "Buchstaben."
Print
Print "Eingelesene Website (";siteCount;"): ";address
Locate 14,1
Select Case exposition 'gewählte art der auswertung
Case "1" 'graphische darstellung
Color cyan,schwarz
Print " 1) Graphische Darstellung:"
Color weiss,schwarz
Line (10,scrHeight-10)-(10,170)
Line (10,scrHeight-10)-(scrWidth-10,scrHeight-10)
'darstellung vorbereiten
'maximalwerte
samplePtr = Cast(tSample Ptr,StrPtr(samples) + Len(samples) - SizeOf(tSample)) 'pointer auf letzten samplewert
wordsMax = samplePtr->words
nodesMax = samplePtr->nodes
ratioAct = samplePtr->ratio
wScale = wordsMax / (scrHeight - 200) 'maßstab für wörter
nScale = nodesMax / (scrHeight - 200) 'maßstab für speicherbelegung
rScale = (scrHeight - 200) / ratioMax 'maßstab für wortverhältnis
'graphen auf bildschirm schreiben
wordBegin = 0
ratioMaxTemp = 0
For y As Integer = 0 To Len(samples) - 1 Step SizeOf(tSample) 'alle samples
With *Cast(tSample Ptr,StrPtr(samples) + y) 'pointer auf sample
If .ratio > ratioMaxTemp Then 'größtes verhältnis ermitteln
ratioMaxTemp = .ratio
EndIf
xwert = 10 + (scrWidth - 20) * y / Len(samples)
ywert = (scrHeight - 10) - .nodes / nScale
' graphen schreiben
PSet (xwert,ywert) 'speicherbelegung
PSet (xwert,scrHeight - 10 - .words / wScale),gelb 'wörter gesamt
PSet (xwert,scrHeight - 10 - Int(.ratio * rScale)),cyan 'unterschiedliche wörter
End With
Next
ratioMax = ratioMaxTemp 'größtes verhältnis merken
'beschriftung
Line (10,ywert)-(scrWidth - 10,ywert),weiss,,&b0000000000000011
g = Str((totalNodeCount * recordLen) / 1024^2)
g = Left(g,InStr(g,".") + 1)
Draw String (30,ywert - 10),g + " " + "MB" 'speicherbedarf
Draw String (30,ywert + 5),Str(wordsMax),gelb 'wörter gesamt
g = Str(100 * ratioAct )
g = Left(g,InStr(g,".") + 1) + "%"
Draw String (scrWidth - 50,scrHeight - 30 - ratioAct * rScale),g,cyan 'wortverhältnis
'legende
Draw String (scrWidth / 2,scrHeight - 50),"_____ Speicher",weiss
Draw String (scrWidth / 2,scrHeight - 40),"_____ Woerter gesamt",gelb
Draw String (scrWidth / 2,scrHeight - 30),"_____ Rel. verschiedene",cyan
Case "2" 'gezielt nach wörtern suchen
Color cyan,schwarz
Print " 2) Gezielter Lookup von Woertern:"
Color weiss,schwarz
Locate CsrLin + 3,1
Dim lookupWords(1 To ...) As String = { "Computernerd", "freeBasic", "der", "die", "das", _
"und", "in", "Klingone", "Rhabarber", "Haus" }
tStart = Timer
For i As Integer = LBound(lookupWords) To UBound(lookupWords)
Locate CsrLin,5
Print "Wie oft kommt ";
Color gelb,schwarz
Print lookupWords(i);
Color weiss,schwarz
Print " vor? ";
Color rot,schwarz
Print lookupWordCount(tree(), LCase(lookupWords(i)));
Color weiss,schwarz
Print "x"
Next i
tEnd = Timer
Locate CsrLin + 1,5
Print "Das Nachschlagen der Haeufigkeiten dauerte ";
Print Using "##.######"; (tEnd-tStart);
Print " Sekunden."
Case "3" 'häufigste und längste wörter
Color cyan,schwarz
Print " 3) Die 20 haeufigsten Woerter:"
Color weiss,schwarz
View Print CsrLin + 3 To scrHeight/8
tStart = Timer 'zeitmessung starten
For i As Integer = 1 To treeSubNodeCount 'alle knoten durchgehen
traverseTree(tree(i), i, Chr(skipChars + i),@auswertung1)
Next i
tEnd = Timer 'zeitmessung stoppen
Locate CsrLin + 3,5
Print "Die Auswertung der ";newWordCount;" Woerter dauerte ";
Print Using "##.###"; (tEnd-tStart);
Print " Sekunden."
Case "4" '3. buchstabe "a"
Color cyan,schwarz
Print " 4) Alle Woerter, deren 3. Buchstabe ein ""a"" ist:"
Color weiss,schwarz
View Print CsrLin + 3 To scrHeight/8
tStart = Timer
For i As Integer = 1 To treeSubNodeCount
traverseTree(tree(i), i, Chr(skipChars + i),@auswertung2)
Next i
tEnd = Timer
Locate CsrLin + 1,5
Print "Die Auswertung der ";newWordCount;" Woerter dauerte ";
Print Using "##.###"; (tEnd-tStart);
Print " Sekunden."
Case "5" 'alle wörter mit "auto"
Color cyan,schwarz
Print " 5) Alle Woerter, in denen die Zeichenfolge ""auto"" vorkommt:"
Color weiss,schwarz
View Print CsrLin + 3 To scrHeight/8
tStart = Timer
For i As Integer = 1 To treeSubNodeCount
traverseTree(tree(i), i, Chr(skipChars + i),@auswertung3)
Next i
tEnd = Timer
Locate CsrLin + 1,5
Print "Die Auswertung der ";newWordCount;" Woerter dauerte ";
Print Using "##.###"; (tEnd-tStart);
Print " Sekunden."
Case "6" 'visualisierung --> baum als punkte
Color cyan,schwarz
Print " 6) Visualisierung des kompletten Baums als Punktefeld (1 Knoten = 1 Punkt)"
Color weiss,schwarz
For i As Integer = 1 To treeSubNodeCount
traverseTree(tree(i), i, Chr(skipChars + i),@auswertung4)
Next i
Case "7" 'visualisierung --> anfangsbuchstaben
Color cyan,schwarz
Print " 7) Visualisierung des kompletten Baums als Punktefeld mit Anfangsbuchstaben"
Color weiss,schwarz
For i As Integer = 1 To treeSubNodeCount
traverseTree(tree(i), i, Chr(skipChars + i),@auswertung5)
Next i
Case "8" 'visualisierung --> anfangsbuchstaben als weisse punkte / baum als punkte
Color cyan,schwarz
Print " 8) Baum als Punktefeld mit weissen Punkten am Wortanfang"
Color weiss,schwarz
For i As Integer = 1 To treeSubNodeCount
traverseTree(tree(i), i, Chr(skipChars + i),@auswertung6)
Next i
End Select
ScreenUnLock
'tastaturabfrage
g = InKey
Select Case g
Case "1","2","3","4","5","6","7","8" 'darstellung auswählen
exposition = g
Case "d" 'gesamten baum in datei schreiben
Open ExePath + "\wiki.txt" For Output As #1
For i As Integer = 1 To treeSubNodeCount
traverseTree(tree(i), i, Chr(skipChars+i),@ausgabe)
Next i
Close 1
Case "+" 'grosses fenster
ScreenRes 1200,800,16
ScreenInfo scrWidth,scrHeight
Width scrWidth/8,scrHeight/8
Case "-" 'kleines fenster
ScreenRes 640,480,16
ScreenInfo scrWidth,scrHeight
Width scrWidth/8,scrHeight/8
Case Chr(27) 'esc
Exit Do 'programm beenden
End Select
Loop
View Print 13 To scrHeight/8
Cls 2
Print
Print "Abbau des Baums aus dem Speicher... ";
deallocateTree(tree())
Print "Fertig."
End 0
'Ermittelt, ob das uebergebene Zeichen eines ist, das das Ende
'eines Wortes kennzeichnet (zum Beispiel ein Leerzeichen, ein
'Zeilenumbruch oder ein Komma).
Function isSplittingChar (char As UByte) As Integer
' , . : ; /
Return ((char < 33) Or (char = 44) Or (char = 46) Or (char = 58) Or (char = 59) Or _
(char = 47) Or (char = Asc("-") Or (char = Asc("+"))))
End Function
Function putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer) As Integer
Dim As UByte c = buffer[index]
Dim As Integer newNodes = 0
If ((index+1) >= Len(buffer)) Then
' Wort endet hier in diesem Knoten
node->count += 1
Else
c = buffer[index+1]
If (node->subNodes(c-skipChars) = NULL) Then
node->subNodes(c-skipChars) = Callocate(recordLen)
newNodes += 1
End If
newNodes += putCharsIntoTree(node->subNodes(c-skipChars), buffer, index+1)
End If
Return newNodes
End Function
Sub traverseTree (node As tNode Ptr, index As UByte, path As String, pluginPointer As Any Ptr = 0)
'ruft nacheinander alle knoten des baums auf
Dim Plugin As Sub (text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
'Print #1, node;" ";index;" ";path;" ";pluginPointer
If (node = NULL) And (index <> 0) Then Return
If pluginPointer Then 'plugin vorhanden
Plugin = pluginPointer
Plugin(path,node->count,node,index) 'plugin aufrufen
Else
If (node->count > 0) Then
Print path & " => " & node->count & " x"
End If
End If
For i As Integer = 1 To treeSubNodeCount 'rekursiver aufruf aller knoten, auf die der aktuelle knoten zeigt
If (node->subNodes(i) <> NULL) Then
traverseTree (node->subNodes(i), i, path + Chr(i+skipChars),pluginPointer)
End If
Next i
End Sub
Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer
If (Len(word) < 1) Then Return 0
Return lookupCharacterCount(tree(word[0]-skipChars), word, 0) 'pointer auf 1. knoten
End Function
Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer
Dim As String wd = word
If ((index+1) >= Len(wd)) Then
Return node->count
Else
Dim As UByte nextChar = wd[index+1] 'nächster buchstabe
Dim As tNode Ptr nextNode = node->subNodes(nextChar-skipChars) 'knoten für nächsten buchstaben
If (nextNode = NULL) Then Return 0 'Der Baum geht hier nicht mehr weiter, aber das Wort wurde bisher nicht gefunden.
Return lookupCharacterCount (nextNode, wd, index+1)
End If
End Function
Sub deallocateTree (tree() As tNode Ptr)
For i As Integer = 1 To treeSubNodeCount
deallocateNodeAndSubNodes(tree(i))
DeAllocate tree(i)
Next i
End Sub
Sub deallocateNodeAndSubNodes (node As tNode Ptr)
If (node = NULL) Then Return
deallocateTree(node->subNodes())
End Sub
Sub InitWinsock Constructor
'Autor: PMedia http://www.freebasic-portal.de/code-beispiele/internet-netzwerke/websites-selbst-verarbeiten-105.html
' init winsock
Dim wsaData As WSAData
If( WSAStartup( MAKEWORD( 1, 1 ), @wsaData ) <> 0 ) Then
Print "Error: WSAStartup failed"
End 1
End If
End Sub
Sub ExitWinsock Destructor
WSACleanup
End Sub
Function httpget(server As String, path As String, hadd As String = "") As String
Dim IP As Integer
Dim ia As in_addr
Dim s As SOCKET
Dim hostentry As hostent Ptr
Dim sendbuffer As String
Dim recvbuffer As ZString * RECVBUFFLEN+1
Dim bytes As Integer
Dim sa As sockaddr_in
Dim in As String
ia.S_addr = inet_addr( server )
If ( ia.S_addr = INADDR_NONE ) Then
hostentry = gethostbyname( server )
If ( hostentry = 0 ) Then
Return "Error: IP couldn't be resolved!"
End If
IP = *Cast( Integer Ptr, *hostentry->h_addr_list )
Else
IP = ia.S_addr
End If
s = OpenSocket( AF_INET, SOCK_STREAM, IPPROTO_TCP )
If( s = 0 ) Then
Return "Error: Socket couldn't be opened."
End If
sa.sin_port = htons( 80 )
sa.sin_family = AF_INET
sa.sin_addr.S_addr = ip
If ( connect( s, Cast( PSOCKADDR, @sa ), Len( sa )) = SOCKET_ERROR ) Then
closesocket( s )
Return "Error: Couldn't connect to host"
End If
sendBuffer = "GET /" + path + " HTTP/1.0" + NEWLINE + _
"Host: " + server + NEWLINE + _
"Connection: close" + NEWLINE + _
hadd + _
NEWLINE
If( send( s, sendBuffer, Len( sendBuffer ), 0 ) = SOCKET_ERROR ) Then
closesocket( s )
Return "Error: Couldn't send request"
End If
Do
bytes = recv( s, recvBuffer, RECVBUFFLEN, 0 )
If( bytes <= 0 ) Then
Exit Do
End If
recvbuffer[bytes] = 0
in += recvbuffer
Loop
shutdown( s, 2 )
closesocket( s )
Return in
End Function
Function httperror(text As String) As Integer
If Left(text,7) = "Error: " Then
Return -1
Else
Return 0
EndIf
End Function
Function httppost(server As String, path As String, toPost As String, hadd As String = "") As String
Dim IP As Integer
Dim ia As in_addr
Dim s As SOCKET
Dim hostentry As hostent Ptr
Dim sendbuffer As String
Dim recvbuffer As ZString * RECVBUFFLEN+1
Dim bytes As Integer
Dim sa As sockaddr_in
Dim in As String
ia.S_addr = inet_addr( server )
If ( ia.S_addr = INADDR_NONE ) Then
hostentry = gethostbyname( server )
If ( hostentry = 0 ) Then
Return "IP couldn't be resolved!"
End If
IP = *Cast( Integer Ptr, *hostentry->h_addr_list )
Else
IP = ia.S_addr
End If
s = OpenSocket( AF_INET, SOCK_STREAM, IPPROTO_TCP )
If( s = 0 ) Then
Return "Socket couldn't be opened."
End If
sa.sin_port = htons( 80 )
sa.sin_family = AF_INET
sa.sin_addr.S_addr = ip
If ( connect( s, Cast( PSOCKADDR, @sa ), Len( sa )) = SOCKET_ERROR ) Then
closesocket( s )
Return "Couldn't connect to host"
End If
sendBuffer = "POST /" + path + " HTTP/1.0" + NEWLINE + _
"Host: " + server + NEWLINE + _
"Content-Type: application/x-www-form-urlencoded" + NEWLINE + _
"Content-Length: " + Str(Len(toPost)) + NEWLINE + _
"Connection: close" + NEWLINE + _
hadd + _
NEWLINE + _
toPost + NEWLINE
If( send( s, sendBuffer, Len( sendBuffer ), 0 ) = SOCKET_ERROR ) Then
closesocket( s )
Return "Couldn't send request"
End If
Do
bytes = recv( s, recvBuffer, RECVBUFFLEN, 0 )
If( bytes <= 0 ) Then
Exit Do
End If
recvbuffer[bytes] = 0
in += recvbuffer
Loop
shutdown( s, 2 )
closesocket( s )
Return in
End Function
Function timeFormat (sekunden As Double, stellen As Integer = 0) As String
Dim As Integer minuten, stunden
Dim As String zeit
stunden = Int(sekunden / 3600)
sekunden -= (stunden * 3600)
minuten = Int(sekunden / 60)
sekunden -= (minuten * 60)
zeit = Str(stunden) + ":" + Right("0" + Str(minuten),2) + ":" + Mid("0" + Str(sekunden),InStr("0" + Str(sekunden),".") - 2)
If stellen Then
zeit = Left(zeit, InStr(zeit,".") + stellen)
EndIf
Return zeit
End Function
Sub auswertung1(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
'häufigste wörter und längstes wort suchen
Static As Integer vorkommen(21), laenge
Static As String wort(21), lwort
If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
For x As Integer = 1 To UBound(wort)
vorkommen(x) = 0
wort (x) = ""
Next
laenge = 0
lwort = ""
EndIf
If Len(text) > 1 Then
vorkommen(UBound(wort)) = count
wort(UBound(wort)) = text
For x As Integer = UBound(wort) To 2 Step -1
If vorkommen(x) > vorkommen(x - 1) Then
Swap vorkommen(x),vorkommen(x - 1)
Swap wort(x),wort(x - 1)
Else
Exit For
EndIf
Next
If Len(text) > laenge Then
lwort = text
laenge = Len(text)
EndIf
EndIf
If index = treeSubnodeCount Then 'letzter aufruf --> ergebnis ausgeben
For x As Integer = 1 To UBound(wort) - 1
Locate CsrLin,5
Print x;" ";wort(x);
Locate CsrLin,(Pos + 5)-(Pos + 10)Mod 5
Print vorkommen(x);" x "
Next
Print
Print
Print
Print " Das laengste Wort (";laenge;" Zeichen ) ist: "
Print
Print lwort
EndIf
End Sub
Sub auswertung2(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
'alle wörter mit 3. buchstaben "a"
Static As Integer zaehler
If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
zaehler = 0
EndIf
If (count > 0) And (Len(text) >= 3) And (text[2] = Asc("a")) Then
zaehler += 1
Print zaehler;" ";text
EndIf
End Sub
Sub auswertung3(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
'alle wörter, in denen "auto" vorkommt
Static As Integer zaehler
If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
zaehler = 0
EndIf
If (count > 0) And InStr(text,"auto") Then
zaehler += 1
Print zaehler;" ";text
EndIf
End Sub
Sub auswertung4(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
'baum als punktefeld darstellen
Static As Integer nodenr
Dim As Integer h, b, pc
Dim As UShort c
If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
nodenr = 0
EndIf
ScreenInfo b,h
nodenr += 1
For x As Integer = 1 To treeSubNodeCount
If node->subNodes(x) <> 0 Then
pc += 1
EndIf
Next
c = LoWord(node->count)
c = c * c
PSet((nodenr Mod b)+1,h-Int(nodenr/b)-1),RGB(HiByte(c),LoByte(c),LoByte(pc*pc))
End Sub
Sub auswertung5(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
'baum als punktefeld mit anfangsbuchstaben darstellen
Static As Integer nodenr
Dim As Integer h, b, pc
Dim As UShort c
If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
nodenr = 0
EndIf
ScreenInfo b,h
nodenr += 1
For x As Integer = 1 To treeSubNodeCount
If node->subNodes(x) <> 0 Then
pc += 1
EndIf
Next
c = LoWord(node->count)
c = c * c
If Len(text) = 1 Then
Draw String((nodenr Mod b)+1,h-Int(nodenr/b)-10),Chr(text[0] + skipChars),RGB(255,255,255)
Else
PSet((nodenr Mod b)+1,h-Int(nodenr/b)-10),RGB(HiByte(c),LoByte(c),LoByte(pc*pc))
EndIf
End Sub
Sub auswertung6(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
'baum als punktefeld mit weissen punkten am wortanfang
Static As Integer nodenr
Dim As Integer h, b, pc
Dim As UShort c
If (index = 1) And (text = Chr(skipChars + 1)) Then '1. aufruf --> reset
nodenr = 0
EndIf
ScreenInfo b,h
nodenr += 1
For x As Integer = 1 To treeSubNodeCount
If node->subNodes(x) <> 0 Then
pc += 1
EndIf
Next
c = LoWord(node->count)
c = c * c
If Len(text) = 1 Then
PSet((nodenr Mod b)+1,h-Int(nodenr/b)-1),RGB(255,255,255)
PSet((nodenr Mod b)+0,h-Int(nodenr/b)-1),RGB(255,255,255)
PSet((nodenr Mod b)+1,h-Int(nodenr/b)-0),RGB(255,255,255)
PSet((nodenr Mod b)+0,h-Int(nodenr/b)-0),RGB(255,255,255)
Else
PSet((nodenr Mod b)+1,h-Int(nodenr/b)-1),RGB(HiByte(c),LoByte(c),LoByte(pc*pc))
EndIf
End Sub
Sub ausgabe(text As String = "", count As UInteger = 0, node As tNode Ptr = 0, index As UByte = 0)
'wörterliste in datei schreiben
Print #1, text;" ";count
End Sub
Function extractPlainText(text As String) As String
'normalen text aus website extrahieren
Dim As Integer anfang, ende, rptr, flag, x
Dim As String umlaut
ende = 1
Do
anfang = InStr(ende,text,"<p>") + 3 'beginn des textes
If anfang = 3 Then
Exit Do
EndIf
ende = InStr(anfang,text,"</p>") - 1 'ende des textes
flag = 1
For x As Integer = anfang - 1 To ende - 1
If text[x] = Asc("<") Then 'beginn eines tags --> folgenden text ignorieren
flag = 0
ElseIf text[x] = Asc(">") Then 'ende eines tags --> folgenden text übertragen
flag = 1
Else
If flag Then 'folgendes zeichen an den anfang des strings verschieben
text[rptr] = text[x]
rptr += 1
If rptr >= Len(text) Then 'fehler
Return ""
EndIf
EndIf
EndIf
Next
text[rptr] = Asc(" ") 'leerzeichen einfügen
rptr += 1
Loop Until ende >= Len(text)
If (rptr > 0) And (rptr < Len(text)) Then
text = utf8ToAnsi(Left(text,rptr)) 'string kürzen und UTF-8-zeichen nach ANSI konvertieren
Else
text = "" 'fehler
EndIf
Return text
End Function
Function utf8ToAnsi(text As String) As String
Dim As Integer x
Dim As String umlaut
Do While InStr(text,Chr(195))
x = InStr(text,Chr(195))
Select Case text[x]
Case 164
umlaut = "ä"
Case 132
umlaut = "Ä"
Case 182
umlaut = "ö"
Case 150
umlaut = "Ö"
Case 178
umlaut = "ò"
Case 188
umlaut = "ü"
Case 156
umlaut = "Ü"
Case 186
umlaut = "ú"
Case 159
umlaut = "ß"
Case 169
umlaut = "é"
Case 168
umlaut = "è"
Case 160
umlaut = "à"
Case 161
umlaut = "á"
Case 167
umlaut = "c" 'eigentlich c mit apostroph unten
Case Else
'Print "*** unbekanntes Sonderzeichen ***"
'Print text
'Print x;" ";Mid(text,x+1,1);" ";Asc(Mid(text,x+1,1))
'Sleep
umlaut = "?"
End Select
text = Left(text,x - 1) + umlaut + Mid(text,x + 2) 'utf-8 durch ansi-zeichen ersetzen
Loop
Return text
End Function
Zusätzliche Informationen und Funktionen | |||||||
---|---|---|---|---|---|---|---|
|
|