回复:[CODE SNIPPET] VB.NET制作词频表
	
	
		
		
			以下是引用 xujiajin 在 2005-8-6 0:22:58 的发言:
dzhigner, 我是个外行,你还是帮忙看看上面的script有没有问题。谢谢。
		
		
	 
如下是一个类模块作为例子:
Imports System.IO
Imports System.Text
Imports System.Text.RegularExpressions
' Demo class that builds a word-frequency list from a plain-text corpus.
' MAIN loads a lemma list (lines of the form "inflected=lemma") into a
' Hashtable, then BUILDWLIST tokenises the corpus, maps each token to its
' lemma when one is known, and counts occurrences.
Public Class BUILD_WORDLIST_DEMO1
    ' Running count of tokens accepted by the word regex; reset at the start of MAIN.
    Dim TK As Integer

    ' Reads the file FLNM line by line, splits each line on the characters in
    ' STRDELIMS, and counts every token that matches WDRegex.
    '   FLNM        - path of the corpus file to read.
    '   WDRegex     - a token is counted only if this regex matches it.
    '   STRDELIMS   - every character of this string is used as a token delimiter.
    '   OUTPUT_UPCS - when True, tokens are upper-cased and then replaced by the
    '                 value stored under that key in HASH2, if any.
    '   ENCODING    - text encoding used to read FLNM.
    '   HASH2       - upper-cased word form -> replacement form; may be Nothing.
    ' Returns a StringBuilder holding one "word<TAB>count<CRLF>" line per distinct
    ' word. If an exception occurs it is shown with MsgBox and whatever has been
    ' written to the builder so far (possibly nothing) is returned.
    ' Side effects: increments TK per counted token and writes the number of
    ' distinct words to the console.
    Private Function BUILDWLIST(ByVal FLNM As String, ByVal WDRegex As Regex, ByVal STRDELIMS As String, ByVal OUTPUT_UPCS As Boolean, ByVal ENCODING As Encoding, ByVal HASH2 As Hashtable) As StringBuilder
        Dim WDUPCS, LINE, TOKEN As String
        Dim WORDS As String()
        Dim COUNT As Object
        ' Initialised to Nothing so the Finally block can tell whether the file was opened.
        Dim READER As System.IO.StreamReader = Nothing
        Dim HASHTBL As New Hashtable
        Dim DELIMS As Char() = STRDELIMS.ToCharArray
        Dim BOO As New StringBuilder
        Try
            READER = New System.IO.StreamReader(FLNM, ENCODING)
            ' ReadLine returns Nothing at end of stream, which is a more reliable
            ' end-of-input test than Peek.
            LINE = READER.ReadLine
            Do While Not LINE Is Nothing
                LINE = Trim(LINE)
                If LINE.Length > 0 Then
                    WORDS = LINE.Split(DELIMS)
                    For Each TOKEN In WORDS
                        ' The regex is tested against the raw token, before any
                        ' upper-casing or lemma substitution.
                        If WDRegex.IsMatch(TOKEN) Then
                            If OUTPUT_UPCS Then
                                WDUPCS = TOKEN.ToUpper
                                If Not HASH2 Is Nothing AndAlso HASH2.Contains(WDUPCS) Then
                                    WDUPCS = CType(HASH2.Item(WDUPCS), String)
                                End If
                            Else
                                WDUPCS = TOKEN
                            End If
                            TK = TK + 1
                            ' Item returns Nothing for an absent key, so a single
                            ' lookup tells us whether this is the first occurrence.
                            COUNT = HASHTBL.Item(WDUPCS)
                            If COUNT Is Nothing Then
                                HASHTBL.Item(WDUPCS) = 1
                            Else
                                HASHTBL.Item(WDUPCS) = CType(COUNT, Integer) + 1
                            End If
                        End If
                    Next
                End If
                LINE = READER.ReadLine
            Loop
            Console.WriteLine(HASHTBL.Count)
            Dim ENDIC As IDictionaryEnumerator = HASHTBL.GetEnumerator
            Do While ENDIC.MoveNext
                BOO.Append(CType(ENDIC.Key, String))
                BOO.Append(vbTab)
                BOO.Append(CType(ENDIC.Value, String))
                BOO.Append(vbCrLf)
            Loop
        Catch ex As Exception
            MsgBox(ex.ToString)
        Finally
            ' Close the file on every path; READER is still Nothing when the
            ' constructor itself failed (e.g. file not found).
            If Not READER Is Nothing Then
                READER.Close()
            End If
        End Try
        Return BOO
    End Function

    ' Entry point of the demo: loads the lemma list, builds the frequency list
    ' for the corpus file and prints the token total followed by the list.
    Public Sub MAIN()
        TK = 0
        Dim HASH As New Hashtable
        Dim LINE, KEY As String
        Dim WORDS As String()
        ' The lemma list used in this example has lines of the form: abolished=abolish
        Dim DELI As Char() = New Char() {"="c}
        ' Same options as the numeric value 9 (IgnoreCase = 1, Compiled = 8).
        Dim REGX As New Regex("^\b[A-Za-z\-]+\b", RegexOptions.IgnoreCase Or RegexOptions.Compiled)
        Dim EC As Encoding = Encoding.UTF8
        Dim STRDELI As String = ".,;:!#$^&()<>+=/\'?|`~ " & Chr(34) & Chr(32)
        Dim FN As String = "D:\ENGLISH_CORPORA\RAW\BROWN_SENTENCE.TXT"
        Dim SB As StringBuilder
        Dim LEMREADER As StreamReader = New StreamReader("D:\ENGLISH_CORPORA\LEMMALIST.TXT", EC)
        Try
            LINE = LEMREADER.ReadLine
            Do While Not LINE Is Nothing
                WORDS = Trim(LINE).Split(DELI)
                ' Lines without a "=" separator are ignored.
                If WORDS.Length >= 2 Then
                    KEY = WORDS(0).ToUpper
                    ' The first mapping seen for a word form wins.
                    If Not HASH.Contains(KEY) Then
                        HASH.Add(KEY, WORDS(1).ToUpper)
                    End If
                End If
                LINE = LEMREADER.ReadLine
            Loop
        Finally
            ' Release the lemma file even if reading fails part-way.
            LEMREADER.Close()
        End Try
        SB = BUILDWLIST(FN, REGX, STRDELI, True, EC, HASH)
        If Not SB Is Nothing Then
            Console.WriteLine("TOTAL:" & TK)
            Console.Write(SB.ToString)
        End If
    End Sub
End Class
'**************************************************************
调用以上类的方法,例子如下:
' Menu click handler that runs the word-list demo.
' The class defined above is named BUILD_WORDLIST_DEMO1, so that is the
' type that must be instantiated here.
Private Sub MenuItem5_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MenuItem5.Click
    Dim k As New BUILD_WORDLIST_DEMO1
    k.MAIN()
End Sub
'**************************************************************
[本贴已被 作者 于 2005年08月06日 04时14分20秒 编辑过]