抓取網頁資料值應用

摘要:抓取網頁資料值應用

資料來源
感謝該名大大的分享   我就化為己用啦
此範例為抓取證交所公告之證券編碼資料。首先新增一個 Web 應用程式,以及一個名為 TSE 的類別,
該類別程式碼如下:
 

''' <summary>
''' Downloads the securities listing page, strips its HTML markup and
''' extracts the "code name" pairs of the listed stocks section.
''' </summary>
Public Class TSE
    ' URL of the page listing codes and names of domestically listed securities.
    Private ListUrl1 As String = "http://203.74.228.63/t40/C_public.jsp?strMode=2"

    ''' <summary>
    ''' Gets the codes and names of the listed stocks.
    ''' </summary>
    ''' <returns>
    ''' An ArrayList of strings in the form "code name". The list is empty when
    ''' the page cannot be downloaded or parsed, or when the expected section
    ''' markers are not found in the page text.
    ''' </returns>
    Public Function GetStkNoListAll() As ArrayList

        ' Holds the extracted "code name" entries.
        Dim idAr As New ArrayList

        ' Download the HTML source of the listing page.
        Dim page As String = GetWebPage(ListUrl1, "")
        If String.IsNullOrEmpty(page) Then
            Return idAr
        End If

        ' Text lines of the page once the HTML tags have been removed.
        Dim tempAr As ArrayList = RemoveHtmlTag(page, vbCrLf)
        If tempAr Is Nothing Then
            Return idAr
        End If

        ' The wanted rows sit between these two section headings.
        Dim headerIndex As Integer = tempAr.IndexOf("上市股票及權利證書")
        Dim endIndex As Integer = tempAr.IndexOf("上市認購(售)權證")
        If headerIndex < 0 OrElse endIndex < 0 Then
            ' Page layout is not what we expect; do not guess at a range.
            Return idAr
        End If

        ' Collect code and name from every row of the section.
        For i As Integer = headerIndex + 1 To endIndex - 1
            Dim s() As String = Split(CStr(tempAr(i)), " ")
            ' Skip blank or malformed rows that do not contain both a code and a name.
            If s.Length < 2 Then
                Continue For
            End If
            idAr.Add(s(0).Trim & " " & s(1).Trim)
        Next
        Return idAr
    End Function


    ''' <summary>
    ''' Downloads the content of a web page as text.
    ''' </summary>
    ''' <param name="URL">Address of the page to download.</param>
    ''' <param name="proxy">Optional proxy address; an empty string means no explicit proxy is set.</param>
    ''' <returns>The page text, or Nothing when the request fails for any reason.</returns>
    Private Shared Function GetWebPage(ByVal URL As String, Optional ByVal proxy As String = "") As String
        Try
            ' WebRequest.Create returns a WebRequest subclass chosen from the URL scheme.
            Dim request As System.Net.HttpWebRequest = DirectCast(System.Net.WebRequest.Create(URL), System.Net.HttpWebRequest)

            ' Route the request through the proxy only when one was supplied.
            If Not String.IsNullOrEmpty(proxy) Then
                request.Proxy = New System.Net.WebProxy(proxy, True)
            End If

            ' Using blocks guarantee the response and its stream are closed even
            ' when reading fails, so the underlying connection is released.
            Using response As System.Net.WebResponse = request.GetResponse()
                ' Encoding.Default is the system ANSI code page.
                ' NOTE(review): this presumably matches the page's charset on the
                ' machines this was written for - confirm against the actual response.
                Using reader As New System.IO.StreamReader(response.GetResponseStream(), System.Text.Encoding.Default)
                    Return reader.ReadToEnd()
                End Using
            End Using
        Catch ex As Exception
            ' Callers rely on Nothing meaning "download failed", so every failure
            ' is reported that way; the cause is traced instead of being lost.
            System.Diagnostics.Trace.TraceWarning("TSE.GetWebPage failed for {0}: {1}", URL, ex.Message)
            Return Nothing
        End Try
    End Function


    ''' <summary>
    ''' Removes the HTML markup from a page and splits the remaining text.
    ''' Requires a project reference to Microsoft.mshtml.
    ''' </summary>
    ''' <param name="html">HTML source of the page.</param>
    ''' <param name="del">Delimiter used to split the text (the caller passes a line break).</param>
    ''' <returns>An ArrayList of trimmed text fragments, or Nothing when the HTML cannot be processed.</returns>
    Private Function RemoveHtmlTag(ByVal html As String, ByVal del As String) As ArrayList

        Dim doc As mshtml.IHTMLDocument2 = New mshtml.HTMLDocumentClass

        Try
            ' Wrap script elements in HTML comments so they do not interfere with
            ' loading the document. Matching is case-insensitive so that variants
            ' such as <Script> or a tag name followed by a line break are covered.
            Dim opts As System.Text.RegularExpressions.RegexOptions = System.Text.RegularExpressions.RegexOptions.IgnoreCase
            html = System.Text.RegularExpressions.Regex.Replace(html, "<script\b", "<!-- $&", opts)
            html = System.Text.RegularExpressions.Regex.Replace(html, "</script\s*>", "$& -->", opts)

            doc.write(html)
            ' Close the output stream opened by write so the document is complete.
            doc.close()

            Dim body As mshtml.IHTMLElement = doc.body
            If body Is Nothing Then
                Return Nothing
            End If

            ' innerText is the text left after the markup is removed; split it on the delimiter.
            Dim item() As String = Split(body.innerText, del)

            ' Return the trimmed fragments as an ArrayList.
            Dim ar As New ArrayList(item.Length)
            For Each fragment As String In item
                ar.Add(fragment.Trim)
            Next
            Return ar
        Catch ex As Exception
            ' No message box here: this helper runs inside a web application where
            ' no interactive desktop is available. Nothing signals the failure.
            System.Diagnostics.Trace.TraceWarning("TSE.RemoveHtmlTag failed: {0}", ex.Message)
            Return Nothing
        End Try
    End Function


End Class

而在.aspx檔做測試的部份:
先在頁面上加入一個listbox以及一個button
button_Click事件:

' Click handler for Button1: loads the listed securities and binds them to ListBox1.
Protected Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click

        ' GetStkNoListAll is an instance method of TSE, so create an instance first.
        Dim scraper As New TSE()
        Dim arrSource As ArrayList = scraper.GetStkNoListAll()    ' read the data
        ListBox1.DataSource = arrSource    ' display the data
        ListBox1.DataBind()

End Sub