摘要:抓取網頁資料之應用
資料來源
感謝該名大大的分享 我就化為己用啦
此範例為抓取證交所公告之證券編碼資料。首先新增一個 Web 應用程式及一個名為 TSE 的類別,
該類別程式碼如下:
Public Class TSE
'取得國內上市證券編碼與名稱
' URL of the page that GetStkNoListAll downloads; never reassigned, hence ReadOnly.
Private ReadOnly ListUrl1 As String = "http://203.74.228.63/t40/C_public.jsp?strMode=2"
'取得國內證券編碼與名稱
''' <summary>
''' Downloads the page at ListUrl1, strips its HTML, and collects the rows located
''' between the "上市股票及權利證書" heading and the "上市認購(售)權證" heading.
''' </summary>
''' <returns>
''' An ArrayList of strings, each made of the first two space-separated fields of a row
''' joined by a single space. The list is empty when the page could not be fetched or
''' parsed, or when either heading is missing.
''' </returns>
Public Function GetStkNoListAll() As ArrayList
    Dim idAr As New ArrayList

    ' GetWebPage yields Nothing on failure; treat that as "no data" instead of crashing later.
    Dim page As String = GetWebPage(ListUrl1, "")
    If String.IsNullOrEmpty(page) Then Return idAr

    ' RemoveHtmlTag yields Nothing when the HTML could not be processed.
    Dim tempAr As ArrayList = RemoveHtmlTag(page, vbCrLf)
    If tempAr Is Nothing Then Return idAr

    ' Both headings must be present; otherwise the range between them is undefined.
    Dim startMarker As Integer = tempAr.IndexOf("上市股票及權利證書")
    Dim endIndex As Integer = tempAr.IndexOf("上市認購(售)權證")
    If startMarker < 0 OrElse endIndex < 0 Then Return idAr

    For i As Integer = startMarker + 1 To endIndex - 1
        Dim s() As String = Split(CStr(tempAr(i)), " ")
        ' Skip rows (e.g. blank lines) that do not carry at least two fields.
        If s.Length >= 2 Then
            idAr.Add(s(0).Trim & " " & s(1).Trim)
        End If
    Next
    Return idAr
End Function
'取得網頁HTML
''' <summary>
''' Requests <paramref name="URL"/> and returns the whole response body as text.
''' </summary>
''' <param name="URL">Address handed to System.Net.WebRequest.Create.</param>
''' <param name="proxy">
''' Optional proxy address. When Nothing or empty, the request's proxy setting is left untouched;
''' otherwise a WebProxy with bypass-on-local enabled is assigned.
''' </param>
''' <returns>
''' The response decoded with System.Text.Encoding.Default, or Nothing when any step fails.
''' </returns>
Private Shared Function GetWebPage(ByVal URL As String, Optional ByVal proxy As String = "") As String
    Try
        Dim request As System.Net.WebRequest = System.Net.WebRequest.Create(URL)

        If Not String.IsNullOrEmpty(proxy) Then
            request.Proxy = New System.Net.WebProxy(proxy, True)
        End If

        ' Using blocks close the response and reader (and the underlying stream) even on error;
        ' merely setting the references to Nothing does not release the connection.
        Using response As System.Net.WebResponse = request.GetResponse()
            ' NOTE(review): Encoding.Default depends on the machine's ANSI code page — confirm it
            ' matches the encoding the remote page is served in.
            Using reader As New System.IO.StreamReader(response.GetResponseStream(), System.Text.Encoding.Default)
                Return reader.ReadToEnd()
            End Using
        End Using
    Catch ex As Exception
        ' Callers rely on Nothing as the failure signal; record the cause rather than dropping it.
        System.Diagnostics.Trace.TraceWarning("GetWebPage failed for {0}: {1}", URL, ex.Message)
        Return Nothing
    End Try
End Function
'去除html的標記
''' <summary>
''' Loads <paramref name="html"/> into an mshtml document, reads the body's innerText,
''' and splits that text on <paramref name="del"/>.
''' Requires a project reference to Microsoft.mshtml.
''' </summary>
''' <param name="html">HTML markup to strip.</param>
''' <param name="del">Delimiter used to split the extracted text.</param>
''' <returns>
''' An ArrayList of the trimmed text pieces, or Nothing when <paramref name="html"/> is
''' Nothing/empty or the markup could not be processed.
''' </returns>
Private Function RemoveHtmlTag(ByVal html As String, ByVal del As String) As ArrayList
    ' Nothing to parse; report failure the same way as a processing error.
    If String.IsNullOrEmpty(html) Then Return Nothing

    Dim doc As mshtml.IHTMLDocument2 = New mshtml.HTMLDocumentClass
    Try
        ' Wrap script blocks in HTML comments so their content is not treated as page text.
        html = Replace(html, "<SCRIPT", "<!-- <SCRIPT")
        html = Replace(html, "</SCRIPT>", "</SCRIPT> -->")
        html = Replace(html, "<script>", "<!-- <script>")
        html = Replace(html, "</script>", "</script> -->")
        html = Replace(html, "<script ", "<!-- <script ")
        doc.write(html)

        ' innerText is the document text with markup removed.
        Dim pieces() As String = Split(doc.body.innerText, del)

        Dim result As New ArrayList(pieces.Length)
        For Each piece As String In pieces
            result.Add(piece.Trim)
        Next
        Return result
    Catch ex As Exception
        ' This class runs server-side in a web application, where a modal MsgBox is not valid;
        ' trace the real cause and keep the Nothing-on-failure contract.
        System.Diagnostics.Trace.TraceWarning("RemoveHtmlTag failed: {0}", ex.Message)
        Return Nothing
    End Try
End Function
End Class
而在 .aspx 檔做測試的部份:
先在頁面上加入一個listbox以及一個button
button_Click事件:
''' <summary>
''' Handles Button1's Click event: loads the securities code list through TSE and binds it to ListBox1.
''' </summary>
Protected Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
    ' TSE exposes GetStkNoListAll as an instance method, so an instance is needed
    ' (the class has no member named GetStkNoList).
    Dim fetcher As New TSE()
    Dim arrSource As ArrayList = fetcher.GetStkNoListAll()

    ListBox1.DataSource = arrSource
    ListBox1.DataBind()
End Sub
Public
Private