Extract Values From HTML TD And Tr
I have some HTML source that i get from a website for option quotes. (please see below) What is the best way to extract the various text values in tr and store in a collection base
Solution 1:
After some fiddling I have derived a regex/VBA solution using
- XMLHTTP to access the site (change
strSite
to suit) - a Regexp to get the required numbers
- a variant array with 20 records to hold, then dump the numbers to the active sheet
Looking at the source HTML to find Regex patterns
The Call options have a common starting and finishing string that delimit the 10 values, but there are three different strings
- Strings 1-4,7-10 for each record match
<td class="ylwbg">
X</td>
- String 6 has a
Style
(and other text) preceding the>
before theX
- String 5 contains a much longer
<a href text
X</a>
A regex of
.Pattern = "(<tdclass=""ylwbg"")(Style.+?){0,1}>(.+?)(<\/td>)"
extracts all the needed strings, but further work is needed later on string 5
The Put options start with <td class="nobg"
so these are happily not extracted by a regex that gets points 1-3
Actual Code
Sub GetTxt()
Dim objXmlHTTP As Object
Dim objRegex As Object
Dim objRegMC As Object
Dim objRegM As Object
Dim strResponse As String
Dim strSite As String
Dim lngCnt As Long
Dim strTemp As String
Dim X(1 To 20, 1 To 10)
X(1, 1) = "OI"
X(1, 2) = "Chng in vol"
X(1, 3) = "Volume"
X(1, 4) = "IV"
X(1, 5) = "LTP"
X(1, 6) = "Net Chg"
X(1, 7) = "Bid Qty"
X(1, 8) = "Bid Price"
X(1, 9) = "Ask Price"
X(1, 10) = "Ask Qnty"
Set objXmlHTTP = CreateObject("MSXML2.XMLHTTP")
strSite = "http://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionDates.jsp?symbol=NIFTY&instrument=OPTIDX&strike=4700.00"
On Error GoTo ErrHandler
With objXmlHTTP
.Open "GET", strSite, False
.Send
If .Status = 200 Then strResponse = .ResponseText
End With
On Error GoTo 0
Set objRegex = CreateObject("vbscript.regexp")
With objRegex
'*cleaning regex* to remove all spaces
.Pattern = "[\xA0\s]+"
.Global = True
strResponse = .Replace(strResponse, vbNullString)
.Pattern = "(<tdclass=""ylwbg"")(Style.+?){0,1}>(.+?)(<\/td>)"
If .Test(strResponse) Then
lngCnt = 20
Set objRegMC = .Execute(strResponse)
For Each objRegM In objRegMC
lngCnt = lngCnt + 1
If Right$(objRegM.submatches(2), 2) <> "a>" Then
X(Int((lngCnt - 1) / 10), IIf(lngCnt Mod 10 > 0, lngCnt Mod 10, 10)) = objRegM.submatches(2)
Else
'Get submatches of the form <a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=4700.00&type=CE&expiry=23FEB2012" target="_blank"> 206.40</a>
strTemp = Val(Right(objRegM.submatches(2), Len(objRegM.submatches(2)) - InStrRev(objRegM.submatches(2), """") - 1))
X(Int((lngCnt - 1) / 10), IIf(lngCnt Mod 10 > 0, lngCnt Mod 10, 10)) = strTemp
End If
Next
Else
MsgBox "Parsing unsuccessful", vbCritical
End If
End With
Set objRegex = Nothing
Set objXmlHTTP = Nothing
[a1].Resize(UBound(X, 1), UBound(X, 2)) = X
Exit Sub
ErrHandler:
MsgBox "Site not accessible"
If Not objXmlHTTP Is Nothing Then Set objXmlHTTP = Nothing
End Sub
Post a Comment for "Extract Values From HTML TD And Tr"