The portion of code pasted below attempts to loop through each page of a web search. The button to do so is defined by the html also below. The loop works through only the second page, regardless of the results page count, at which point, I receive a Permission denied (Error 70).
Sub finrascrape()
Dim ie As Object
Dim pgcount, a, i, commaindex As Integer
Dim bname, cc, cnum, pg, nb As IHTMLElement
Dim blist, clist, pagelist, nextb, testtxt, testtxt2 As IHTMLElementCollection
Set ie = CreateObject("internetexplorer.application")
brokersearch = InputBox("ENTER BROKER NAME OR CRD#")
firmsearch = InputBox("ENTER FIRM NAME OR CRD#")
geosearch = InputBox("ENTER ZIP CODE (must be valid zip code, otherwise leave blank)")
With ie
.navigate "https://brokercheck.finra.org/"
.Visible = True
Do While .Busy Or .readyState <> 4
DoEvents
Loop
End With
Set intags = ie.document.getElementsByTagName("input")
For Each gat In intags
If gat.placeholder = "Name or CRD#" Then
gat.Value = brokersearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
ElseIf gat.placeholder = "Firm Name or CRD# (optional)" Then
gat.Value = firmsearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
ElseIf gat.placeholder = "City, State or Zip (optional)" Then
gat.Value = geosearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
Else
End If
Next gat
ie.document.getElementsByClassName("md-raised md-primary md-hue-2 md-button md-ink-ripple").Item.Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Set pagelist = ie.document.getElementsByClassName("bold font-dark-blue ng-binding")
For Each pg In pagelist
If pg.className = "bold font-dark-blue ng-binding" Then
a = pg.innerText
Exit For
Debug.Print a
End If
Next pg
pgcount = WorksheetFunction.RoundUp(a / 12, 0)
Debug.Print pgcount
Sheets("Results").Select
Range("A1").Value = "BROKER NAME"
Range("B1").Value = "BROKER CRD#"
Range("C1").Value = "MAILING CITY"
Range("D1").Value = "MAILING STATE"
Range("E1").Value = "MAILING ZIP"
With Range(Cells(1, 1), Cells(1, Cells(1, Columns.Count).End(xlToLeft).Column))
.Font.Bold = True
.HorizontalAlignment = xlCenter
End With
For i = 1 To pgcount
On Error GoTo reportcomplete
Set blist = ie.document.getElementsByClassName("smaller ng-binding flex")
For Each bname In blist
Sheets("Results").Select
Sheets("Results").Range("A" & Range("A" & Rows.Count).End(xlUp).Row + 1).Value = bname.innerText
Next bname
Set clist = ie.document.getElementsByClassName("smaller")
For Each cnum In clist
Sheets("Process").Select
If cnum.className = "smaller" Then
Sheets("Process").Range("A" & Range("A" & Rows.Count).End(xlUp).Row + 1).Value = cnum.innerText
End If
Next cnum
Sheets("Process").Select
For a = 1 To Sheets("Process").Range("A" & Rows.Count).End(xlUp).Row
If Left(Sheets("Process").Range("A" & a).Value, 4) = "CRD#" Then
Sheets("Results").Select
Sheets("Results").Range("B" & Range("B" & Rows.Count).End(xlUp).Row + 1).Value = Right(Sheets("Process").Range("A" & a).Value, Len(Sheets("Process").Range("A" & a)) - 7)
If InStr(Sheets("Process").Range("A" & a + 1).Value, ",") = 0 Then
Sheets("Process").Range("A" & a + 1).Value = "UNAVAILABLE, NA XXXXX"
End If
ElseIf InStr(Sheets("Process").Range("A" & a).Value, ",") > 0 Then
commaindex = InStr(Sheets("Process").Range("A" & a).Value, ",")
Sheets("Results").Select
Sheets("Results").Range("C" & Sheets("Results").Range("C" & Rows.Count).End(xlUp).Row + 1).Value = Left(Sheets("Process").Range("A" & a).Value, commaindex - 1)
Sheets("Results").Range("D" & Sheets("Results").Range("D" & Rows.Count).End(xlUp).Row + 1).Value = Left(Right(Sheets("Process").Range("A" & a).Value, _
Len(Sheets("Process").Range("A" & a)) - commaindex - 1), 2)
Sheets("Results").Range("E" & Sheets("Results").Range("E" & Rows.Count).End(xlUp).Row + 1).Value = Left(Right(Sheets("Process").Range("A" & a).Value, _
Len(Sheets("Process").Range("A" & a)) - commaindex - 4), 5)
Else
End If
Sheets("Process").Select
Next a
Sheets("Process").Cells.Clear
Set testtxt = ie.document.getElementsByTagName("a")
For Each txt In testtxt
If txt.className = "ng-binding" Then
txt.Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Set testtxt2 = ie.document.getElementsByTagName("a")
For a = pgcount To 2 Step -1
For Each txt2 In testtxt2
If txt2.className = "ng-binding" And txt2.innerText = a & " of " & pgcount & " pages" Then
'Debug.Print a
GoTo end_of_for
End If
Next txt2
Next a
End If
Next txt
end_of_for:
a = pgcount
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Next i
reportcomplete:
MsgBox "FINRA Web Scrape Complete. Please review."
End Sub
HTML:
<li ng-if="::directionLinks" ng-class="{disabled: noNext()||ngDisabled}" class="pagination-next ng-scope">
›</li>
The URL for the search landing page is https://brokercheck.finra.org. Any help would be much appreciated.
I developed a loop as follows, which successfully navigates the "a" tagged elements, identifies the correct button based on the innerText of the prior element, and loops through each page.
Set testtxt = ie.document.getElementsByTagName("a")
If i < pgcount Then
For d = 1 To testtxt.Length
If testtxt.Item(d).innerText = i & " of " & pgcount & " pages" Then
testtxt.Item(d + 1).Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Exit For
End If
Next d
End If
This is what you can do to traverse all 27 pages and get the broker names.
Sub Get_Content()
Dim ie As New InternetExplorer, html As HTMLDocument
Dim itm As Object, post As Object, posts As Object, elem As Object
With ie
.Visible = True
.navigate "https://brokercheck.finra.org/"
Do Until .readyState = READYSTATE_COMPLETE: Loop
Set html = .document
End With
Set evt = html.createEvent("keyboardevent")
evt.initEvent "change", True, False
For Each itm In html.getElementsByTagName("input")
If InStr(itm.placeholder, "Name or CRD#") > 0 Then
itm.Value = "Michael John"
Exit For
End If
Next itm
itm.dispatchEvent evt
For Each post In html.getElementsByTagName("input")
If InStr(post.placeholder, "Firm Name or CRD# (optional)") > 0 Then
post.Value = "Morgan Stanley"
Exit For
End If
Next post
post.dispatchEvent evt
html.getElementsByClassName("md-button")(0).Click
Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop
Do
For Each elem In html.getElementsByClassName("smaller ng-binding flex")
x = x + 1: Cells(x, 1) = elem.innerText
Next elem
html.getElementsByClassName("pagination-next")(0).getElementsByTagName("a")(0).Click
Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop
Loop Until InStr(html.body.innerHTML, " class=""pagination-last ng-scope disabled""") > 0
ie.Quit
End Sub
I'm trying to webscrape multiple tables from a website. So far I have built an excel VBA macro to do this. I also figured out how to get all the data when it is on multiple pages in the website. For instance, if I have 1000 results but 50 are displayed on each page. The problem is that I have the same 5 tables on multiple pages because each table has 1000 results.
My code can only loop through each page for 1 table. I also have written code to grab each table, but I cannot figure out how to do that for each of the 50 search results (each page).
How can I loop through multiple tables and click the next page in the process to capture all the data?
Sub ETFDat()
Dim IE As Object
Dim i As Long
Dim strText As String
Dim jj As Long
Dim hBody As Object
Dim hTR As Object
Dim hTD As Object
Dim tb As Object
Dim bb As Object
Dim Tr As Object
Dim Td As Object
Dim ii As Long
Dim doc As Object
Dim hTable As Object
Dim y As Long
Dim z As Long
Dim wb As Excel.Workbook
Dim ws As Excel.Worksheet
Set wb = Excel.ActiveWorkbook
Set ws = wb.ActiveSheet
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
y = 1 'Column A in Excel
z = 1 'Row 1 in Excel
Sheets("Fund Basics").Activate
Cells.Select
Selection.Clear
IE.navigate "http://www.etf.com/channels/smart-beta-etfs/channels/smart- beta-etfs?qt-tabs=0#qt-tabs" ', , , , "Content-Type: application/x-www-form-urlencoded" & vbCrLf
Do While IE.busy: DoEvents: Loop
Do While IE.ReadyState <> 4: DoEvents: Loop
Set doc = IE.document
Set hTable = doc.getElementsByTagName("table") '.GetElementByID("tablePerformance")
ii = 1
Do While ii <= 17
For Each tb In hTable
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each Tr In hTR
Set hTD = Tr.getElementsByTagName("td")
y = 1 ' Resets back to column A
For Each Td In hTD
ws.Cells(z, y).Value = Td.innerText
y = y + 1
Next Td
DoEvents
z = z + 1
Next Tr
Exit For
Next bb
Exit For
Next tb
With doc
Set elems = .getElementsByTagName("a")
For Each e In elems
If (e.getAttribute("id") = "nextPage") Then
e.Click
Exit For
End If
Next e
End With
ii = ii + 1
Application.Wait (Now + TimeValue("00:00:05"))
Loop
MsgBox "Done"
End Sub
There is the example showing how the data could be retrieved from the website using XHRs and JSON parsing, it consists of several steps.
Retrieve the data.
I looked into a little with XHRs using Chrome Developer Tools Network tab.
Most relevant data I found is JSON string returned by GET XHR from http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/50/50/1 after I clicked the next page button:
The response has the following structure for single row item:
[
{
"productId": 576,
"fund": "iShares Russell 1000 Value ETF",
"ticker": "IWD",
"inceptionDate": "2000-05-22",
"launchDate": "2000-05-22",
"hasSegmentReport": "true",
"genericReport": "false",
"hasReport": "true",
"fundsInSegment": 20,
"economicDevelopment": "Developed Markets",
"totalRows": 803,
"fundBasics": {
"issuer": "<a href='/channels/blackrock-etfs' alt='BlackRock'>BlackRock</a>",
"expenseRatio": {
"value": 20
},
"aum": {
"value": 36957230250
},
"spreadPct": {
"value": 0.000094
},
"segment": "Equity: U.S. - Large Cap Value"
},
"performance": {
"priceTrAsOf": "2017-02-27",
"priceTr1Mo": {
"value": 0.031843
},
"priceTr3Mo": {
"value": 0.070156
},
"priceTr1Yr": {
"value": 0.281541
},
"priceTr3YrAnnualized": {
"value": 0.099171
},
"priceTr5YrAnnualized": {
"value": 0.13778
},
"priceTr10YrAnnualized": {
"value": 0.061687
}
},
"analysis": {
"analystPick": null,
"opportunitiesList": null,
"letterGrade": "A",
"efficiencyScore": 97.977103,
"tradabilityScore": 99.260541,
"fitScore": 84.915658,
"leveragedFactor": null,
"exposureReset": null,
"avgDailyDollarVolume": 243848188.037378,
"avgDailyShareVolume": 2148400.688889,
"spread": {
"value": 0.010636
},
"fundClosureRisk": "Low"
},
"fundamentals": {
"dividendYield": {
"value": 0.021543
},
"equity": {
"pe": 27.529645,
"pb": 1.964124
},
"fixedIncome": {
"duration": null,
"creditQuality": null,
"ytm": {
"value": null
}
}
},
"classification": {
"assetClass": "Equity",
"strategy": "Value",
"region": "North America",
"geography": "U.S.",
"category": "Size and Style",
"focus": "Large Cap",
"niche": "Value",
"inverse": "false",
"leveraged": "false",
"etn": "false",
"selectionCriteria": "Multi-Factor",
"weightingScheme": "Multi-Factor",
"activePerSec": "false",
"underlyingIndex": "Russell 1000 Value Index",
"indexProvider": "Russell",
"brand": "iShares"
},
"tax": {
"legalStructure": "Open-Ended Fund",
"maxLtCapitalGainsRate": 20,
"maxStCapitalGainsRate": 39.6,
"taxReporting": "1099"
}
}
]
The property "totalRows": 803 specifies the total rows count. So to make data retrieving as fast as it possible, better to make the request to get the first row. As you can see from the URL, there is ../-aum/50/50/.. tail, which points sorting order, item to start from, and total items to return. Thus to get the only row it should be http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/1/1
Parse retrieved JSON, get the total number of rows from totalRows property.
Make another one request to get the entire table.
Parse the entire table JSON, convert it to 2d array and output. You can perform further processing with direct access to the array.
For the table shown below:
The resulting table contains 803 rows and header with columns as follows:
productId
fund
ticker
inceptionDate
launchDate
hasSegmentReport
genericReport
hasReport
fundsInSegment
economicDevelopment
totalRows
fundBasics_issuer
fundBasics_expenseRatio_value
fundBasics_aum_value
fundBasics_spreadPct_value
fundBasics_segment
performance_priceTrAsOf
performance_priceTr1Mo_value
performance_priceTr3Mo_value
performance_priceTr1Yr_value
performance_priceTr3YrAnnualized_value
performance_priceTr5YrAnnualized_value
performance_priceTr10YrAnnualized_value
analysis_analystPick
analysis_opportunitiesList
analysis_letterGrade
analysis_efficiencyScore
analysis_tradabilityScore
analysis_fitScore
analysis_leveragedFactor
analysis_exposureReset
analysis_avgDailyDollarVolume
analysis_avgDailyShareVolume
analysis_spread_value
analysis_fundClosureRisk
fundamentals_dividendYield_value
fundamentals_equity_pe
fundamentals_equity_pb
fundamentals_fixedIncome_duration
fundamentals_fixedIncome_creditQuality
fundamentals_fixedIncome_ytm_value
classification_assetClass
classification_strategy
classification_region
classification_geography
classification_category
classification_focus
classification_niche
classification_inverse
classification_leveraged
classification_etn
classification_selectionCriteria
classification_weightingScheme
classification_activePerSec
classification_underlyingIndex
classification_indexProvider
classification_brand
tax_legalStructure
tax_maxLtCapitalGainsRate
tax_maxStCapitalGainsRate
tax_taxReporting
Put the below code into VBA Project standard module:
Option Explicit
Sub GetData()
Dim sJSONString As String
Dim vJSON As Variant
Dim sState As String
Dim lRowsQty As Long
Dim aData()
Dim aHeader()
' Download and parse the only first row to get total rows qty
sJSONString = GetXHR("http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/1/1")
JSON.Parse sJSONString, vJSON, sState
lRowsQty = vJSON(0)("totalRows")
' Download and parse the entire data
sJSONString = GetXHR("http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/" & lRowsQty & "/1")
JSON.Parse sJSONString, vJSON, sState
' Convert JSON to 2d array
JSON.ToArray vJSON, aData, aHeader
' Output
With Sheets(1)
.Cells.Delete
OutputArray .Cells(1, 1), aHeader
Output2DArray .Cells(2, 1), aData
.Cells.Columns.AutoFit
End With
End Sub
Function GetXHR(sURL As String) As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", sURL, False
.Send
GetXHR = .responseText
End With
End Function
Sub OutputArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
1, _
UBound(aCells) - LBound(aCells) + 1)
.NumberFormat = "#"
.Value = aCells
End With
End With
End Sub
Sub Output2DArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1)
.NumberFormat = "#"
.Value = aCells
End With
End With
End Sub
Create one more standard module, name it JSON and put the below code into it, this code provides JSON processing functionality:
Option Explicit
Private sBuffer As String
Private oTokens As Object
Private oRegEx As Object
Private bMatch As Boolean
Private oChunks As Object
Private oHeader As Object
Private aData() As Variant
Private i As Long
Sub Parse(ByVal sSample As String, vJSON As Variant, sState As String)
' Backus–Naur form JSON parser implementation based on RegEx
' Input:
' sSample - source JSON string
' Output:
' vJson - created object or array to be returned as result
' sState - string Object|Array|Error depending on processing
sBuffer = sSample
Set oTokens = CreateObject("Scripting.Dictionary")
Set oRegEx = CreateObject("VBScript.RegExp")
With oRegEx ' Patterns based on specification http://www.json.org/
.Global = True
.MultiLine = True
.IgnoreCase = True ' Unspecified True, False, Null accepted
.Pattern = "(?:'[^']*'|""(?:\\""|[^""])*"")(?=\s*[,\:\]\}])" ' Double-quoted string, unspecified quoted string
Tokenize "s"
.Pattern = "[+-]?(?:\d+\.\d*|\.\d+|\d+)(?:e[+-]?\d+)?(?=\s*[,\]\}])" ' Number, E notation number
Tokenize "d"
.Pattern = "\b(?:true|false|null)(?=\s*[,\]\}])" ' Constants true, false, null
Tokenize "c"
.Pattern = "\b[A-Za-z_]\w*(?=\s*\:)" ' Unspecified non-double-quoted property name accepted
Tokenize "n"
.Pattern = "\s+"
sBuffer = .Replace(sBuffer, "") ' Remove unnecessary spaces
.MultiLine = False
Do
bMatch = False
.Pattern = "<\d+(?:[sn])>\:<\d+[codas]>" ' Object property structure
Tokenize "p"
.Pattern = "\{(?:<\d+p>(?:,<\d+p>)*)?\}" ' Object structure
Tokenize "o"
.Pattern = "\[(?:<\d+[codas]>(?:,<\d+[codas]>)*)?\]" ' Array structure
Tokenize "a"
Loop While bMatch
.Pattern = "^<\d+[oa]>$" ' Top level object structure, unspecified array accepted
If .Test(sBuffer) And oTokens.Exists(sBuffer) Then
Retrieve sBuffer, vJSON
sState = IIf(IsObject(vJSON), "Object", "Array")
Else
vJSON = Null
sState = "Error"
End If
End With
Set oTokens = Nothing
Set oRegEx = Nothing
End Sub
Private Sub Tokenize(sType)
Dim aContent() As String
Dim lCopyIndex As Long
Dim i As Long
Dim sKey As String
With oRegEx.Execute(sBuffer)
If .Count = 0 Then Exit Sub
ReDim aContent(0 To .Count - 1)
lCopyIndex = 1
For i = 0 To .Count - 1
With .Item(i)
sKey = "<" & oTokens.Count & sType & ">"
oTokens(sKey) = .Value
aContent(i) = Mid(sBuffer, lCopyIndex, .FirstIndex - lCopyIndex + 1) & sKey
lCopyIndex = .FirstIndex + .Length + 1
End With
Next
End With
sBuffer = Join(aContent, "") & Mid(sBuffer, lCopyIndex, Len(sBuffer) - lCopyIndex + 1)
bMatch = True
End Sub
Private Sub Retrieve(sTokenKey, vTransfer)
Dim sTokenValue As String
Dim sName As String
Dim vValue As Variant
Dim aTokens() As String
Dim i As Long
sTokenValue = oTokens(sTokenKey)
With oRegEx
.Global = True
Select Case Left(Right(sTokenKey, 2), 1)
Case "o"
Set vTransfer = CreateObject("Scripting.Dictionary")
aTokens = Split(sTokenValue, "<")
For i = 1 To UBound(aTokens)
Retrieve "<" & Split(aTokens(i), ">", 2)(0) & ">", vTransfer
Next
Case "p"
aTokens = Split(sTokenValue, "<", 4)
Retrieve "<" & Split(aTokens(1), ">", 2)(0) & ">", sName
Retrieve "<" & Split(aTokens(2), ">", 2)(0) & ">", vValue
If IsObject(vValue) Then
Set vTransfer(sName) = vValue
Else
vTransfer(sName) = vValue
End If
Case "a"
aTokens = Split(sTokenValue, "<")
If UBound(aTokens) = 0 Then
vTransfer = Array()
Else
ReDim vTransfer(0 To UBound(aTokens) - 1)
For i = 1 To UBound(aTokens)
Retrieve "<" & Split(aTokens(i), ">", 2)(0) & ">", vValue
If IsObject(vValue) Then
Set vTransfer(i - 1) = vValue
Else
vTransfer(i - 1) = vValue
End If
Next
End If
Case "n"
vTransfer = sTokenValue
Case "s"
vTransfer = Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace( _
Mid(sTokenValue, 2, Len(sTokenValue) - 2), _
"\""", """"), _
"\\", "\"), _
"\/", "/"), _
"\b", Chr(8)), _
"\f", Chr(12)), _
"\n", vbLf), _
"\r", vbCr), _
"\t", vbTab)
.Global = False
.Pattern = "\\u[0-9a-fA-F]{4}"
Do While .Test(vTransfer)
vTransfer = .Replace(vTransfer, ChrW(("&H" & Right(.Execute(vTransfer)(0).Value, 4)) * 1))
Loop
Case "d"
vTransfer = Evaluate(sTokenValue)
Case "c"
Select Case LCase(sTokenValue)
Case "true"
vTransfer = True
Case "false"
vTransfer = False
Case "null"
vTransfer = Null
End Select
End Select
End With
End Sub
Function Serialize(vJSON As Variant) As String
Set oChunks = CreateObject("Scripting.Dictionary")
SerializeElement vJSON, ""
Serialize = Join(oChunks.Items(), "")
Set oChunks = Nothing
End Function
Private Sub SerializeElement(vElement As Variant, ByVal sIndent As String)
Dim aKeys() As Variant
Dim i As Long
With oChunks
Select Case VarType(vElement)
Case vbObject
If vElement.Count = 0 Then
.Item(.Count) = "{}"
Else
.Item(.Count) = "{" & vbCrLf
aKeys = vElement.Keys
For i = 0 To UBound(aKeys)
.Item(.Count) = sIndent & vbTab & """" & aKeys(i) & """" & ": "
SerializeElement vElement(aKeys(i)), sIndent & vbTab
If Not (i = UBound(aKeys)) Then .Item(.Count) = ","
.Item(.Count) = vbCrLf
Next
.Item(.Count) = sIndent & "}"
End If
Case Is >= vbArray
If UBound(vElement) = -1 Then
.Item(.Count) = "[]"
Else
.Item(.Count) = "[" & vbCrLf
For i = 0 To UBound(vElement)
.Item(.Count) = sIndent & vbTab
SerializeElement vElement(i), sIndent & vbTab
If Not (i = UBound(vElement)) Then .Item(.Count) = "," 'sResult = sResult & ","
.Item(.Count) = vbCrLf
Next
.Item(.Count) = sIndent & "]"
End If
Case vbInteger, vbLong
.Item(.Count) = vElement
Case vbSingle, vbDouble
.Item(.Count) = Replace(vElement, ",", ".")
Case vbNull
.Item(.Count) = "null"
Case vbBoolean
.Item(.Count) = IIf(vElement, "true", "false")
Case Else
.Item(.Count) = """" & _
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(vElement, _
"\", "\\"), _
"""", "\"""), _
"/", "\/"), _
Chr(8), "\b"), _
Chr(12), "\f"), _
vbLf, "\n"), _
vbCr, "\r"), _
vbTab, "\t") & _
""""
End Select
End With
End Sub
Function ToString(vJSON As Variant) As String
Select Case VarType(vJSON)
Case vbObject, Is >= vbArray
Set oChunks = CreateObject("Scripting.Dictionary")
ToStringElement vJSON, ""
oChunks.Remove 0
ToString = Join(oChunks.Items(), "")
Set oChunks = Nothing
Case vbNull
ToString = "Null"
Case vbBoolean
ToString = IIf(vJSON, "True", "False")
Case Else
ToString = CStr(vJSON)
End Select
End Function
Private Sub ToStringElement(vElement As Variant, ByVal sIndent As String)
Dim aKeys() As Variant
Dim i As Long
With oChunks
Select Case VarType(vElement)
Case vbObject
If vElement.Count = 0 Then
.Item(.Count) = "''"
Else
.Item(.Count) = vbCrLf
aKeys = vElement.Keys
For i = 0 To UBound(aKeys)
.Item(.Count) = sIndent & aKeys(i) & ": "
ToStringElement vElement(aKeys(i)), sIndent & vbTab
If Not (i = UBound(aKeys)) Then .Item(.Count) = vbCrLf
Next
End If
Case Is >= vbArray
If UBound(vElement) = -1 Then
.Item(.Count) = "''"
Else
.Item(.Count) = vbCrLf
For i = 0 To UBound(vElement)
.Item(.Count) = sIndent & i & ": "
ToStringElement vElement(i), sIndent & vbTab
If Not (i = UBound(vElement)) Then .Item(.Count) = vbCrLf
Next
End If
Case vbNull
.Item(.Count) = "Null"
Case vbBoolean
.Item(.Count) = IIf(vElement, "True", "False")
Case Else
.Item(.Count) = CStr(vElement)
End Select
End With
End Sub
Sub ToArray(vJSON As Variant, aRows() As Variant, aHeader() As Variant)
' Input:
' vJSON - Array or Object which contains rows data
' Output:
' aData - 2d array representing JSON data
' aHeader - 1d array of property names
Dim sName As Variant
Set oHeader = CreateObject("Scripting.Dictionary")
Select Case VarType(vJSON)
Case vbObject
If vJSON.Count > 0 Then
ReDim aData(0 To vJSON.Count - 1, 0 To 0)
oHeader("#") = 0
i = 0
For Each sName In vJSON
aData(i, 0) = "#" & sName
ToArrayElement vJSON(sName), ""
i = i + 1
Next
Else
ReDim aData(0 To 0, 0 To 0)
End If
Case Is >= vbArray
If UBound(vJSON) >= 0 Then
ReDim aData(0 To UBound(vJSON), 0 To 0)
For i = 0 To UBound(vJSON)
ToArrayElement vJSON(i), ""
Next
Else
ReDim aData(0 To 0, 0 To 0)
End If
Case Else
ReDim aData(0 To 0, 0 To 0)
aData(0, 0) = ToString(vJSON)
End Select
aHeader = oHeader.Keys()
Set oHeader = Nothing
aRows = aData
Erase aData
End Sub
Private Sub ToArrayElement(vElement As Variant, sFieldName As String)
Dim sName As Variant
Dim j As Long
Select Case VarType(vElement)
Case vbObject ' collection of objects
For Each sName In vElement
ToArrayElement vElement(sName), sFieldName & IIf(sFieldName = "", "", "_") & sName
Next
Case Is >= vbArray ' collection of arrays
For j = 0 To UBound(vElement)
ToArrayElement vElement(j), sFieldName & IIf(sFieldName = "", "", "_") & "#" & j
Next
Case Else
If Not oHeader.Exists(sFieldName) Then
oHeader(sFieldName) = oHeader.Count
If UBound(aData, 2) < oHeader.Count - 1 Then ReDim Preserve aData(0 To UBound(aData, 1), 0 To oHeader.Count - 1)
End If
j = oHeader(sFieldName)
aData(i, j) = ToString(vElement)
End Select
End Sub
I tried useful the original question and found little bug that I fixed
The answer to you question is to
1 loop on Doc elements until next page link is found, and you to
2 set/reset the doc var to avoid the Doc lost
Do
Do While IE.Busy: DoEvents: Loop
Do While IE.readyState <> 4: DoEvents: Loop
Set doc = IE.document
....
Loop While nextPageFound 'exit if "next page" not found
when following the next page hyper link, during loop on Doc elements.
And then I added the
3 split of Table Header and Body (if exist) for 1st row field name set
4 RESCUE of cell value from specific class by name cell against simple td.innerText
5 loop on doc for link search until NextPageFound using
e.getAttribute("title") instead id=nextPage
6 use of a SetUp sheet with parameters to customize the script with url and destination data sheet
strUrl = ThisWorkbook.Sheets("Setup").Range("b1").Value
strDestSheet = ThisWorkbook.Sheets("Setup").Range("b2").Value
And here is the VBA function revisited:
Sub ETFDatNew()
Dim IE As Object
Dim i As Long
Dim strText As String
Dim jj As Long
Dim hBody As Object
Dim hTR As Object
Dim hTD As Object
Dim tb As Object
Dim bb As Object
Dim Tr As Object
Dim Td As Object
Dim ii As Long
Dim doc As Object
Dim hTable As Object
Dim y As Long
Dim z As Long
Dim wb As Excel.Workbook
Dim ws As Excel.Worksheet
Set wb = Excel.ActiveWorkbook
Set ws = wb.ActiveSheet
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
y = 1 'Column A in Excel
z = 1 'Row 1 in Excel
'6 I used a SetUp sheet with parameters to customize the script with url and destination data sheet
strUrl = ThisWorkbook.Sheets("Setup").Range("b1").Value
strDestSheet = ThisWorkbook.Sheets("Setup").Range("b2").Value
Sheets(strDestSheet).Activate ' Destination sheet
Cells.Select
Selection.Clear
'IE.navigate "http://halleyweb.com/c058057/mc/mc_p_ricerca.php" ', , , , "Content-Type: application/x-www-form-urlencoded" & vbCrLf
IE.navigate strUrl
ii = 1
nextPageFound = True 'set to init scope var / settato per iniziare il loop
Do '1) loop until nextPageFound instead of '' ii <= 17 Or
'2 set/reset of doc to avoid doc lost after next page link click (see below)
Do While IE.Busy: DoEvents: Loop
Do While IE.readyState <> 4: DoEvents: Loop
Set doc = IE.document
' all tables or single one by Id
Set hTable = doc.getElementsByTagName("table") 'or by .getElementById("table-albo-pretorio")
For Each tb In hTable
'3) splitting header vs body for 1st row field name search
'tHeader
If z = 1 Then
Set hBody = tb.getElementsByTagName("thead")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each Tr In hTR
Set hTD = Tr.getElementsByTagName("th") 'header th
y = 1 ' Resets back to column A
For Each Th In hTD
ws.Cells(z, y).Value = Th.innerText
y = y + 1
Next Th
DoEvents
z = z + 1
Next Tr
Exit For
Next bb
End If
'tBody
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each Tr In hTR
Set hTD = Tr.getElementsByTagName("td")
y = 1 ' Resets back to column A
For Each Td In hTD
'4) RESCUE cell value from td.innerText against specific class name cell
'ws.Cells(z, y).Value = Td.innerText
If CBool(Td.getElementsByClassName("tablesaw-cell-content").Length) Then 'there is at least 1
'use the first
ws.Cells(z, y).Value = Td.getElementsByClassName("tablesaw-cell-content")(0).innerText
End If
y = y + 1 'colonna successiva /next col
Next Td
DoEvents
z = z + 1 'riga successiva /next row
Next Tr
Exit For
Next bb
Exit For
Next tb
'5 looping on doc for link search until NextPageFound using e.getAttribute("title") instead id=nextPage
With doc 'ricerca dei link
Set elems = .getElementsByTagName("a")
nextPageFound = False ' si predispone per concludere nel caso non sia presente una pagina successiva
For Each e In elems
If (e.getAttribute("title") = "Pagina successiva") Then ' alla ricerca di link con title="Pagina successiva" / instead of id=nextPage
e.Click
nextPageFound = True ' trovata pagina successiva /found next page
Exit For
End If
Next e
End With
ii = ii + 1
Application.Wait (Now + TimeValue("00:00:01"))
Loop While nextPageFound ' conclude nel caso non sia stato trovato il link Pagina successiva / exit if not found
IE.Quit
Set IE = Nothing
Application.StatusBar = ""
MsgBox "Estrazione completata" ' completed
End Sub
I got a form, which has 2 buttons, 1 is to set the input data file, 1 is to start a sub, the sub should make a query against the db. But the same simple query "select * from opt_in_customer_record;" return different thing! That's horrible! Why???
Here is my code, btnBrowse_Click() will pop window for user to select file, everytime I will the same file. btnGenData_Click() is the sub which got problem.
for the data file, here is the first 20 lines, Event_Plan_Code is the first column.
5BUDP;HongKong;050111;520010100000800
5BUDP;HongKong;010111;520010100100867
5BUDP;HongKong;130111;520010100182001
3BUDP;HongKong;050111;520010100244746
5BUDP;HongKong;040111;520010100282676
1BUDP;HongKong;110111;520010100310573
1BUDP;HongKong;120111;520010100310573
3BUDP;HongKong;310111;520010100361924
1BUDP;HongKong;310111;520010100392644
1BUDP;HongKong;290111;520010100406914
3BUDP;HongKong;280111;520010100429143
3BUDP;HongKong;190111;520010100440403
3BUDP;HongKong;300111;520010100482444
1BUDP;HongKong;130111;520010100523409
3BUDP;HongKong;210111;520010100576847
5BUDP;HongKong;230111;520010100583232
3BUDP;HongKong;200111;520010100637103
3BUDP;HongKong;160111;520010100639083
3BUDP;HongKong;190111;520010100666157
3BUDP;HongKong;250111;520010100774408
I made the program to stop if the first character of Event_Plan_Code is 1, just to stop the program for debugging. And each time I press the button, different result I got:
1st run:
5BUDP
5BUDP
5BUDP
3BUDP
5BUDP
1BUDP
it make sense.
2nd run:
3BUDP
1BUDP
The problem is that the query should start over again and the result should be the same! Now different result I got.
Thank you very much if you may answer my question!
Option Compare Database
Private Sub btnBrowse_Click()
Dim filePath As String
filePath = LaunchCD(Me)
txtFilePath.Value = filePath
txtStatus.Value = ""
End Sub
Private Sub btnGenData_Click()
'On Error GoTo Error_Handling
Dim extractCdrFlag As Boolean
txtStatus.Value = ""
If IsNull(txtFilePath.Value) Then
MsgBox "Please enter a valid input file location."
Else
txtStatus.Value = ""
txtStatus.Value = txtStatus.Value & "Deleting previous record from table Opt_In_Customer_Record..." & vbCrLf
CurrentDb.Execute "deleteAll"
txtStatus.Value = txtStatus.Value & "Delete successfully." & vbCrLf
If FileExists(txtFilePath.Value) Then
txtStatus.Value = txtStatus.Value & "Trying to import data from file..." & vbCrLf
DoCmd.TransferText acImportDelim, "Import_Specification", "Opt_In_Customer_Record", txtFilePath.Value, False
txtStatus.Value = txtStatus.Value & "Data imported successfully." & vbCrLf
Testing
txtStatus.Value = ""
Else
MsgBox "File does not exist. Please enter again."
End If
End If
Exit Sub
Error_Handling:
MsgBox "Error while generating data! Please check your data setting!"
Exit Sub
End Sub
Sub Testing()
'On Error GoTo Error_Handling
Dim conConnection As New ADODB.Connection
Dim cmdCommand As New ADODB.Command
Dim rstRecordSet As New ADODB.Recordset
Dim eventPlanCode As String
Dim visitedCountry As String
Dim startDateTxt As String
Dim startDate As Date
Dim endDate As Date
Dim imsi As String
Dim currentMonth As String
Dim nextMonth As String
Dim currentYear As String
Dim nextYear As String
Dim temp As Integer
Dim sql As String
'MsgBox CurrentDb.Name
With conConnection
.Provider = "Microsoft.Jet.OLEDB.4.0"
.ConnectionString = CurrentDb.Name
.Open
End With
'MsgBox conConnection.ConnectionString
With cmdCommand
.ActiveConnection = conConnection
.CommandText = "SELECT * FROM Opt_In_Customer_Record;"
.CommandType = adCmdText
End With
With rstRecordSet
.CursorType = adOpenStatic
.CursorLocation = adUseClient
.LockType = adLockOptimistic
.Open cmdCommand
End With
If rstRecordSet.EOF = False Then
rstRecordSet.MoveFirst
Do
'Debug.Print txtStatus.Value
eventPlanCode = rstRecordSet!Event_Plan_Code
visitedCountry = rstRecordSet!Visited_Country
startDateTxt = rstRecordSet!Start_Date
imsi = rstRecordSet!imsi
currentMonth = Mid$(startDateTxt, 3, 2) '01
currentYear = "20" & Mid$(startDateTxt, 5, 2) '2011
startDate = DateSerial(Val(currentYear), Val(currentMonth), Val(Mid$(startDateTxt, 1, 2)))
endDate = startDate + Val(Mid$(eventPlanCode, 1, 1))
MsgBox rstRecordSet!Event_Plan_Code
If (Mid$(eventPlanCode, 1, 1) = "1") Then
Exit Sub
End If
'MsgBox startDate & " " & endDate
If (currentMonth = "01") Then
nextMonth = "02"
ElseIf (currentMonth = "02") Then
nextMonth = "03"
ElseIf (currentMonth = "03") Then
nextMonth = "04"
ElseIf (currentMonth = "04") Then
nextMonth = "05"
ElseIf (currentMonth = "05") Then
nextMonth = "06"
ElseIf (currentMonth = "06") Then
nextMonth = "07"
ElseIf (currentMonth = "07") Then
nextMonth = "08"
ElseIf (currentMonth = "08") Then
nextMonth = "09"
ElseIf (currentMonth = "09") Then
nextMonth = "10"
ElseIf (currentMonth = "10") Then
nextMonth = "11"
ElseIf (currentMonth = "11") Then
nextMonth = "12"
ElseIf (currentMonth = "12") Then
nextMonth = "01"
End If
temp = Val(currentYear)
temp = temp + 1
nextYear = Str(temp)
'MsgBox currentYear & currentMonth & " " & nextYear & nextMonth
'Exit Do
rstRecordSet.MoveNext
Loop Until rstRecordSet.EOF = True
End If
'sql = "select * from ( select * from " & "dbo.inbound_rated_all_" & currentYear & currentMonth & " A inner join Opt_In_Customer_Record B "
conConnection.Close
Set conConnection = Nothing
Set cmdCommand = Nothing
Set rstRecordSet = Nothing
Exit Sub
Error_Handling:
MsgBox "Error during function Testing!"
Set conConnection = Nothing
Set cmdCommand = Nothing
Set rstRecordSet = Nothing
Exit Sub
End Sub
If you want the rows in a particular order, add an ORDER BY clause to your query:
select * from opt_in_customer_record order by event_plan_code
Actually, event_plan_code isn't the right column because it contains duplicates, but that should point you in the right direction.