Nested json object with vba - json

I found this code on the internet that creates a json file from an excel file.
http://www.excelvbamacros.in/2015/01/export-range-in-jason-format.html
This is the code:
Public Sub create_json_file()
Application.Calculation = xlCalculationManual
Application.ScreenUpdating = False
Application.DisplayStatusBar = False
Application.EnableEvents = False
ActiveSheet.DisplayPageBreaks = False
Dim fs As Object
Dim jsonfile
Dim rangetoexport As Range
Dim rowcounter As Long
Dim columncounter As Long
Dim linedata As String
Range("A1").Select
Selection.End(xlDown).Select
Dim lRow As Long
lRow = ActiveCell.Row
Set rangetoexport = Sheets(1).Range("A1:N" & lRow)
Set fs = CreateObject("Scripting.FileSystemObject")
Set jsonfile = fs.CreateTextFile("C:\Users\Desktop\" & "jsondata.txt", True)
linedata = "["
jsonfile.WriteLine linedata
For rowcounter = 2 To rangetoexport.Rows.Count
linedata = ""
For columncounter = 1 To rangetoexport.Columns.Count
linedata = linedata & """" & rangetoexport.Cells(1, columncounter) & """" & ":" & """" & rangetoexport.Cells(rowcounter, columncounter) & """" & ","
Next
linedata = Left(linedata, Len(linedata) - 1)
If rowcounter = rangetoexport.Rows.Count Then
linedata = "{" & linedata & "}"
Else
linedata = "{" & linedata & "},"
End If
jsonfile.WriteLine linedata
Next
linedata = "]"
jsonfile.WriteLine linedata
jsonfile.Close
Set fs = Nothing
Application.Calculation = xlCalculationAutomatic
Application.ScreenUpdating = True
Application.DisplayStatusBar = True
Application.EnableEvents = True
ActiveSheet.DisplayPageBreaks = True
End Sub
It works perfect but my json has to have a nested json object. It needs to look like this:
{
"a": "1234",
"b": 0,
"c": true,
"d": true,
"e": 1,
"f": 24,
"g": null,
"h":
{
"j": 151.70,
"k": 1,
"l": 2,
"m": true
},
"n": null,
"y": true,
"z": -1
}
Code does this:
{
"a": "1234",
"b": 0,
"c": true,
"d": true,
"e": 1,
"f": 24,
"g": null,
"h": ""
"j": 151.70,
"k": 1,
"l": 2,
"m": true
"n": null,
"y": true,
"z": -1
}
a,b,h... these are columns and my example is just one row.
I couldn't add to the code so that it would create the "h": part. Can anyone help me?

Add another loop for sheet2 inside the one for sheet1 .
Option Explicit
Public Sub create_json_file()
Const FILENAME = "jsondata.txt"
Const FOLDER = "C:\Users\Desktop\"
Const q = """"
Dim ar1, ar2, fso, ts
Dim r As Long, c As Long, c2 As Long, lrow As Long
Dim s As String
lrow = Sheets(1).Cells(Rows.Count, 1).End(xlUp).Row
ar1 = Sheets(1).Range("A1:K" & lrow).Value2
ar2 = Sheets(2).Range("A1:D" & lrow).Value2
' build json string
s = "[{" & vbCrLf
For r = 2 To UBound(ar1)
If r > 2 Then s = s & ",{" & vbCrLf
For c = 1 To UBound(ar1, 2)
If c > 1 Then s = s & "," & vbCrLf
s = s & q & ar1(1, c) & q & ":"
If ar1(1, c) = "h" Then
s = s & "{" & vbCrLf
For c2 = 1 To UBound(ar2, 2)
If c2 > 1 Then s = s & ","
s = s & q & ar2(1, c2) & q & ":" _
& q & ar2(r, c2) & q
Next
s = s & "}"
Else
s = s & q & ar1(r, c) & q
End If
Next
s = s & "}" & vbCrLf
Next
s = s & "]"
' write out
Set fso = CreateObject("Scripting.FileSystemObject")
Set ts = fso.CreateTextFile(FOLDER & FILENAME, True)
ts.Write s
MsgBox lrow - 1 & " rows exported to " & FOLDER & FILENAME, vbInformation
End Sub

Related

Export Excel Selected Range to Json

I am trying to export a selected range to json and repeatedly run into the same problems receiving error messages "Sub or Function not defined".
Sub PrintJson()
Dim fs As Object
Dim jsonfile
Dim rangetoexport As Range
Dim rowcounter As Long
Dim columncounter As Long
Dim linedata As String
Dim path As String
Dim fname As String
Set rangetoexport = Sheets("Marketing").Range("C6", Range("C6").End(xlDown).End(xlToRight))
path = ThisWorkbook.path & "\"
fname = "export.json"
Set fs = CreateObject("Scripting.FileSystemObject")
Set jsonfile = fs.CreateTextFile(path & fname, True)
linedata = "{""Output"": ["
jsonfile.WriteLine linedata
For rowcounter = 2 To rangetoexport.Rows.Count
linedata = ""
For columncounter = 1 To rangetoexport.Columns.Count
linedata = linedata & """" & Replace(rangetoexport.Cells(1, columncounter), """", "\""") & """" & ":" & """" _
& Replace(rangetoexport.Cells(rowcounter, columncounter), """", "\""") & """" & ","
Next
linedata = Left(linedata, Len(linedata) - 1)
If rowcounter = rangetoexport.Rows.Count Then
linedata = "{" & linedata & "}"
Else
linedata = "{" & linedata & "},"
End If
jsonfile.WriteLine linedata
Next
linedata = "]}"
jsonfile.WriteLine linedata
jsonfile.Close
Set fs = Nothing
End Sub
I am really new to VBA and just learning. But this confuses me, so any help appreciated.

Convert Excel Sheet (nested) to JSON

I am giving a spreedsheet and I need to convert into JSON.
I have the following spreadsheet as so:
In essence, I'd need to convert into like this:
{ "CompanyA": {
"Products": ["Beds", "Knifes", "Spoons"]
}, "CompanyB": {
"Products": ["Beds", "Knifes", "Spoons"],
"Sites": ["West Coast", "East Coast"]
}, "CompanyC": {
"Office": ["Los Angeles"]
}}
I tried looking at online sources, but I haven't got a good solution to what I am looking for
Here's some basic code which should point you to the right direction.
I have commented it as much as possible.
Sub GetJSONOutput()
Dim wks As Worksheet: Set wks = ActiveSheet
Dim lngLastRow As Long, i As Long, j As Long, k As Long
Dim blFirstRow As Boolean
Dim strOut As String
lngLastRow = wks.Cells.Find("*", wks.Cells(1, 1), , , , xlPrevious).Row
k = 1
For i = 1 To lngLastRow
'\\ First Element - Column A
'\\ Check for first line and build beginning style
If Len(wks.Cells(i, 1).Value) > 0 Then
If blFirstRow = False Then
strOut = "{ """ & wks.Cells(i, 1).Value & """: {"
blFirstRow = True
Else '\\ Rest follow the same style
strOut = "}, """ & wks.Cells(i, 1).Value & """: {"
End If
wks.Cells(k, 4).Value = strOut: k = k + 1 '--> Output Column D
End If
'\\ Middle element - Column B
If Len(wks.Cells(i, 2).Value) > 0 Then strbase = " """ & wks.Cells(i, 2).Value & """: ["
If Len(wks.Cells(i, 3).Value) > 0 Then
'\\ Now we have Middle element then we need to loop through all elements under it!
'\\ Last Element - Column C
If Len(wks.Cells(i + 1, 3).Value) > 0 Then
strAppend = ""
For j = i To wks.Cells(i, 3).End(xlDown).Row
strAppend = strAppend & "|" & wks.Cells(j, 3).Value
Next j
strOut = strbase & """" & Replace(Mid(strAppend, 2, Len(strAppend)), "|", Chr(34) & ", " & Chr(34)) & """]"
wks.Cells(k, 4).Value = strOut: k = k + 1 '--> Output Column D
i = j - 1
Else
strOut = strbase & """" & wks.Cells(i, 3).Value & """]"
wks.Cells(k, 4).Value = strOut: k = k + 1 '--> Output Column D
End If
End If
'\\ Complete output by outputting the last closing brackets
If i = lngLastRow Then
strOut = "}}"
wks.Cells(k, 4).Value = strOut '--> Output Column D
End If
Next i
End Sub
Sub ConvertToJSONText()
Dim Sht As Worksheet
Set Sht = Worksheets("Sheet1")
Dim a As Integer
Dim lstA
Dim lstB
Dim lstC
a = 0
Dim myJsonText
myJsonText = "{"
Do While True
a = a + 1
If Sht.Range("a" & a).Value = "" And Sht.Range("b" & a).Value = "" And Sht.Range("c" & a).Value = "" Then
Exit Do
End If
If Sht.Range("a" & a).Value <> "" And Sht.Range("b" & a).Value = "" And Sht.Range("c" & a).Value = "" Then
If lstB <> "" Then myJsonText = myJsonText & "]"
If lstA <> "" Then myJsonText = myJsonText & "},"
lstA = Sht.Range("a" & a).Value
lstB = ""
lstC = ""
myJsonText = myJsonText & """" & lstA & """: {"
End If
If Sht.Range("a" & a).Value = "" And Sht.Range("b" & a).Value <> "" And Sht.Range("c" & a).Value = "" Then
If lstB <> "" Then myJsonText = myJsonText & "]"
lstB = Sht.Range("B" & a).Value
lstC = ""
myJsonText = myJsonText & """" & lstB & """: ["
End If
If Sht.Range("a" & a).Value = "" And Sht.Range("b" & a).Value = "" And Sht.Range("c" & a).Value <> "" Then
If lstC <> "" Then myJsonText = myJsonText & ","
lstC = Sht.Range("C" & a).Value
myJsonText = myJsonText & """" & lstC & """"
End If
Loop
If lstB <> "" Then myJsonText = myJsonText & "]"
myJsonText = myJsonText & "}"
End Sub

Export cell range to JSON Data

I am using this procedure to export a cell range to JSON.
Sub export_json(mysheet As Worksheet, myrange As String)
Dim fs As Object
Dim jsonfile
Dim rangetoexport As Range
Dim rowcounter As Long
Dim columncounter As Long
Dim linedata As String
Dim path As String
Dim fname As String
Set rangetoexport = mysheet.Range(myrange)
path = ThisWorkbook.path & "\"
fname = clean_filename(myrange, "") & ".json"
Set fs = CreateObject("Scripting.FileSystemObject")
Set jsonfile = fs.CreateTextFile(path & fname, True)
linedata = "{""Output"": ["
jsonfile.WriteLine linedata
For rowcounter = 2 To rangetoexport.Rows.Count
linedata = ""
For columncounter = 1 To rangetoexport.Columns.Count
linedata = linedata & """" & rangetoexport.Cells(1, columncounter) & """" & ":" & """" & rangetoexport.Cells(rowcounter, columncounter) & """" & ","
Next
linedata = Left(linedata, Len(linedata) - 1)
If rowcounter = rangetoexport.Rows.Count Then
linedata = "{" & linedata & "}"
Else
linedata = "{" & linedata & "},"
End If
jsonfile.WriteLine linedata
Next
linedata = "]}"
jsonfile.WriteLine linedata
jsonfile.Close
Set fs = Nothing
End Sub
However, if a cell value contains double quotes ", the output of JSON file becomes corrupt. Any ideas on how to fix this?
You need to escape the double quotes with a slash.
For columncounter = 1 To rangetoexport.Columns.Count
linedata = linedata & """" & Replace(rangetoexport.Cells(1, columncounter), """", "\""") & """" & ":" & """" _
& Replace(rangetoexport.Cells(rowcounter, columncounter), """", "\""") & """" & ","
Next
This answer give more details about escaping json. https://stackoverflow.com/a/19176131/7182460

Loading only HTML without any style and scripts

I have one VBA script, that find some data on web (csfd.cz) and put into Excel. But it is quite slow because it takes some time to load the website (in IE) and then extract data. My thought is how to make that IE only loads HTML and it doesn't load any graphics and functional things - only pure HTML. Is it somehow possible? Thanks for help...
here is my code:
Sub InputData()
Dim cursor As String
Dim i As Long
Dim ie As Object
Dim lastRow As Long
Dim releasesLength As Long
Dim releases As Object
Dim oneRelease As Object
Dim datumKino As String
Dim datumDVD As String
Dim origins As String
Dim year As Long
Dim time As Long
Dim name As String
Dim genreLong As String
Dim genre As String
'zapamatování kurzoru
cursor = ActiveCell.Address
'zjištění posledního řádku
With ActiveSheet
lastRow = .Cells(.Rows.Count, "L").End(xlUp).Row
End With
'první viditelná buňka
Range("L2").Select
ActiveCell.Offset(1, 0).Select
Do Until ActiveCell.Height <> 0
ActiveCell.Offset(1, 0).Select
Loop
'inicializace
Set ie = CreateObject("InternetExplorer.Application")
'ZAČÁTEK SMYČKY--------------------------------------------------------------
For i = ActiveCell.Row To lastRow
Cells(i, 12).Select
'resetování proměných
releasesLength = vbNullLong
Set releases = Nothing
Set oneRelease = Nothing
datumKino = ""
datumDVD = ""
origins = ""
year = vbNullLong
time = vbNullLong
name = ""
genreLong = ""
genre = ""
'vyřazení
If (InStr(Cells(i, 12).Value, "csfd.cz") = 0 Or ActiveCell.Height = 0) Then
GoTo NextRow
End If
'otevření stránky
ie.Visible = False
ie.navigate Cells(i, 12).Value
Application.StatusBar = "Načítám údaje. Prosím počkejte..."
Do While ie.busy
Application.Wait DateAdd("s", 1, Now)
Loop
'úprava procent a datumů
Cells(i, 9).Value = ie.document.getElementById("rating").Children(0).innerText
releasesLength = ie.document.getElementById("releases").getElementsByClassName("content")(0).getElementsByTagName("table")(0).getElementsByTagName("tbody")(0).Children.Length
Set releases = ie.document.getElementById("releases").getElementsByClassName("content")(0).getElementsByTagName("table")(0).getElementsByTagName("tbody")(0).Children
For j = 0 To releasesLength - 1
Set oneRelease = releases(j)
If (oneRelease.getElementsByTagName("th")(0).getElementsByTagName("img")(0).getAttribute("title") = "Česko") Then
If (InStr(oneRelease.getElementsByTagName("th")(0).innerHTML, "V kinech")) Then
If (datumKino = "") Then
datumKino = Left(Replace(Replace(Replace(oneRelease.getElementsByTagName("td")(0).innerHTML, " ", ""), vbLf, ""), vbTab, ""), 10)
End If
ElseIf (InStr(oneRelease.getElementsByTagName("th")(0).innerHTML, "Na DVD")) Then
If (datumDVD = "") Then
datumDVD = Left(Replace(Replace(Replace(oneRelease.getElementsByTagName("td")(0).innerHTML, " ", ""), vbLf, ""), vbTab, ""), 10)
End If
ElseIf (InStr(oneRelease.getElementsByTagName("th")(0).innerHTML, "Na Blu-ray")) Then
If (datumDVD = "") Then
datumDVD = Left(Replace(Replace(Replace(oneRelease.getElementsByTagName("td")(0).innerHTML, " ", ""), vbLf, ""), vbTab, ""), 10)
End If
End If
Else
GoTo NextRelease
End If
NextRelease:
Next j
If (Len(datumKino) <> 0) Then
Cells(i, 1).Value = datumKino
End If
If (Len(datumDVD) <> 0) Then
Cells(i, 2).Value = datumDVD
End If
'1. první zápis do řádku
If (Cells(i, 4).Value = "") Then
year = ie.document.getElementsByClassName("origin")(0).getElementsByTagName("span")(0).innerHTML
Cells(i, 4).Value = year
origin = ie.document.getElementsByClassName("origin")(0).innerHTML
originSplit = Split(origin, ",")
time = Replace(originSplit(UBound(originSplit)), " min", "")
Cells(i, 10).Value = time
name = Replace(Replace(ie.document.getElementsByClassName("info")(0).getElementsByClassName("header")(0).getElementsByTagName("h1")(0).innerHTML, vbLf, ""), vbTab, "")
Cells(i, 3).Value = name
genreLong = ie.document.getElementsByClassName("genre")(0).innerHTML
genre = Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(genreLong, " ", ""), "Akční", "Akč."), "Animovaný", "Anim."), "Dobrodružný", "Dobr."), "Dokumentární", "Dokument."), "Fantasy", "Fant."), "Historický", "Histor."), "Katastrofický", "Katastrof."), "Komedie", "Kom."), "Mysteriózní", "Myster."), "Rodinný", "Rod."), "Romantický", "Romant."), "Thriller", "Thril."), "Životopisný", "Životopis.")
Cells(i, 5).Value = genre
End If
NextRow:
Next i
'KONEC SMYČKY----------------------------------------------------------------
'Clean
ie.Quit
Set ie = Nothing
Set releases = Nothing
Set oneRelease = Nothing
Application.StatusBar = ""
Range(cursor).Select
End Sub

Loop through each table on javascrape webpage with VBA macro

I'm trying to webscrape multiple tables from a website. So far I have built an excel VBA macro to do this. I also figured out how to get all the data when it is on multiple pages in the website. For instance, if I have 1000 results but 50 are displayed on each page. The problem is that I have the same 5 tables on multiple pages because each table has 1000 results.
My code can only loop through each page for 1 table. I also have written code to grab each table, but I cannot figure out how to do that for each of the 50 search results (each page).
How can I loop through multiple tables and click the next page in the process to capture all the data?
Sub ETFDat()
Dim IE As Object
Dim i As Long
Dim strText As String
Dim jj As Long
Dim hBody As Object
Dim hTR As Object
Dim hTD As Object
Dim tb As Object
Dim bb As Object
Dim Tr As Object
Dim Td As Object
Dim ii As Long
Dim doc As Object
Dim hTable As Object
Dim y As Long
Dim z As Long
Dim wb As Excel.Workbook
Dim ws As Excel.Worksheet
Set wb = Excel.ActiveWorkbook
Set ws = wb.ActiveSheet
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
y = 1 'Column A in Excel
z = 1 'Row 1 in Excel
Sheets("Fund Basics").Activate
Cells.Select
Selection.Clear
IE.navigate "http://www.etf.com/channels/smart-beta-etfs/channels/smart- beta-etfs?qt-tabs=0#qt-tabs" ', , , , "Content-Type: application/x-www-form-urlencoded" & vbCrLf
Do While IE.busy: DoEvents: Loop
Do While IE.ReadyState <> 4: DoEvents: Loop
Set doc = IE.document
Set hTable = doc.getElementsByTagName("table") '.GetElementByID("tablePerformance")
ii = 1
Do While ii <= 17
For Each tb In hTable
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each Tr In hTR
Set hTD = Tr.getElementsByTagName("td")
y = 1 ' Resets back to column A
For Each Td In hTD
ws.Cells(z, y).Value = Td.innerText
y = y + 1
Next Td
DoEvents
z = z + 1
Next Tr
Exit For
Next bb
Exit For
Next tb
With doc
Set elems = .getElementsByTagName("a")
For Each e In elems
If (e.getAttribute("id") = "nextPage") Then
e.Click
Exit For
End If
Next e
End With
ii = ii + 1
Application.Wait (Now + TimeValue("00:00:05"))
Loop
MsgBox "Done"
End Sub
There is the example showing how the data could be retrieved from the website using XHRs and JSON parsing, it consists of several steps.
Retrieve the data.
I looked into a little with XHRs using Chrome Developer Tools Network tab.
Most relevant data I found is JSON string returned by GET XHR from http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/50/50/1 after I clicked the next page button:
The response has the following structure for single row item:
[
{
"productId": 576,
"fund": "iShares Russell 1000 Value ETF",
"ticker": "IWD",
"inceptionDate": "2000-05-22",
"launchDate": "2000-05-22",
"hasSegmentReport": "true",
"genericReport": "false",
"hasReport": "true",
"fundsInSegment": 20,
"economicDevelopment": "Developed Markets",
"totalRows": 803,
"fundBasics": {
"issuer": "<a href='/channels/blackrock-etfs' alt='BlackRock'>BlackRock</a>",
"expenseRatio": {
"value": 20
},
"aum": {
"value": 36957230250
},
"spreadPct": {
"value": 0.000094
},
"segment": "Equity: U.S. - Large Cap Value"
},
"performance": {
"priceTrAsOf": "2017-02-27",
"priceTr1Mo": {
"value": 0.031843
},
"priceTr3Mo": {
"value": 0.070156
},
"priceTr1Yr": {
"value": 0.281541
},
"priceTr3YrAnnualized": {
"value": 0.099171
},
"priceTr5YrAnnualized": {
"value": 0.13778
},
"priceTr10YrAnnualized": {
"value": 0.061687
}
},
"analysis": {
"analystPick": null,
"opportunitiesList": null,
"letterGrade": "A",
"efficiencyScore": 97.977103,
"tradabilityScore": 99.260541,
"fitScore": 84.915658,
"leveragedFactor": null,
"exposureReset": null,
"avgDailyDollarVolume": 243848188.037378,
"avgDailyShareVolume": 2148400.688889,
"spread": {
"value": 0.010636
},
"fundClosureRisk": "Low"
},
"fundamentals": {
"dividendYield": {
"value": 0.021543
},
"equity": {
"pe": 27.529645,
"pb": 1.964124
},
"fixedIncome": {
"duration": null,
"creditQuality": null,
"ytm": {
"value": null
}
}
},
"classification": {
"assetClass": "Equity",
"strategy": "Value",
"region": "North America",
"geography": "U.S.",
"category": "Size and Style",
"focus": "Large Cap",
"niche": "Value",
"inverse": "false",
"leveraged": "false",
"etn": "false",
"selectionCriteria": "Multi-Factor",
"weightingScheme": "Multi-Factor",
"activePerSec": "false",
"underlyingIndex": "Russell 1000 Value Index",
"indexProvider": "Russell",
"brand": "iShares"
},
"tax": {
"legalStructure": "Open-Ended Fund",
"maxLtCapitalGainsRate": 20,
"maxStCapitalGainsRate": 39.6,
"taxReporting": "1099"
}
}
]
The property "totalRows": 803 specifies the total rows count. So to make data retrieving as fast as it possible, better to make the request to get the first row. As you can see from the URL, there is ../-aum/50/50/.. tail, which points sorting order, item to start from, and total items to return. Thus to get the only row it should be http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/1/1
Parse retrieved JSON, get the total number of rows from totalRows property.
Make another one request to get the entire table.
Parse the entire table JSON, convert it to 2d array and output. You can perform further processing with direct access to the array.
For the table shown below:
The resulting table contains 803 rows and header with columns as follows:
productId
fund
ticker
inceptionDate
launchDate
hasSegmentReport
genericReport
hasReport
fundsInSegment
economicDevelopment
totalRows
fundBasics_issuer
fundBasics_expenseRatio_value
fundBasics_aum_value
fundBasics_spreadPct_value
fundBasics_segment
performance_priceTrAsOf
performance_priceTr1Mo_value
performance_priceTr3Mo_value
performance_priceTr1Yr_value
performance_priceTr3YrAnnualized_value
performance_priceTr5YrAnnualized_value
performance_priceTr10YrAnnualized_value
analysis_analystPick
analysis_opportunitiesList
analysis_letterGrade
analysis_efficiencyScore
analysis_tradabilityScore
analysis_fitScore
analysis_leveragedFactor
analysis_exposureReset
analysis_avgDailyDollarVolume
analysis_avgDailyShareVolume
analysis_spread_value
analysis_fundClosureRisk
fundamentals_dividendYield_value
fundamentals_equity_pe
fundamentals_equity_pb
fundamentals_fixedIncome_duration
fundamentals_fixedIncome_creditQuality
fundamentals_fixedIncome_ytm_value
classification_assetClass
classification_strategy
classification_region
classification_geography
classification_category
classification_focus
classification_niche
classification_inverse
classification_leveraged
classification_etn
classification_selectionCriteria
classification_weightingScheme
classification_activePerSec
classification_underlyingIndex
classification_indexProvider
classification_brand
tax_legalStructure
tax_maxLtCapitalGainsRate
tax_maxStCapitalGainsRate
tax_taxReporting
Put the below code into VBA Project standard module:
Option Explicit
Sub GetData()
Dim sJSONString As String
Dim vJSON As Variant
Dim sState As String
Dim lRowsQty As Long
Dim aData()
Dim aHeader()
' Download and parse the only first row to get total rows qty
sJSONString = GetXHR("http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/1/1")
JSON.Parse sJSONString, vJSON, sState
lRowsQty = vJSON(0)("totalRows")
' Download and parse the entire data
sJSONString = GetXHR("http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/0/" & lRowsQty & "/1")
JSON.Parse sJSONString, vJSON, sState
' Convert JSON to 2d array
JSON.ToArray vJSON, aData, aHeader
' Output
With Sheets(1)
.Cells.Delete
OutputArray .Cells(1, 1), aHeader
Output2DArray .Cells(2, 1), aData
.Cells.Columns.AutoFit
End With
End Sub
Function GetXHR(sURL As String) As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", sURL, False
.Send
GetXHR = .responseText
End With
End Function
Sub OutputArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
1, _
UBound(aCells) - LBound(aCells) + 1)
.NumberFormat = "#"
.Value = aCells
End With
End With
End Sub
Sub Output2DArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1)
.NumberFormat = "#"
.Value = aCells
End With
End With
End Sub
Create one more standard module, name it JSON and put the below code into it, this code provides JSON processing functionality:
Option Explicit
Private sBuffer As String
Private oTokens As Object
Private oRegEx As Object
Private bMatch As Boolean
Private oChunks As Object
Private oHeader As Object
Private aData() As Variant
Private i As Long
Sub Parse(ByVal sSample As String, vJSON As Variant, sState As String)
' Backus–Naur form JSON parser implementation based on RegEx
' Input:
' sSample - source JSON string
' Output:
' vJson - created object or array to be returned as result
' sState - string Object|Array|Error depending on processing
sBuffer = sSample
Set oTokens = CreateObject("Scripting.Dictionary")
Set oRegEx = CreateObject("VBScript.RegExp")
With oRegEx ' Patterns based on specification http://www.json.org/
.Global = True
.MultiLine = True
.IgnoreCase = True ' Unspecified True, False, Null accepted
.Pattern = "(?:'[^']*'|""(?:\\""|[^""])*"")(?=\s*[,\:\]\}])" ' Double-quoted string, unspecified quoted string
Tokenize "s"
.Pattern = "[+-]?(?:\d+\.\d*|\.\d+|\d+)(?:e[+-]?\d+)?(?=\s*[,\]\}])" ' Number, E notation number
Tokenize "d"
.Pattern = "\b(?:true|false|null)(?=\s*[,\]\}])" ' Constants true, false, null
Tokenize "c"
.Pattern = "\b[A-Za-z_]\w*(?=\s*\:)" ' Unspecified non-double-quoted property name accepted
Tokenize "n"
.Pattern = "\s+"
sBuffer = .Replace(sBuffer, "") ' Remove unnecessary spaces
.MultiLine = False
Do
bMatch = False
.Pattern = "<\d+(?:[sn])>\:<\d+[codas]>" ' Object property structure
Tokenize "p"
.Pattern = "\{(?:<\d+p>(?:,<\d+p>)*)?\}" ' Object structure
Tokenize "o"
.Pattern = "\[(?:<\d+[codas]>(?:,<\d+[codas]>)*)?\]" ' Array structure
Tokenize "a"
Loop While bMatch
.Pattern = "^<\d+[oa]>$" ' Top level object structure, unspecified array accepted
If .Test(sBuffer) And oTokens.Exists(sBuffer) Then
Retrieve sBuffer, vJSON
sState = IIf(IsObject(vJSON), "Object", "Array")
Else
vJSON = Null
sState = "Error"
End If
End With
Set oTokens = Nothing
Set oRegEx = Nothing
End Sub
Private Sub Tokenize(sType)
Dim aContent() As String
Dim lCopyIndex As Long
Dim i As Long
Dim sKey As String
With oRegEx.Execute(sBuffer)
If .Count = 0 Then Exit Sub
ReDim aContent(0 To .Count - 1)
lCopyIndex = 1
For i = 0 To .Count - 1
With .Item(i)
sKey = "<" & oTokens.Count & sType & ">"
oTokens(sKey) = .Value
aContent(i) = Mid(sBuffer, lCopyIndex, .FirstIndex - lCopyIndex + 1) & sKey
lCopyIndex = .FirstIndex + .Length + 1
End With
Next
End With
sBuffer = Join(aContent, "") & Mid(sBuffer, lCopyIndex, Len(sBuffer) - lCopyIndex + 1)
bMatch = True
End Sub
Private Sub Retrieve(sTokenKey, vTransfer)
Dim sTokenValue As String
Dim sName As String
Dim vValue As Variant
Dim aTokens() As String
Dim i As Long
sTokenValue = oTokens(sTokenKey)
With oRegEx
.Global = True
Select Case Left(Right(sTokenKey, 2), 1)
Case "o"
Set vTransfer = CreateObject("Scripting.Dictionary")
aTokens = Split(sTokenValue, "<")
For i = 1 To UBound(aTokens)
Retrieve "<" & Split(aTokens(i), ">", 2)(0) & ">", vTransfer
Next
Case "p"
aTokens = Split(sTokenValue, "<", 4)
Retrieve "<" & Split(aTokens(1), ">", 2)(0) & ">", sName
Retrieve "<" & Split(aTokens(2), ">", 2)(0) & ">", vValue
If IsObject(vValue) Then
Set vTransfer(sName) = vValue
Else
vTransfer(sName) = vValue
End If
Case "a"
aTokens = Split(sTokenValue, "<")
If UBound(aTokens) = 0 Then
vTransfer = Array()
Else
ReDim vTransfer(0 To UBound(aTokens) - 1)
For i = 1 To UBound(aTokens)
Retrieve "<" & Split(aTokens(i), ">", 2)(0) & ">", vValue
If IsObject(vValue) Then
Set vTransfer(i - 1) = vValue
Else
vTransfer(i - 1) = vValue
End If
Next
End If
Case "n"
vTransfer = sTokenValue
Case "s"
vTransfer = Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace( _
Mid(sTokenValue, 2, Len(sTokenValue) - 2), _
"\""", """"), _
"\\", "\"), _
"\/", "/"), _
"\b", Chr(8)), _
"\f", Chr(12)), _
"\n", vbLf), _
"\r", vbCr), _
"\t", vbTab)
.Global = False
.Pattern = "\\u[0-9a-fA-F]{4}"
Do While .Test(vTransfer)
vTransfer = .Replace(vTransfer, ChrW(("&H" & Right(.Execute(vTransfer)(0).Value, 4)) * 1))
Loop
Case "d"
vTransfer = Evaluate(sTokenValue)
Case "c"
Select Case LCase(sTokenValue)
Case "true"
vTransfer = True
Case "false"
vTransfer = False
Case "null"
vTransfer = Null
End Select
End Select
End With
End Sub
Function Serialize(vJSON As Variant) As String
Set oChunks = CreateObject("Scripting.Dictionary")
SerializeElement vJSON, ""
Serialize = Join(oChunks.Items(), "")
Set oChunks = Nothing
End Function
Private Sub SerializeElement(vElement As Variant, ByVal sIndent As String)
Dim aKeys() As Variant
Dim i As Long
With oChunks
Select Case VarType(vElement)
Case vbObject
If vElement.Count = 0 Then
.Item(.Count) = "{}"
Else
.Item(.Count) = "{" & vbCrLf
aKeys = vElement.Keys
For i = 0 To UBound(aKeys)
.Item(.Count) = sIndent & vbTab & """" & aKeys(i) & """" & ": "
SerializeElement vElement(aKeys(i)), sIndent & vbTab
If Not (i = UBound(aKeys)) Then .Item(.Count) = ","
.Item(.Count) = vbCrLf
Next
.Item(.Count) = sIndent & "}"
End If
Case Is >= vbArray
If UBound(vElement) = -1 Then
.Item(.Count) = "[]"
Else
.Item(.Count) = "[" & vbCrLf
For i = 0 To UBound(vElement)
.Item(.Count) = sIndent & vbTab
SerializeElement vElement(i), sIndent & vbTab
If Not (i = UBound(vElement)) Then .Item(.Count) = "," 'sResult = sResult & ","
.Item(.Count) = vbCrLf
Next
.Item(.Count) = sIndent & "]"
End If
Case vbInteger, vbLong
.Item(.Count) = vElement
Case vbSingle, vbDouble
.Item(.Count) = Replace(vElement, ",", ".")
Case vbNull
.Item(.Count) = "null"
Case vbBoolean
.Item(.Count) = IIf(vElement, "true", "false")
Case Else
.Item(.Count) = """" & _
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(vElement, _
"\", "\\"), _
"""", "\"""), _
"/", "\/"), _
Chr(8), "\b"), _
Chr(12), "\f"), _
vbLf, "\n"), _
vbCr, "\r"), _
vbTab, "\t") & _
""""
End Select
End With
End Sub
Function ToString(vJSON As Variant) As String
Select Case VarType(vJSON)
Case vbObject, Is >= vbArray
Set oChunks = CreateObject("Scripting.Dictionary")
ToStringElement vJSON, ""
oChunks.Remove 0
ToString = Join(oChunks.Items(), "")
Set oChunks = Nothing
Case vbNull
ToString = "Null"
Case vbBoolean
ToString = IIf(vJSON, "True", "False")
Case Else
ToString = CStr(vJSON)
End Select
End Function
Private Sub ToStringElement(vElement As Variant, ByVal sIndent As String)
Dim aKeys() As Variant
Dim i As Long
With oChunks
Select Case VarType(vElement)
Case vbObject
If vElement.Count = 0 Then
.Item(.Count) = "''"
Else
.Item(.Count) = vbCrLf
aKeys = vElement.Keys
For i = 0 To UBound(aKeys)
.Item(.Count) = sIndent & aKeys(i) & ": "
ToStringElement vElement(aKeys(i)), sIndent & vbTab
If Not (i = UBound(aKeys)) Then .Item(.Count) = vbCrLf
Next
End If
Case Is >= vbArray
If UBound(vElement) = -1 Then
.Item(.Count) = "''"
Else
.Item(.Count) = vbCrLf
For i = 0 To UBound(vElement)
.Item(.Count) = sIndent & i & ": "
ToStringElement vElement(i), sIndent & vbTab
If Not (i = UBound(vElement)) Then .Item(.Count) = vbCrLf
Next
End If
Case vbNull
.Item(.Count) = "Null"
Case vbBoolean
.Item(.Count) = IIf(vElement, "True", "False")
Case Else
.Item(.Count) = CStr(vElement)
End Select
End With
End Sub
Sub ToArray(vJSON As Variant, aRows() As Variant, aHeader() As Variant)
' Input:
' vJSON - Array or Object which contains rows data
' Output:
' aData - 2d array representing JSON data
' aHeader - 1d array of property names
Dim sName As Variant
Set oHeader = CreateObject("Scripting.Dictionary")
Select Case VarType(vJSON)
Case vbObject
If vJSON.Count > 0 Then
ReDim aData(0 To vJSON.Count - 1, 0 To 0)
oHeader("#") = 0
i = 0
For Each sName In vJSON
aData(i, 0) = "#" & sName
ToArrayElement vJSON(sName), ""
i = i + 1
Next
Else
ReDim aData(0 To 0, 0 To 0)
End If
Case Is >= vbArray
If UBound(vJSON) >= 0 Then
ReDim aData(0 To UBound(vJSON), 0 To 0)
For i = 0 To UBound(vJSON)
ToArrayElement vJSON(i), ""
Next
Else
ReDim aData(0 To 0, 0 To 0)
End If
Case Else
ReDim aData(0 To 0, 0 To 0)
aData(0, 0) = ToString(vJSON)
End Select
aHeader = oHeader.Keys()
Set oHeader = Nothing
aRows = aData
Erase aData
End Sub
Private Sub ToArrayElement(vElement As Variant, sFieldName As String)
Dim sName As Variant
Dim j As Long
Select Case VarType(vElement)
Case vbObject ' collection of objects
For Each sName In vElement
ToArrayElement vElement(sName), sFieldName & IIf(sFieldName = "", "", "_") & sName
Next
Case Is >= vbArray ' collection of arrays
For j = 0 To UBound(vElement)
ToArrayElement vElement(j), sFieldName & IIf(sFieldName = "", "", "_") & "#" & j
Next
Case Else
If Not oHeader.Exists(sFieldName) Then
oHeader(sFieldName) = oHeader.Count
If UBound(aData, 2) < oHeader.Count - 1 Then ReDim Preserve aData(0 To UBound(aData, 1), 0 To oHeader.Count - 1)
End If
j = oHeader(sFieldName)
aData(i, j) = ToString(vElement)
End Select
End Sub
I tried useful the original question and found little bug that I fixed
The answer to you question is to
1 loop on Doc elements until next page link is found, and you to
2 set/reset the doc var to avoid the Doc lost
Do
Do While IE.Busy: DoEvents: Loop
Do While IE.readyState <> 4: DoEvents: Loop
Set doc = IE.document
....
Loop While nextPageFound 'exit if "next page" not found
when following the next page hyper link, during loop on Doc elements.
And then I added the
3 split of Table Header and Body (if exist) for 1st row field name set
4 RESCUE of cell value from specific class by name cell against simple td.innerText
5 loop on doc for link search until NextPageFound using
e.getAttribute("title") instead id=nextPage
6 use of a SetUp sheet with parameters to customize the script with url and destination data sheet
strUrl = ThisWorkbook.Sheets("Setup").Range("b1").Value
strDestSheet = ThisWorkbook.Sheets("Setup").Range("b2").Value
And here is the VBA function revisited:
Sub ETFDatNew()
Dim IE As Object
Dim i As Long
Dim strText As String
Dim jj As Long
Dim hBody As Object
Dim hTR As Object
Dim hTD As Object
Dim tb As Object
Dim bb As Object
Dim Tr As Object
Dim Td As Object
Dim ii As Long
Dim doc As Object
Dim hTable As Object
Dim y As Long
Dim z As Long
Dim wb As Excel.Workbook
Dim ws As Excel.Worksheet
Set wb = Excel.ActiveWorkbook
Set ws = wb.ActiveSheet
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
y = 1 'Column A in Excel
z = 1 'Row 1 in Excel
'6 I used a SetUp sheet with parameters to customize the script with url and destination data sheet
strUrl = ThisWorkbook.Sheets("Setup").Range("b1").Value
strDestSheet = ThisWorkbook.Sheets("Setup").Range("b2").Value
Sheets(strDestSheet).Activate ' Destination sheet
Cells.Select
Selection.Clear
'IE.navigate "http://halleyweb.com/c058057/mc/mc_p_ricerca.php" ', , , , "Content-Type: application/x-www-form-urlencoded" & vbCrLf
IE.navigate strUrl
ii = 1
nextPageFound = True 'set to init scope var / settato per iniziare il loop
Do '1) loop until nextPageFound instead of '' ii <= 17 Or
'2 set/reset of doc to avoid doc lost after next page link click (see below)
Do While IE.Busy: DoEvents: Loop
Do While IE.readyState <> 4: DoEvents: Loop
Set doc = IE.document
' all tables or single one by Id
Set hTable = doc.getElementsByTagName("table") 'or by .getElementById("table-albo-pretorio")
For Each tb In hTable
'3) splitting header vs body for 1st row field name search
'tHeader
If z = 1 Then
Set hBody = tb.getElementsByTagName("thead")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each Tr In hTR
Set hTD = Tr.getElementsByTagName("th") 'header th
y = 1 ' Resets back to column A
For Each Th In hTD
ws.Cells(z, y).Value = Th.innerText
y = y + 1
Next Th
DoEvents
z = z + 1
Next Tr
Exit For
Next bb
End If
'tBody
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each Tr In hTR
Set hTD = Tr.getElementsByTagName("td")
y = 1 ' Resets back to column A
For Each Td In hTD
'4) RESCUE cell value from td.innerText against specific class name cell
'ws.Cells(z, y).Value = Td.innerText
If CBool(Td.getElementsByClassName("tablesaw-cell-content").Length) Then 'there is at least 1
'use the first
ws.Cells(z, y).Value = Td.getElementsByClassName("tablesaw-cell-content")(0).innerText
End If
y = y + 1 'colonna successiva /next col
Next Td
DoEvents
z = z + 1 'riga successiva /next row
Next Tr
Exit For
Next bb
Exit For
Next tb
'5 looping on doc for link search until NextPageFound using e.getAttribute("title") instead id=nextPage
With doc 'ricerca dei link
Set elems = .getElementsByTagName("a")
nextPageFound = False ' si predispone per concludere nel caso non sia presente una pagina successiva
For Each e In elems
If (e.getAttribute("title") = "Pagina successiva") Then ' alla ricerca di link con title="Pagina successiva" / instead of id=nextPage
e.Click
nextPageFound = True ' trovata pagina successiva /found next page
Exit For
End If
Next e
End With
ii = ii + 1
Application.Wait (Now + TimeValue("00:00:01"))
Loop While nextPageFound ' conclude nel caso non sia stato trovato il link Pagina successiva / exit if not found
IE.Quit
Set IE = Nothing
Application.StatusBar = ""
MsgBox "Estrazione completata" ' completed
End Sub