I have been parsing data from JSON to Excel and the code is working fine but it takes much time to write data which is more than 1 minute.
Every Column has 5K rows of data. I have searched to find better way of parsing data into excel with less time but no success.
I do hope there will be an way of achieving this. Any help will be much appreciated
Sub parsejson()
Dim t As Single
t = Timer
Dim objRequest As Object
Dim strUrl As String
Dim blnAsync As Boolean
Dim strResponse As String
Dim idno, r As Long
Dim ws, ws2 As Worksheet
Dim JSON As Object
Dim lrow As Long
Application.Calculation = xlCalculationManual
Application.ScreenUpdating = False
Application.DisplayStatusBar = False
Set ws = Sheet1
Set ws2 = Sheet2
Set objRequest = CreateObject("MSXML2.XMLHTTP")
strUrl = ""
blnAsync = True
With objRequest
.Open "GET", strUrl, blnAsync
.setRequestHeader "Content-Type", "application/json"
.send
While objRequest.readyState <> 4
DoEvents
Wend
strResponse = .ResponseText
End With
Dim resultDict As Object
Set resultDict = ParseJson("{""result"":" & strResponse & "}")
Dim i As Long
Dim resultNum As Long
resultNum = resultDict("result").Count
r = 2
For i = 1 To resultNum
ws.Cells(r, "B").Value = resultDict("result")(i)("productName")
ws.Cells(r, "C").Value = resultDict("result")(i)("upc")
ws.Cells(r, "D").Value = resultDict("result")(i)("asin")
ws.Cells(r, "E").Value = resultDict("result")(i)("epid")
ws.Cells(r, "G").Value = resultDict("result")(i)("platform")
ws.Cells(r, "I").Value = resultDict("result")(i)("uniqueID")
ws.Cells(r, "L").Value = resultDict("result")(i)("productShortName")
ws.Cells(r, "M").Value = resultDict("result")(i)("coverPicture")
ws.Cells(r, "N").Value = resultDict("result")(i)("realeaseYear")
ws.Cells(r, "Q").Value = resultDict("result")(i)("verified")
ws.Cells(r, "S").Value = resultDict("result")(i)("category")
ws2.Cells(r, "E").Value = resultDict("result")(i)("brand")
ws2.Cells(r, "F").Value = resultDict("result")(i)("compatibleProduct")
ws2.Cells(r, "G").Value = resultDict("result")(i)("type")
ws2.Cells(r, "H").Value = resultDict("result")(i)("connectivity")
ws2.Cells(r, "I").Value = resultDict("result")(i)("compatibleModel")
ws2.Cells(r, "J").Value = resultDict("result")(i)("color")
ws2.Cells(r, "K").Value = resultDict("result")(i)("material")
ws2.Cells(r, "L").Value = resultDict("result")(i)("cableLength")
ws2.Cells(r, "M").Value = resultDict("result")(i)("mpn")
ws2.Cells(r, "O").Value = resultDict("result")(i)("features")
ws2.Cells(r, "Q").Value = resultDict("result")(i)("wirelessRange")
ws2.Cells(r, "T").Value = resultDict("result")(i)("bundleDescription")
r = r + 1
Next i
Application.Calculation = xlCalculationAutomatic
Application.ScreenUpdating = True
Application.DisplayStatusBar = True
MsgBox "RunTime : " & Format((Timer - t) / 86400, "hh:mm:ss")
End Sub
As already discussed, your code is not slow because of parsing the JSON, but because you write every value cell by cell. The interface between VBA and Excel is slow compared to things done in memory, so the way to go is to write the data into a 2-dimensional array that can be written all at once into Excel.
As the destination in Excel is not a single Range, I suggest to have a small routine that collects and writes data for one column. Easy to understand and easy to adapt if columns or field names changes.
Sub writeColumn(destRange As Range, resultDict As Object, colName As String)
Dim resultNum As Long, i As Long
resultNum = resultDict("result").Count
' Build a 2-dimesional array. 2nd index is always 1 as we write only one column.
ReDim columnData(1 To resultNum, 1 To 1) As Variant
For i = 1 To resultNum
columnData(i, 1) = resultDict("result")(i)(colName)
Next
' Write the data into the column
destRange.Cells(1, 1).Resize(resultNum, 1) = columnData
End Sub
For every field/column, you need a call in your main routine (but without any loop)
Call writeColumn(ws.Cells(r, "B"), resultDict, "productName")
(...)
Call writeColumn(ws2.Cells(r, "E"), resultDict, "brand")
(...)
Writing/Reading value to/from cell is a very slow operation, even more so when you are doing that so many times in a row therefore populating your data in an array and write into the cells in blocks is the best way.
Since your requirement involves multiple continuous range, you will have to write into the sheet multiple times.
Replace your entire For loop with the below code, not the prettiest but should work:
Dim dataArr() As Variant
ReDim dataArr(1 To resultNum, 1 To 4) As Variant
For i = 1 To resultNum
dataArr(i, 1) = resultDict("result")(i)("productName")
dataArr(i, 2) = resultDict("result")(i)("upc")
dataArr(i, 3) = resultDict("result")(i)("asin")
dataArr(i, 4) = resultDict("result")(i)("epid")
Next i
ws.Range(ws.Cells(2, "B"), ws.Cells(1 + resultNum, "E")).Value = dataArr
ReDim dataArr(1 To resultNum, 1 To 1) As Variant
For i = 1 To resultNum
dataArr(i, 1) = resultDict("result")(i)("platform")
Next i
ws.Range(ws.Cells(2, "G"), ws.Cells(1 + resultNum, "G")).Value = dataArr
ReDim dataArr(1 To resultNum, 1 To 1) As Variant
For i = 1 To resultNum
dataArr(i, 1) = resultDict("result")(i)("uniqueID")
Next i
ws.Range(ws.Cells(2, "I"), ws.Cells(1 + resultNum, "I")).Value = dataArr
ReDim dataArr(1 To resultNum, 1 To 3) As Variant
For i = 1 To resultNum
dataArr(i, 1) = resultDict("result")(i)("productShortName")
dataArr(i, 2) = resultDict("result")(i)("coverPicture")
dataArr(i, 3) = resultDict("result")(i)("realeaseYear")
Next i
ws.Range(ws.Cells(2, "L"), ws.Cells(1 + resultNum, "N")).Value = dataArr
ReDim dataArr(1 To resultNum, 1 To 1) As Variant
For i = 1 To resultNum
dataArr(i, 1) = resultDict("result")(i)("verified")
Next i
ws.Range(ws.Cells(2, "Q"), ws.Cells(1 + resultNum, "Q")).Value = dataArr
ReDim dataArr(1 To resultNum, 1 To 1) As Variant
For i = 1 To resultNum
dataArr(i, 1) = resultDict("result")(i)("category")
Next i
ws.Range(ws.Cells(2, "S"), ws.Cells(1 + resultNum, "S")).Value = dataArr
ReDim dataArr(1 To resultNum, 1 To 9) As Variant
For i = 1 To resultNum
dataArr(i, 1) = resultDict("result")(i)("brand")
dataArr(i, 2) = resultDict("result")(i)("compatibleProduct")
dataArr(i, 3) = resultDict("result")(i)("type")
dataArr(i, 4) = resultDict("result")(i)("connectivity")
dataArr(i, 5) = resultDict("result")(i)("compatibleModel")
dataArr(i, 6) = resultDict("result")(i)("color")
dataArr(i, 7) = resultDict("result")(i)("material")
dataArr(i, 8) = resultDict("result")(i)("cableLength")
dataArr(i, 9) = resultDict("result")(i)("mpn")
Next i
ws2.Range(ws2.Cells(2, "E"), ws2.Cells(1 + resultNum, "M")).Value = dataArr
ReDim dataArr(1 To resultNum, 1 To 2) As Variant
For i = 1 To resultNum
dataArr(i, 1) = resultDict("result")(i)("features")
dataArr(i, 2) = resultDict("result")(i)("wirelessRange")
Next i
ws2.Range(ws2.Cells(2, "O"), ws2.Cells(1 + resultNum, "Q")).Value = dataArr
ReDim dataArr(1 To resultNum, 1 To 1) As Variant
For i = 1 To resultNum
dataArr(i, 1) = resultDict("result")(i)("bundleDescription")
Next i
ws2.Range(ws2.Cells(2, "T"), ws2.Cells(1 + resultNum, "T")).Value = dataArr
Related
Hello i want to ask about some problem about excel, i have some data like this:
and i have import that JSON data to excel with modules from https://github.com/TheEricBurnett/Excellent-JSON
and my code form are
Private Sub ImportJSONFIle_Click()
Dim fd As Office.FileDialog
Set fd = Application.FileDialog(msoFileDialogFilePicker)
With fd
.Title = "Select a JSON File"
.AllowMultiSelect = False
If .Show() Then
Filename = .SelectedItems(1)
Dim content As String
Dim iFile As Integer: iFile = FreeFile
Open Filename For Input As #iFile
content = Input(LOF(iFile), iFile)
' Parse JSON String
Dim dummyData As Object
Set dummyData = JsonConverter.ParseJson(content)
i = 1
For Each dummyDatas In dummyData
Cells(i, 1) = dummyDatas("nama")
Cells(i, 2) = dummyDatas("email")
i = i + 1
Next
Close #iFile
End If
End With End Sub
finally the result is:
Here i want to ask how to make the data written horizontally not vertically? Here the result what i want :
Since you could potentially deal with alot of entries from the JSON, it is recommended to populate the values in an array first then write into your worksheet.
Replace this:
For Each dummyDatas In dummyData
Cells(i, 1) = dummyDatas("nama")
Cells(i, 2) = dummyDatas("email")
i = i + 1
Next
To this:
Dim outputArr() As Variant
ReDim outputArr(1 To 1, 1 To dummyData.Count * 2) As Variant
For Each dummyDatas In dummyData
outputArr(1, i) = dummyDatas("nama")
i = i + 1
outputArr(1, i) = dummyDatas("email")
i = i + 1
Next
Cells(1, 1).Resize(, UBound(outputArr, 2)).Value = outputArr
EDIT - To insert result after the last column
Dim outputArr() As Variant
ReDim outputArr(1 To 1, 1 To dummyData.Count * 2) As Variant
For Each dummyDatas In dummyData
outputArr(1, i) = dummyDatas("nama")
i = i + 1
outputArr(1, i) = dummyDatas("email")
i = i + 1
Next
Dim lastCol As Long
lastCol = Cells(1, Columns.Count).End(xlToLeft).Column
Cells(1, lastCol + 1).Resize(, UBound(outputArr, 2)).Value = outputArr
You may try to replace :
Cells(i, 1) = dummyDatas("nama")
Cells(i, 2) = dummyDatas("email")
with
Cells(1,i) = dummyDatas("nama")
i=i+1
Cells(1,i) = dummyDatas("email")
Not tested but this should work. Replace this:
Cells(i, 1) = dummyDatas("nama")
Cells(i, 2) = dummyDatas("email")
i = i + 1
With:
Cells(1, i) = dummyDatas("nama")
Cells(1, i+1) = dummyDatas("email")
i=i+2
I got this code from a competent user, not sure if he wants to be named. The code searches the HTML content for innerText of certain tags and transfers them to an Excel table, well sorted under the headers, structured as pivot.
Public Sub GetDataFromURL()
Const URL = "URL"
Dim html As MSHTML.HTMLDocument, xhr As Object
Set html = New MSHTML.HTMLDocument
Set xhr = CreateObject("MSXML2.ServerXMLHTTP.6.0")
With xhr
.Open "POST", URL, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send "...parameters..."
html.body.innerHTML = .responseText
End With
Dim table As MSHTML.HTMLTable, r As Long, c As Long, headers(), row As MSHTML.HTMLTableRow
Dim results() As Variant, html2 As MSHTML.HTMLDocument
headers = Array("HDR01", "HDR02", "HDR03", "HDR04")
ReDim results(1 To 100, 1 To UBound(headers) + 1)
Set table = html.querySelector("table")
Set html2 = New MSHTML.HTMLDocument
Dim lastRow As Boolean
For Each row In table.Rows
lastRow = False
Dim header As String
html2.body.innerHTML = row.innerHTML
header = Trim$(row.Children(0).innerText)
If header = "HDR01" Then
r = r + 1
Dim dict As Scripting.Dictionary: Set dict = GetBlankDictionary(headers)
On Error Resume Next
dict("HDR02") = Replace$(html2.querySelector("a").href, "about:", "https://URL")
On Error GoTo 0
End If
If dict.Exists(header) Then dict(header) = Trim$(row.Children(1).innerText)
If (header = vbNullString And html2.querySelectorAll("a").Length > 0) Then
dict("HDR03") = Replace$(html2.querySelector("a").href, "about:blank", "URL")
lastRow = True
ElseIf header = "HDR04" Then
If row.NextSibling.NodeType = 1 Then lastRow = True
End If
If lastRow Then
populateArrayFromDict dict, results, r
End If
Next
results = Application.Transpose(results)
ReDim Preserve results(1 To UBound(headers) + 1, 1 To r)
results = Application.Transpose(results)
Dim re As Object
Set re = CreateObject("VBScript.RegExp")
With re
.Global = False
.MultiLine = False
.IgnoreCase = True
.Pattern = "\s([0-9.]+)\sm²"
End With
Dim ie As SHDocVw.InternetExplorer
Set ie = New SHDocVw.InternetExplorer
With ie
.Visible = True
For r = LBound(results, 1) To UBound(results, 1)
If results(r, 7) <> vbNullString Then
.Navigate2 results(r, 7), headers:="Referer: " & URL
While .Busy Or .readyState <> READYSTATE_COMPLETE: DoEvents: Wend
'On Error Resume Next
results(r, 8) = re.Execute(.document.querySelector("#anz").innerHTML)(0).Submatches(0)
'On Error GoTo 0
End If
Next
.Quit
End With
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
It works perfectly in Excel, but I need it for an Access-table. My Aceess-table named tblTab01 contains all the fields that are present in the code in the headers = array("..."), and I have disabled the following lines in the code:
results = Application.Transpose(results)
and
ActiveSheet.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
Instead, I added the following lines:
Dim db As DAO.Database
Dim strInsert
Set db = CurrentDb
strInsert = "INSERT INTO tblTab01 VALUES (results);"
db.Execute strInsert
But I only get all possible errors!
How would the code need to be modified for use with the Access table? THX
This produces same output as the Excel code. I attempted a solution that eliminated looping array but this version is actually faster.
Had to use Excel WorksheetFunction to make the Transpose method work. Make sure Excel library is selected in References.
results = Excel.WorksheetFunction.Transpose(results)
ReDim Preserve results(1 To UBound(headers) + 1, 1 To r)
results = Excel.WorksheetFunction.Transpose(results)
Uncomment the On Error lines:
On Error Resume Next
results(r, 8) = re.Execute(.document.querySelector("#anz").innerHTML)(0).Submatches(0)
On Error GoTo 0
Then instead of the With ActiveSheet block, loop through array.
Dim db As DAO.Database
Dim rs As DAO.Recordset
CurrentDb.Execute "DELETE * FROM tblNetzPortDwnLd"
Set db = CurrentDb
Set rs = db.OpenRecordset("tblNetzPortDwnLd", dbOpenDynaset)
For r = LBound(results, 1) To UBound(results, 1)
With rs
.AddNew
.Fields("zpID") = r
.Fields("zpAktenzeichen") = results(r, 1)
.Fields("zpAmtsgericht") = results(r, 2)
.Fields("zpObjekt") = results(r, 3)
.Fields("zpVerkehrswert") = results(r, 4)
.Fields("zpTermin") = results(r, 5)
.Fields("zpPdfLink") = results(r, 6)
.Fields("zpAdditLink") = results(r, 7)
.Fields("zpm2") = results(r, 8)
.Update
End With
Next
All fields in table are text type, per our chat discussion.
So I've been able to write a script that is able to pull stock fundamental data from Excel. Yay. I know the update has stumped many. Here is my current code. I am looking to interact with the webpage so that I can pull balance sheet and cashflow data as well. Also, I want to look at the data from a quarterly perspective and not annual. For ease of reference: https://finance.yahoo.com/quote/AAPL/financials?p=AAPL
This will require me to press a button on the screen; however, I am not sure how to go about doing this.
Sub importData()
For Each Chart In ActiveWorkbook.Charts
Chart.Delete
Next
Dim dashboardSheet As Worksheet
Dim dataSheet As Worksheet
Dim market As String
Dim startDate As String
Dim endDate As String
Dim frequencyCode As String ' Time Period
Dim dataURL As String 'URL for Historical Data
Dim dataURL2 As String 'URL for Balance Sheet
Dim i As Long ' Counter for Existing Connections
Application.ScreenUpdating = False
Set dashboardSheet = cnDash
Set dataSheet = cnData
market = dashboardSheet.Range("C2").Value
startDate = dashboardSheet.Range("A3").Value
endDate = dashboardSheet.Range("A4").Value
frequencyCode = dashboardSheet.Range("C6").Value
dataURL = "https://query1.finance.yahoo.com/v7/finance/download/" + market + "?period1=" + startDate + "&period2=" + endDate + "&interval=" + frequencyCode + "&events=history"
' Clear the existing connections
For i = ActiveWorkbook.Connections.Count To 1 Step -1
ActiveWorkbook.Connections.Item(i).Delete
Next
' Clear the Data
dataSheet.Cells.Delete
If dashboardSheet.ChartObjects.Count > 0 Then ' Delete sheet if it exists
dashboardSheet.ChartObjects.Delete
dashboardSheet.Paste
Else
dashboardSheet.Paste
End If
' Pull data from Yahoo for Historical Graph '
dataSheet.Activate
With ActiveSheet.QueryTables.Add(Connection:="TEXT;" & dataURL, _
Destination:=dataSheet.Range("A1"))
.Name = "import"
.FieldNames = True ' field names in source data appear as column headers
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.TextFilePromptOnRefresh = False
.TextFilePlatform = 1252
.TextFileStartRow = 1 ' Start data from row 2 to exclude headings
.TextFileParseType = xlDelimited
.TextFileTextQualifier = xlTextQualifierDoubleQuote
.TextFileConsecutiveDelimiter = False
.TextFileTabDelimiter = True
.TextFileSemicolonDelimiter = False
.TextFileCommaDelimiter = True
.TextFileSpaceDelimiter = False
.TextFileColumnDataTypes = Array(1, 1, 1, 1, 1, 1, 1)
.TextFileTrailingMinusNumbers = True
.Refresh BackgroundQuery:=False
End With
'Pull data from Yahoo for Fundamentals '
Dim http As Object, s As String
Set http = CreateObject("MSXML2.XMLHTTP")
With http
.Open "GET", "https://finance.yahoo.com/quote/" + market + "/financials?p=", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send
s = .responseText
End With
Dim html As MSHTML.HTMLDocument, html2 As MSHTML.HTMLDocument, re As Object, matches As Object
Set html = New MSHTML.HTMLDocument: Set html2 = New MSHTML.HTMLDocument
Set re = CreateObject("VBScript.RegExp")
Set element = html.getElementsByClassName("Fz(s) Fw(500) D(ib) H(18px) C($primaryColor):h C($linkColor)")
element.Click
html.body.innerHTML = s
Dim headers(), rows As Object
headers = Array("Breakdown", "TTM")
Set rows = html.querySelectorAll(".fi-row")
With re
.Global = True
.MultiLine = True
.Pattern = "\d{1,2}/\d{1,2}/\d{4}"
Set matches = .Execute(s)
End With
Dim results(), match As Object, r As Long, c As Long, startHeaderCount As Long
startHeaderCount = UBound(headers)
ReDim Preserve headers(0 To matches.Count + startHeaderCount)
c = 1
For Each match In matches
headers(startHeaderCount + c) = match
c = c + 1
Next
Dim row As Object
ReDim results(1 To rows.Length, 1 To UBound(headers) + 1)
For r = 0 To rows.Length - 1
html2.body.innerHTML = rows.Item(r).outerHTML
Set row = html2.querySelectorAll("[title],[data-test=fin-col]")
For c = 0 To row.Length - 1
results(r + 1, c + 1) = row.Item(c).innerText
Next c
Next
Dim ws As Worksheet
Set ws = ThisWorkbook.Worksheets("data")
With ws
.Cells(1, 10).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 10).Resize(UBound(results, 1), UBound(results, 2)) = results
Columns("K:K").EntireColumn.AutoFit
Columns("L:L").EntireColumn.AutoFit
Columns("M:M").EntireColumn.AutoFit
Columns("N:N").EntireColumn.AutoFit
Columns("O:O").EntireColumn.AutoFit
Columns("J:J").EntireColumn.AutoFit
End With
Application.ScreenUpdating = True
Call HistoricalGraph(dataSheet, dashboardSheet, market)
End Sub
Here is my answer to scrape the exact page in R: Giving consent to cookies using rvest
To scrape this page, you need to run JS code. To do that in VBA, I think this is a good reference: https://ramblings.mcpher.com/get-started-snippets/step-by-step-guides/how-to-add-flightpaths-on-a-map/how-to-use-javascript-from-vba/
I have looked at the solution provided in this link Extract Table from Webpage in Excel using VBA and it was very helpful. But I need to extract the values in the div classes (cscore_score) and not a table Please refer to image below
The URL is: https://www.espncricinfo.com/scores
The div class is: cscore_score
The scores to extract is in nested divs. The sample data for each nested div I want to extract is like Country and Score i.e INDIA and in the next column "416..." into the Excel sheet.
Here's a screenshot of the table structure:
Public Sub GetInfo()
Const URL As String = "https://www.espncricinfo.com/scores"
Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, headers()
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
html.body.innerHTML = .responseText
End With
Set hDiv = html.querySelector("div.cscore")
Dim ul As Object, div As Object, r As Long, c As Long
r = 1
With ws
For Each div In hDiv.getElementsByClassName("cscore_link")
r = r + 1: c = 1
If r > 3 Then
For Each ul In div.getElementsByClassName("cscore_score")
.Cells(r - 2, c) = IIf(c = 2, "'" & div.innerText, div.innerText)
c = c + 1
Next
End If
Next
End With
End Sub
I would be grateful to receive any help to extract those scores from each div into the sheet.
You could use faster css selectors (using only class is faster than tag/type) which if used as shown below will allow you to also reduce your code complexity and improve performance by having only a single loop. Results can then be stored in an array and written out in one go - again another efficiency gain.
Note I am ensuring scores remain correctly formatted on output by concatenating "'" in front.
If you want scores for same match on same row:
Option Explicit
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long, r As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
ReDim results(1 To countries.Length / 2, 1 To 4)
For i = 0 To countries.Length - 1 Step 2
results(r, 1) = countries.item(i).innerText: results(r, 2) = "'" & scores.item(i).innerText
results(r, 3) = countries.item(i + 1).innerText: results(r, 4) = "'" & scores.item(i + 1).innerText
r = r + 1
Next
ws.Cells(1, 1).Resize(1, 4) = Array("Home", "Score", "Away", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Scores on different rows for every team:
Option Explicit
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
ReDim results(1 To countries.Length, 1 To 2)
For i = 0 To countries.Length - 1
results(i + 1, 1) = countries.item(i).innerText: results(i + 1, 2) = "'" & scores.item(i).innerText
Next
ws.Cells(1, 1) = "Country": ws.Cells(1, 2) = "Score"
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Additional column:
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object
Dim descs As Object, results(), i As Long, r As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
Set descs = html.querySelectorAll(".cscore--watchNotes .cscore_info-overview")
ReDim results(1 To countries.Length / 2, 1 To 5)
For i = 0 To countries.Length - 1 Step 2
results(r, 1) = descs.Item(i / 2).innerText
results(r, 2) = countries.Item(i).innerText: results(r, 3) = "'" & scores.Item(i).innerText
results(r, 4) = countries.Item(i + 1).innerText: results(r, 5) = "'" & scores.Item(i + 1).innerText
r = r + 1
Next
ws.Cells(1, 1).Resize(1, 5) = Array("Desc", "Home", "Score", "Away", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Your request seems to be just fine. Parsing the HTML is where your problem is.
You could do something like the following (you can ignore the request part):
Option Explicit
Sub espn()
Dim req As New WinHttpRequest
Dim HTMLDocument As New HTMLDocument
Dim listElement As HTMLUListElement
Dim listItem As HTMLLIElement
Dim sht As Worksheet
Dim i As Long
Dim j As Long
Dim url As String
url = "https://www.espncricinfo.com/scores"
With req
.Open "GET", url, False
.send
HTMLDocument.body.innerHTML = .responseText
End With
Set sht = ThisWorkbook.Worksheets("Name of your worksheet")
i = 2
For Each listElement In HTMLDocument.getElementsByClassName("cscore_competitors")
j = 1
For Each listItem In listElement.getElementsByTagName("li")
sht.Cells(i, j) = listItem.getElementsByClassName("cscore_name cscore_name--long")(0).innerText
sht.Cells(i, j + 1) = listItem.getElementsByClassName("cscore_score")(0).innerText
j = j + 2
Next listItem
i = i + 1
Next listElement
End Sub
The results would look like so:
Basically each game is represented by a ul (unnumbered list) element which consists of two li elements which contain the info about the names and the score.
This code (like other codes) written by Precious #QHarr works well. However, when importing data, I want to retrieve the data saved in the connection. The output of the code and the data I want to receive are shown in the attached image. What kind of code can I solve? (Google Translate)
Public Sub DYarislar()
Dim asays(), ws As Worksheet, asay As Long, html As HTMLDocument
Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long
headers = Array("Asay", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("X")
Set html = New HTMLDocument
asays = Application.Transpose(Sheets("Y").Range("A2:A" & Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1).Value)
Const numTableRows As Long = 250
Const numTableColumns As Long = 14
Const BASE_URL As String = "https://yenibeygir.com/at/"
numberOfRequests = UBound(asays)
Dim results(), headerRow As Boolean, tRows As Object, tRow As Object, iRow As Long
Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)
Application.ScreenUpdating = False
For asay = 1 To numberOfRequests
headerRow = True
url = BASE_URL & asays(asay)
html.body.innerHTML = http.GetString(url)
Set hTable = html.querySelector(".at_Yarislar")
Set tRows = hTable.getElementsByTagName("tr")
Const numberOfRaces As Long = 22
Dim counter As Long
counter = 1
For Each tRow In tRows
If Not headerRow Then
counter = counter + 1
If counter > numberOfRaces Then Exit For
c = 2: r = r + 1
results(r, 1) = asays(asay)
Set tCells = tRow.getElementsByTagName("td")
For Each tCell In tCells
results(r, c) = tCell.innerText
c = c + 1
Next
End If
headerRow = False
Next
Next
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
Application.ScreenUpdating = True
End Sub
You only need a few small changes. You use the same Class, clsHTTP, as before, then use the module 1 code below.
Notes:
Within each source page table row the jockey column contains an a tag link element
You can access this using:
tRow.getElementsByTagName("a")(1).href
As the link is relative you need to do a text replacement to add in the base part of the URL i.e.
Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
The id is part of the href and can be extracted using Split:
Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
To allow for these additional elements in the results you need to increase the number of output column count:
Const numTableColumns As Long = 16
And adapt your table rows loop to populate the additional columns:
results(r, 2) = Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
results(r, 3) = Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
Also, adjust in the loop to ensure other columns are populated from the 4th onwards (as 2 extra columns):
c = 4
Finally, adjust your headers to include the 2 new columns:
headers = Array("Asay", "JokeyId", "JokeyLink", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
VBA:
Module 1:
Option Explicit
Public Sub DYarislar()
Dim asays(), ws As Worksheet, asay As Long, html As HTMLDocument
Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long
headers = Array("Asay", "JokeyId", "JokeyLink", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("X")
Set html = New HTMLDocument
asays = Application.Transpose(Sheets("Y").Range("A2:A" & Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1).Value)
Const numTableRows As Long = 250
Const numTableColumns As Long = 16
Const BASE_URL As String = "https://yenibeygir.com/at/"
Const BASE_URL2 As String = "https://yenibeygir.com"
numberOfRequests = UBound(asays)
Dim results(), headerRow As Boolean, tRows As Object, tRow As Object, iRow As Long
Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)
Application.ScreenUpdating = False
For asay = 1 To numberOfRequests
headerRow = True
url = BASE_URL & asays(asay)
html.body.innerHTML = http.GetString(url)
Set hTable = html.querySelector(".at_Yarislar")
Set tRows = hTable.getElementsByTagName("tr")
For Each tRow In tRows
If Not headerRow Then
c = 4: r = r + 1
results(r, 1) = asays(asay)
On Error Resume Next
results(r, 2) = Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
results(r, 3) = Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
On Error GoTo 0
Set tCells = tRow.getElementsByTagName("td")
For Each tCell In tCells
results(r, c) = tCell.innerText
c = c + 1
Next
End If
headerRow = False
Next
Next
With ws
.Cells(1, 3).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 3).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
Application.ScreenUpdating = True
End Sub
Sample results: