Related
I'd like to know more about how array function is applied when scrape data from website. I'm currently using this vba to copy data from website. The code can scrape the data I want, however when it comes to copy data to the destination worksheet it copies all data to A1 cell. Since this vba was developed for my previous project and works fine I'm not sure which part went wrong.
Sub CopyFromHKAB()
Dim ie As Object, btnmore As Object, tbl As Object
Dim rr As Object, cc As Object, r As Integer, c As Integer, i As Integer, j As Integer
ThisWorkbook.Sheets("data").UsedRange.Clear
Set ie = CreateObject("internetexplorer.application")
With ie
.Visible = True
.navigate "https://www.hkab.org.hk/DisplayMemberAction.do?sectionid=4&subsectionid=0"
Do
DoEvents
Loop While .readyState <> 4 Or .Busy
Set tbl = .document.getElementsByClassName("etxtmed")(2)
End With
'get data from table
r = tbl.Rows.Length - 1
c = tbl.Rows(0).Cells.Length - 1
ReDim arr(0 To r, 0 To c)
Set rr = tbl.Rows
For i = 0 To r
Set cc = rr(i).Cells
For j = 0 To c
arr(i, j) = cc(j).innertext
Next
Next
ie.Quit
Application.ScreenUpdating = False
ThisWorkbook.Sheets("Sheet1").Cells(1, 1).Resize(r + 1, c + 1) = arr
With ThisWorkbook.Sheets("data")
.UsedRange.WrapText = False
.Columns.AutoFit
End With
End Sub
You need to pick up the right table given they are nested so change the index to 3. Otherwise, you are picking up the shared parent and thus all the listings are in fact within the one child element hence your current output.
Then you need to adjust your code to skip the first row.
N.B. You don't actually need IE for this as the content you want is static. You can use XMLHTTP. And you are writing out data to a different sheet than the one you end format.
Sub CopyFromHKAB()
Dim ie As Object, btnmore As Object, tbl As Object
Dim rr As Object, cc As Object, r As Integer, c As Integer, i As Integer, j As Integer
ThisWorkbook.Sheets("data").UsedRange.Clear
Set ie = CreateObject("internetexplorer.application")
With ie
.Visible = True
.navigate "https://www.hkab.org.hk/DisplayMemberAction.do?sectionid=4&subsectionid=0"
Do
DoEvents
Loop While .readyState <> 4 Or .Busy
Set tbl = .document.getElementsByClassName("etxtmed")(3)
End With
'get data from table
r = tbl.Rows.Length - 1
c = tbl.Rows(1).Cells.Length - 1
ReDim arr(0 To r, 0 To c)
Set rr = tbl.Rows
For i = 1 To r
Set cc = rr(i).Cells
For j = 0 To c
arr(i - 1, j) = cc(j).innertext
Next
Next
ie.Quit
'Application.ScreenUpdating = False
ThisWorkbook.Sheets("Sheet1").Cells(1, 1).Resize(r + 1, c + 1) = arr
With ThisWorkbook.Worksheets("data")
.UsedRange.WrapText = False
.Columns.AutoFit
End With
End Sub
I would consider switching to XHR to avoid overhead of browser, and using querySelectorAll to allow for using a css selector list to target only the nodes of interest
Option Explicit
Public Sub GetHKABInfo()
'tools > references > Microsoft HTML Object Library
Dim html As MSHTML.HTMLDocument, xhr As Object
Set xhr = CreateObject("MSXML2.XMLHTTP")
Set html = New MSHTML.HTMLDocument
With xhr
.Open "GET", "https://www.hkab.org.hk/DisplayMemberAction.do?sectionid=4&subsectionid=0", False
.setRequestHeader "User-Agent", "Safari/537.36"
.send
html.body.innerHTML = .responseText
End With
Dim arr() As Variant, nodes As MSHTML.IHTMLDOMChildrenCollection, i As Long
Set nodes = html.querySelectorAll(".etxtmed .etxtmed td")
ReDim arr(1 To nodes.Length - 1)
For i = LBound(arr) To UBound(arr)
arr(i) = nodes.Item(i).innertext
Next
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Resize(UBound(arr), 1) = Application.Transpose(arr)
End Sub
So I've been able to write a script that is able to pull stock fundamental data from Excel. Yay. I know the update has stumped many. Here is my current code. I am looking to interact with the webpage so that I can pull balance sheet and cashflow data as well. Also, I want to look at the data from a quarterly perspective and not annual. For ease of reference: https://finance.yahoo.com/quote/AAPL/financials?p=AAPL
This will require me to press a button on the screen; however, I am not sure how to go about doing this.
Sub importData()
For Each Chart In ActiveWorkbook.Charts
Chart.Delete
Next
Dim dashboardSheet As Worksheet
Dim dataSheet As Worksheet
Dim market As String
Dim startDate As String
Dim endDate As String
Dim frequencyCode As String ' Time Period
Dim dataURL As String 'URL for Historical Data
Dim dataURL2 As String 'URL for Balance Sheet
Dim i As Long ' Counter for Existing Connections
Application.ScreenUpdating = False
Set dashboardSheet = cnDash
Set dataSheet = cnData
market = dashboardSheet.Range("C2").Value
startDate = dashboardSheet.Range("A3").Value
endDate = dashboardSheet.Range("A4").Value
frequencyCode = dashboardSheet.Range("C6").Value
dataURL = "https://query1.finance.yahoo.com/v7/finance/download/" + market + "?period1=" + startDate + "&period2=" + endDate + "&interval=" + frequencyCode + "&events=history"
' Clear the existing connections
For i = ActiveWorkbook.Connections.Count To 1 Step -1
ActiveWorkbook.Connections.Item(i).Delete
Next
' Clear the Data
dataSheet.Cells.Delete
If dashboardSheet.ChartObjects.Count > 0 Then ' Delete sheet if it exists
dashboardSheet.ChartObjects.Delete
dashboardSheet.Paste
Else
dashboardSheet.Paste
End If
' Pull data from Yahoo for Historical Graph '
dataSheet.Activate
With ActiveSheet.QueryTables.Add(Connection:="TEXT;" & dataURL, _
Destination:=dataSheet.Range("A1"))
.Name = "import"
.FieldNames = True ' field names in source data appear as column headers
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.TextFilePromptOnRefresh = False
.TextFilePlatform = 1252
.TextFileStartRow = 1 ' Start data from row 2 to exclude headings
.TextFileParseType = xlDelimited
.TextFileTextQualifier = xlTextQualifierDoubleQuote
.TextFileConsecutiveDelimiter = False
.TextFileTabDelimiter = True
.TextFileSemicolonDelimiter = False
.TextFileCommaDelimiter = True
.TextFileSpaceDelimiter = False
.TextFileColumnDataTypes = Array(1, 1, 1, 1, 1, 1, 1)
.TextFileTrailingMinusNumbers = True
.Refresh BackgroundQuery:=False
End With
'Pull data from Yahoo for Fundamentals '
Dim http As Object, s As String
Set http = CreateObject("MSXML2.XMLHTTP")
With http
.Open "GET", "https://finance.yahoo.com/quote/" + market + "/financials?p=", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send
s = .responseText
End With
Dim html As MSHTML.HTMLDocument, html2 As MSHTML.HTMLDocument, re As Object, matches As Object
Set html = New MSHTML.HTMLDocument: Set html2 = New MSHTML.HTMLDocument
Set re = CreateObject("VBScript.RegExp")
Set element = html.getElementsByClassName("Fz(s) Fw(500) D(ib) H(18px) C($primaryColor):h C($linkColor)")
element.Click
html.body.innerHTML = s
Dim headers(), rows As Object
headers = Array("Breakdown", "TTM")
Set rows = html.querySelectorAll(".fi-row")
With re
.Global = True
.MultiLine = True
.Pattern = "\d{1,2}/\d{1,2}/\d{4}"
Set matches = .Execute(s)
End With
Dim results(), match As Object, r As Long, c As Long, startHeaderCount As Long
startHeaderCount = UBound(headers)
ReDim Preserve headers(0 To matches.Count + startHeaderCount)
c = 1
For Each match In matches
headers(startHeaderCount + c) = match
c = c + 1
Next
Dim row As Object
ReDim results(1 To rows.Length, 1 To UBound(headers) + 1)
For r = 0 To rows.Length - 1
html2.body.innerHTML = rows.Item(r).outerHTML
Set row = html2.querySelectorAll("[title],[data-test=fin-col]")
For c = 0 To row.Length - 1
results(r + 1, c + 1) = row.Item(c).innerText
Next c
Next
Dim ws As Worksheet
Set ws = ThisWorkbook.Worksheets("data")
With ws
.Cells(1, 10).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 10).Resize(UBound(results, 1), UBound(results, 2)) = results
Columns("K:K").EntireColumn.AutoFit
Columns("L:L").EntireColumn.AutoFit
Columns("M:M").EntireColumn.AutoFit
Columns("N:N").EntireColumn.AutoFit
Columns("O:O").EntireColumn.AutoFit
Columns("J:J").EntireColumn.AutoFit
End With
Application.ScreenUpdating = True
Call HistoricalGraph(dataSheet, dashboardSheet, market)
End Sub
Here is my answer to scrape the exact page in R: Giving consent to cookies using rvest
To scrape this page, you need to run JS code. To do that in VBA, I think this is a good reference: https://ramblings.mcpher.com/get-started-snippets/step-by-step-guides/how-to-add-flightpaths-on-a-map/how-to-use-javascript-from-vba/
I have looked at the solution provided in this link Extract Table from Webpage in Excel using VBA and it was very helpful. But I need to extract the values in the div classes (cscore_score) and not a table Please refer to image below
The URL is: https://www.espncricinfo.com/scores
The div class is: cscore_score
The scores to extract is in nested divs. The sample data for each nested div I want to extract is like Country and Score i.e INDIA and in the next column "416..." into the Excel sheet.
Here's a screenshot of the table structure:
Public Sub GetInfo()
Const URL As String = "https://www.espncricinfo.com/scores"
Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, headers()
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
html.body.innerHTML = .responseText
End With
Set hDiv = html.querySelector("div.cscore")
Dim ul As Object, div As Object, r As Long, c As Long
r = 1
With ws
For Each div In hDiv.getElementsByClassName("cscore_link")
r = r + 1: c = 1
If r > 3 Then
For Each ul In div.getElementsByClassName("cscore_score")
.Cells(r - 2, c) = IIf(c = 2, "'" & div.innerText, div.innerText)
c = c + 1
Next
End If
Next
End With
End Sub
I would be grateful to receive any help to extract those scores from each div into the sheet.
You could use faster css selectors (using only class is faster than tag/type) which if used as shown below will allow you to also reduce your code complexity and improve performance by having only a single loop. Results can then be stored in an array and written out in one go - again another efficiency gain.
Note I am ensuring scores remain correctly formatted on output by concatenating "'" in front.
If you want scores for same match on same row:
Option Explicit
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long, r As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
ReDim results(1 To countries.Length / 2, 1 To 4)
For i = 0 To countries.Length - 1 Step 2
results(r, 1) = countries.item(i).innerText: results(r, 2) = "'" & scores.item(i).innerText
results(r, 3) = countries.item(i + 1).innerText: results(r, 4) = "'" & scores.item(i + 1).innerText
r = r + 1
Next
ws.Cells(1, 1).Resize(1, 4) = Array("Home", "Score", "Away", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Scores on different rows for every team:
Option Explicit
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
ReDim results(1 To countries.Length, 1 To 2)
For i = 0 To countries.Length - 1
results(i + 1, 1) = countries.item(i).innerText: results(i + 1, 2) = "'" & scores.item(i).innerText
Next
ws.Cells(1, 1) = "Country": ws.Cells(1, 2) = "Score"
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Additional column:
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object
Dim descs As Object, results(), i As Long, r As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
Set descs = html.querySelectorAll(".cscore--watchNotes .cscore_info-overview")
ReDim results(1 To countries.Length / 2, 1 To 5)
For i = 0 To countries.Length - 1 Step 2
results(r, 1) = descs.Item(i / 2).innerText
results(r, 2) = countries.Item(i).innerText: results(r, 3) = "'" & scores.Item(i).innerText
results(r, 4) = countries.Item(i + 1).innerText: results(r, 5) = "'" & scores.Item(i + 1).innerText
r = r + 1
Next
ws.Cells(1, 1).Resize(1, 5) = Array("Desc", "Home", "Score", "Away", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Your request seems to be just fine. Parsing the HTML is where your problem is.
You could do something like the following (you can ignore the request part):
Option Explicit
Sub espn()
Dim req As New WinHttpRequest
Dim HTMLDocument As New HTMLDocument
Dim listElement As HTMLUListElement
Dim listItem As HTMLLIElement
Dim sht As Worksheet
Dim i As Long
Dim j As Long
Dim url As String
url = "https://www.espncricinfo.com/scores"
With req
.Open "GET", url, False
.send
HTMLDocument.body.innerHTML = .responseText
End With
Set sht = ThisWorkbook.Worksheets("Name of your worksheet")
i = 2
For Each listElement In HTMLDocument.getElementsByClassName("cscore_competitors")
j = 1
For Each listItem In listElement.getElementsByTagName("li")
sht.Cells(i, j) = listItem.getElementsByClassName("cscore_name cscore_name--long")(0).innerText
sht.Cells(i, j + 1) = listItem.getElementsByClassName("cscore_score")(0).innerText
j = j + 2
Next listItem
i = i + 1
Next listElement
End Sub
The results would look like so:
Basically each game is represented by a ul (unnumbered list) element which consists of two li elements which contain the info about the names and the score.
Need to use VBA-JSON to pull data from different URLs where the numbers in the URL change
I am collecting data from a crypto-game that I play. I am already able to parse data using the site's API for just my "mons". I am trying to to collect the same data for ALL of the mons in the game. The API lets you pull data for 99 mons at a time (caps at 99 at a time). There are approx. 48,000 mons in existence and that number continues to go up. Each mon has an ID number (1 being the first ever caught and n+1 for each one after that).
This is the link to access the data for mons 1-99: https://www.etheremon.com/api/monster/get_data?monster_ids=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
I need to pull data for mons 1-99, then 100-198, then 199-297 and so on all the way to 48000.
From each mon I want to collect the ID Number, "class_name", "total_level", "perfect_rate", "create_index" (which are all dicts) and most importantly I want the "total_battle_stats" (which is an array).
Here is the code I have for pulling all of those variables for just the mons in my inventory (it references a different link), but it already includes the arrangement of how I want it.
I just need those same variables but referencing a bunch of different links, not just one.
Option Explicit
Public Sub WriteOutBattleInfo()
Dim headers(), r As Long, i As Long, json As Object, key As Variant, ws As Worksheet, battleStats As Object
Set ws = ThisWorkbook.Worksheets("Sheet1")
headers = Array("Monster #", "Name", "Total Level", "Perfection", "Catch Number", "HP", "PA", "PD", "SA", "SD", "SPD")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.etheremon.com/api/user/get_my_monster?trainer_address=0x2Fef65e4D69a38bf0dd074079f367CDF176eC0De", False
.Send
Set json = JsonConverter.ParseJson(.ResponseText)("data")("monsters") 'dictionary of dictionaries
End With
r = 2
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For Each key In json.Keys
With ws
.Cells(r, 1) = key
.Cells(r, 2) = json(key)("class_name")
.Cells(r, 3) = json(key)("total_level")
.Cells(r, 4) = json(key)("perfect_rate")
.Cells(r, 5) = json(key)("create_index")
Set battleStats = json(key)("total_battle_stats")
For i = 1 To battleStats.Count
.Cells(r, i + 5) = battleStats.Item(i)
Next i
End With
r = r + 1
Next
Range(Selection, ActiveCell.SpecialCells(xlLastCell)).Select
ActiveWorkbook.Worksheets("Sheet1").Sort.SortFields.Clear
ActiveWorkbook.Worksheets("Sheet1").Sort.SortFields.Add2 key:=Range("C2:C110" _
), SortOn:=xlSortOnValues, Order:=xlDescending, DataOption:=xlSortNormal
With ActiveWorkbook.Worksheets("Sheet1").Sort
.SetRange Range("A1:K110")
.Header = xlYes
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
Selection.Columns.AutoFit
End Sub
I would like it to look exactly like this: https://imgur.com/a/xPA9T7W
But I want all of the Mons from ID 1 to 48000.
You could use a function to increment the ids to concatenate onto a base url. The site throttles/blocks if you request too quickly/possibly too many times. Check the documentation for any advice on this.
I show how you could retrieve all. I include a test case for 1 to 5 requests (uncomment to get the full number of requests. Note: I give a line, for you to tweak, which allows for adding in a delay every x requests to try and avoid throttling/blocking. It seems likely the number is quite low before this happens.
Later on, you can consider moving this into a class to hold the xmlhttp object and provide it methods such as getItems. Example here.
Option Explicit
Public Sub WriteOutBattleInfo()
Const BASE_URL As String = " https://www.etheremon.com/api/monster/get_data?monster_ids="
Const END_COUNT As Long = 48000
Const BATCH_SIZE As Long = 99
Dim numberOfRequests As Long, i As Long, j As Long, ids As String
Dim headers(), r As Long, json As Object, key As Variant, ws As Worksheet, battleStats As Object
Set ws = ThisWorkbook.Worksheets("Sheet1")
headers = Array("Monster #", "Name", "Total Level", "Perfection", "Catch Number", "HP", "PA", "PD", "SA", "SD", "SPD")
numberOfRequests = Application.WorksheetFunction.RoundDown(END_COUNT / BATCH_SIZE, 0)
ids = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99"
Dim results()
ReDim results(1 To END_COUNT, 1 To 11)
r = 1
With CreateObject("MSXML2.XMLHTTP")
For i = 1 To 5 'numberOfRequests + 1
If i Mod 10 = 0 Then Application.Wait Now + TimeSerial(0, 0, 1)
If i > 1 Then ids = IncrementIds(ids, BATCH_SIZE, END_COUNT)
.Open "GET", BASE_URL & ids, False
.send
Set json = JsonConverter.ParseJson(.responseText)("data")
For Each key In json.keys
results(r, 1) = key
results(r, 2) = json(key)("class_name")
results(r, 3) = json(key)("total_level")
results(r, 4) = json(key)("perfect_rate")
results(r, 5) = json(key)("create_index")
Set battleStats = json(key)("total_battle_stats")
For j = 1 To battleStats.Count
results(r, j + 5) = battleStats.item(j)
Next j
r = r + 1
Next
Next
End With
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Public Function IncrementIds(ByVal ids As String, ByVal BATCH_SIZE As Long, ByVal END_COUNT) As String
Dim i As Long, arrayIds() As String
arrayIds = Split(ids, ",")
For i = LBound(arrayIds) To UBound(arrayIds)
If CLng(arrayIds(i)) + BATCH_SIZE <= END_COUNT Then
arrayIds(i) = arrayIds(i) + BATCH_SIZE
Else
ReDim Preserve arrayIds(0 To i - 1)
Exit For
End If
Next
IncrementIds = Join(arrayIds, ",")
End Function
This code (like other codes) written by Precious #QHarr works well. However, when importing data, I want to retrieve the data saved in the connection. The output of the code and the data I want to receive are shown in the attached image. What kind of code can I solve? (Google Translate)
Public Sub DYarislar()
Dim asays(), ws As Worksheet, asay As Long, html As HTMLDocument
Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long
headers = Array("Asay", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("X")
Set html = New HTMLDocument
asays = Application.Transpose(Sheets("Y").Range("A2:A" & Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1).Value)
Const numTableRows As Long = 250
Const numTableColumns As Long = 14
Const BASE_URL As String = "https://yenibeygir.com/at/"
numberOfRequests = UBound(asays)
Dim results(), headerRow As Boolean, tRows As Object, tRow As Object, iRow As Long
Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)
Application.ScreenUpdating = False
For asay = 1 To numberOfRequests
headerRow = True
url = BASE_URL & asays(asay)
html.body.innerHTML = http.GetString(url)
Set hTable = html.querySelector(".at_Yarislar")
Set tRows = hTable.getElementsByTagName("tr")
Const numberOfRaces As Long = 22
Dim counter As Long
counter = 1
For Each tRow In tRows
If Not headerRow Then
counter = counter + 1
If counter > numberOfRaces Then Exit For
c = 2: r = r + 1
results(r, 1) = asays(asay)
Set tCells = tRow.getElementsByTagName("td")
For Each tCell In tCells
results(r, c) = tCell.innerText
c = c + 1
Next
End If
headerRow = False
Next
Next
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
Application.ScreenUpdating = True
End Sub
You only need a few small changes. You use the same Class, clsHTTP, as before, then use the module 1 code below.
Notes:
Within each source page table row the jockey column contains an a tag link element
You can access this using:
tRow.getElementsByTagName("a")(1).href
As the link is relative you need to do a text replacement to add in the base part of the URL i.e.
Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
The id is part of the href and can be extracted using Split:
Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
To allow for these additional elements in the results you need to increase the number of output column count:
Const numTableColumns As Long = 16
And adapt your table rows loop to populate the additional columns:
results(r, 2) = Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
results(r, 3) = Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
Also, adjust in the loop to ensure other columns are populated from the 4th onwards (as 2 extra columns):
c = 4
Finally, adjust your headers to include the 2 new columns:
headers = Array("Asay", "JokeyId", "JokeyLink", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
VBA:
Module 1:
Option Explicit
Public Sub DYarislar()
Dim asays(), ws As Worksheet, asay As Long, html As HTMLDocument
Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long
headers = Array("Asay", "JokeyId", "JokeyLink", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("X")
Set html = New HTMLDocument
asays = Application.Transpose(Sheets("Y").Range("A2:A" & Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1).Value)
Const numTableRows As Long = 250
Const numTableColumns As Long = 16
Const BASE_URL As String = "https://yenibeygir.com/at/"
Const BASE_URL2 As String = "https://yenibeygir.com"
numberOfRequests = UBound(asays)
Dim results(), headerRow As Boolean, tRows As Object, tRow As Object, iRow As Long
Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)
Application.ScreenUpdating = False
For asay = 1 To numberOfRequests
headerRow = True
url = BASE_URL & asays(asay)
html.body.innerHTML = http.GetString(url)
Set hTable = html.querySelector(".at_Yarislar")
Set tRows = hTable.getElementsByTagName("tr")
For Each tRow In tRows
If Not headerRow Then
c = 4: r = r + 1
results(r, 1) = asays(asay)
On Error Resume Next
results(r, 2) = Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
results(r, 3) = Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
On Error GoTo 0
Set tCells = tRow.getElementsByTagName("td")
For Each tCell In tCells
results(r, c) = tCell.innerText
c = c + 1
Next
End If
headerRow = False
Next
Next
With ws
.Cells(1, 3).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 3).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
Application.ScreenUpdating = True
End Sub
Sample results: