I am trying to import the bullet points from a website into an Excel table (each bullet point is an li tag).
However, I am facing a significant difficulty: some of the pages I would like to scrape have several "Parts" (Part #1, Part #2, like this one https://www.thewindpower.net/windfarm_en_793_virtsu-i.php) and others do not (like this one https://www.thewindpower.net/windfarm_en_7410_khizi.php).
I have already come up with a draft of the code that I believe is a start; however, I still have some issues and I get an error message ("Time out").
Do you have any idea how I could fix it ?
Thanks in advance for your help,
' Entry point: scrapes a single, hard-coded wind-farm page by handing its
' address to GetPage.
Sub Page()
    Call GetPage("https://www.thewindpower.net/windfarm_en_1922_a-capelada-i.php")
End Sub
' Downloads the page at URL and writes the text of its bullet points (<li>
' elements) to ThisWorkbook's "Sheet1", one bullet per row in column A,
' starting at row 1.
'
' The fifth <ul> on the page (index 4) is treated as the "generalities" list.
' When the page contains <h3> headings it is treated as a multi-part page and
' the generalities are repeated once per part, each part followed by a blank
' separator row. Otherwise the sixth <ul> (index 5) is written as the detail
' list directly below the generalities.
'
' Parameters:
'   URL - address of the page to download.
'
' Requires references to "Microsoft XML, v6.0" and
' "Microsoft HTML Object Library".
Sub GetPage(URL As String)
    Dim Request As MSXML2.ServerXMLHTTP60: Set Request = New MSXML2.ServerXMLHTTP60
    Dim Result As HTMLDocument: Set Result = New HTMLDocument
    Dim oLists As MSHTML.IHTMLElementCollection
    Dim oHeadings As MSHTML.IHTMLElementCollection
    Dim oRows As MSHTML.IHTMLElementCollection
    Dim oRows2 As MSHTML.IHTMLElementCollection
    Dim Sheet As Worksheet 'output sheet
    Dim iRow As Long       'next output row
    Dim count As Long      'number of <h3> headings on the page
    Dim p As Long          'number of parts
    Dim o As Long          'part counter

    Request.Open "GET", URL, False
    Request.send
    ' Do not try to parse an error page.
    If Request.Status <> 200 Then
        MsgBox "Problem" & vbNewLine & Request.Status & " - " & Request.statusText
        Exit Sub
    End If
    Result.body.innerHTML = Request.responseText

    ' The generalities list is addressed by position, so make sure it exists
    ' before indexing into the collection.
    Set oLists = Result.getElementsByTagName("ul")
    If oLists.Length < 5 Then
        MsgBox "Unexpected page layout: fewer than 5 <ul> elements at " & URL
        Exit Sub
    End If
    Set oRows = oLists(4).getElementsByTagName("li")

    Set Sheet = ThisWorkbook.Worksheets("Sheet1")
    iRow = 1

    Set oHeadings = Result.getElementsByTagName("h3")
    count = oHeadings.Length
    If count > 0 Then
        ' Each part is assumed to contribute two <h3> headings.
        p = count / 2
        For o = 1 To p
            ' Generalities are repeated for every part. iRow is NOT reset
            ' here, so each part is written below the previous one instead
            ' of overwriting it.
            iRow = WriteListItems(Sheet, oRows, iRow)

            ' NOTE(review): an <h3> normally has no <li> descendants, so this
            ' lookup may well return an empty collection; the part's data is
            ' probably in the <ul> that follows the heading. Confirm against
            ' the page markup before relying on the detail rows.
            Set oRows2 = oHeadings(o).getElementsByTagName("li")
            iRow = WriteListItems(Sheet, oRows2, iRow)

            ' Leave a separator row between parts. The row is addressed by
            ' number on the output sheet (the original passed the literal
            ' string "iRow" to Range, which is not a valid address).
            iRow = iRow + 1
            Sheet.Rows(iRow).Insert CopyOrigin:=xlFormatFromRightOrBelow
        Next o
    Else
        iRow = WriteListItems(Sheet, oRows, iRow)
        If oLists.Length > 5 Then
            Set oRows2 = oLists(5).getElementsByTagName("li")
            iRow = WriteListItems(Sheet, oRows2, iRow)
        End If
    End If
End Sub

' Writes the innerText of every element in items down column A of Sheet,
' starting at firstRow, and returns the first row that was not written to.
Private Function WriteListItems(ByVal Sheet As Worksheet, ByVal items As MSHTML.IHTMLElementCollection, ByVal firstRow As Long) As Long
    Dim oItem As MSHTML.IHTMLElement
    Dim nextRow As Long

    nextRow = firstRow
    If Not items Is Nothing Then
        ' Each item already IS an <li>; querying it again for "li" would only
        ' return nested items and skip the bullet itself.
        For Each oItem In items
            Sheet.Cells(nextRow, 1).Value = oItem.innerText
            nextRow = nextRow + 1
        Next oItem
    End If
    WriteListItems = nextRow
End Function
Summary
I would gather nodeLists via CSS selectors to match the relevant nodes. I would have two separate nodeLists: one for the generalities and another for the parts. I would determine the number of parts (as they repeat) and loop over that number of parts, concatenating the HTML of each part's first list with the HTML of its repeated list that comes later in the page. Then I would put that combined HTML into a surrogate HTMLDocument variable and make a new nodeList of all the li elements it contains. A helper function returns the text of the nodeList's nodes as an array, which is then written out to the sheet, one row per part.
VBA:
Option Explicit
' Downloads one thewindpower.net wind-farm page and writes its bullet-point
' text to ThisWorkbook's "Sheet1".
'
' The "generalities" items are written from column A. If the page has part
' headings, one row is written per part: the generalities followed (from
' column E) by the items of that part's two lists. Otherwise a single row is
' written with the generalities and the items of the second "h1 + ul" list.
Public Sub WindInfo()
    'VBE> Tools > References:
    '1. Microsoft XML, v6
    '2. Microsoft HTML Object Library
    '3. Microsoft Scripting Runtime
    Dim xhr As MSXML2.XMLHTTP60: Set xhr = New MSXML2.XMLHTTP60
    Dim html As MSHTML.HTMLDocument: Set html = New MSHTML.HTMLDocument
    Dim ws As Worksheet: Set ws = ThisWorkbook.Worksheets("Sheet1")

    With xhr
        .Open "GET", "https://www.thewindpower.net/windfarm_en_7410_khizi.php", False
        .send
        html.body.innerHTML = .responseText
    End With

    Dim generalities As Object, arrGen(), partsList As Object
    Dim parts As Object, numberOfParts As Long
    Dim i As Long, liNodes As Object, arr()
    Dim r As Long

    Set generalities = html.querySelectorAll("#bloc_texte table ~ table li")
    arrGen = GetNodesTextAsArray(generalities)

    Set partsList = html.querySelectorAll("h1 ~ h3, ul ~ h3")
    r = 1

    If partsList.Length > 0 Then
        ' Every part heading appears twice on the page, so half the heading
        ' count is the number of parts. Reuse the list that was just queried.
        numberOfParts = partsList.Length / 2
        Set parts = html.querySelectorAll("h3 + ul")

        Dim html2 As MSHTML.HTMLDocument: Set html2 = New MSHTML.HTMLDocument
        Dim combinedHtml As String

        For i = 0 To numberOfParts - 1
            ' Stop when the page has fewer lists than headings suggest.
            If i > parts.Length - 1 Then Exit For

            ws.Cells(r, 1).Resize(1, UBound(arrGen)) = arrGen

            ' Join the part's first list with its repeated list (found
            ' numberOfParts positions later), when that second list exists.
            combinedHtml = parts.Item(i).outerHTML
            If i + numberOfParts <= parts.Length - 1 Then
                combinedHtml = combinedHtml & parts.Item(i + numberOfParts).outerHTML
            End If
            html2.body.innerHTML = combinedHtml

            Set liNodes = html2.querySelectorAll("li")
            arr = GetNodesTextAsArray(liNodes)
            ws.Cells(r, 5).Resize(1, UBound(arr)) = arr
            r = r + 1
        Next
    Else
        Dim alternateNodeList As Object: Set alternateNodeList = html.querySelectorAll("#bloc_texte h1 + ul")
        ' Item() is zero-based, so Item(1) is the SECOND match and needs at
        ' least two matches to exist.
        If alternateNodeList.Length >= 2 Then
            arr = GetNodesTextAsArray(alternateNodeList.Item(1).getElementsByTagName("li"))
        Else
            ' Third element pads the zero-based array so UBound equals the
            ' number of cells to fill ("No", "Data").
            arr = Array("No", "Data", vbNullString)
        End If
        ws.Cells(r, 1).Resize(1, UBound(arrGen)) = arrGen
        ws.Cells(r, 5).Resize(1, UBound(arr)) = arr
    End If
End Sub
' Returns the innerText of every node in nodeList as a one-based array.
'
' Parameters:
'   nodeList - object exposing a zero-based .Item(index) and a .Length
'              (e.g. the result of querySelectorAll or getElementsByTagName).
'
' Returns:
'   A one-based Variant array with one element per node. When nodeList is
'   Nothing or empty, the two-element array ("No", "Data") is returned, so
'   UBound of the result is always the number of cells needed to hold it.
Public Function GetNodesTextAsArray(ByVal nodeList As Object) As Variant()
    Dim i As Long, results()
    Dim isEmptyList As Boolean

    ' VBA does not short-circuit Or, so test for Nothing before .Length.
    If nodeList Is Nothing Then
        isEmptyList = True
    ElseIf nodeList.Length = 0 Then
        isEmptyList = True
    End If

    If isEmptyList Then
        ' Same lower bound as the normal result; callers that size their
        ' output with UBound still get 2.
        ReDim results(1 To 2)
        results(1) = "No"
        results(2) = "Data"
        GetNodesTextAsArray = results
        Exit Function
    End If

    ReDim results(1 To nodeList.Length)
    For i = 0 To nodeList.Length - 1
        results(i + 1) = nodeList.Item(i).innerText
    Next i
    GetNodesTextAsArray = results
End Function
References:
CSS selectors
Related
I am trying to extract the Futures data from MRCI.com and restructure it into one continous table in an excel worksheet so I can manipulate from there.
How can I repeat the Futures Contract in each row to get the following table layout:
Table Structure
Here's my code so far:
' Downloads the MRCI daily OHLC page and prints every row of its first
' "strat" table to the Immediate window, one tab-separated line per row.
' Rows containing "Total Volume" are printed as empty lines.
Sub MRCIData()
    Dim XMLReq As New MSXML2.XMLHTTP60
    Dim HTMLDoc As New MSHTML.HTMLDocument

    Dim FutTable As MSHTML.IHTMLElement
    Dim FutRows As MSHTML.IHTMLElementCollection
    Dim FutRow As MSHTML.IHTMLElement
    Dim FutCells As MSHTML.IHTMLElementCollection
    Dim FutCell As MSHTML.IHTMLElement

    Dim FutRowText As String
    Dim MrciURLHist As String

    MrciURLHist = "https://www.mrci.com/ohlc/2020/200320.php"
    XMLReq.Open "GET", MrciURLHist, False
    XMLReq.send

    If XMLReq.Status <> 200 Then
        MsgBox "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
        Exit Sub
    End If

    HTMLDoc.body.innerHTML = XMLReq.responseText
    Set XMLReq = Nothing

    ' Indexing past the end of the collection yields Nothing, so check before
    ' dereferencing.
    Set FutTable = HTMLDoc.getElementsByClassName("strat")(0)
    If FutTable Is Nothing Then
        MsgBox "Problem" & vbNewLine & "No table with class ""strat"" found"
        Exit Sub
    End If

    Set FutRows = FutTable.getElementsByTagName("tr")
    For Each FutRow In FutRows
        Set FutCells = FutRow.getElementsByTagName("td")
        FutRowText = ""
        If InStr(FutRow.innerText, "Total Volume") = 0 Then
            For Each FutCell In FutCells
                FutRowText = FutRowText & vbTab & FutCell.innerText
            Next
        End If
        ' The leading comma advances to the next print zone before the text.
        Debug.Print , FutRowText
    Next
End Sub
The following code looks through the table line by line and determines which future to apply to the following rows until it finds the next one, and so on. The output is not pretty, so this is more of a proof of concept. The code now outputs the table correctly.
' Downloads the MRCI daily OHLC page and writes its first "strat" table to
' Sheet as one continuous table, starting at row 1.
'
' Rows that contain <th> cells are treated as contract headings: their text
' is remembered and repeated in column A of every data row that follows,
' with that row's <td> values from column B onwards. Rows before the first
' heading and rows containing "Total Volume" are skipped.
'
' Parameters:
'   Sheet - worksheet that receives the output.
Sub Main(ByVal Sheet As Worksheet)
    Dim oRequest As New MSXML2.XMLHTTP60
    Dim oDocument As New MSHTML.HTMLDocument

    Dim oTable As Object
    Dim oRows As MSHTML.IHTMLElementCollection
    Dim oRow As MSHTML.IHTMLElement
    Dim oCells As MSHTML.IHTMLElementCollection
    Dim oCell As MSHTML.IHTMLElement

    oRequest.Open "GET", "https://www.mrci.com/ohlc/2020/200320.php", False
    oRequest.send

    If oRequest.Status <> 200 Then
        MsgBox "Error"
        Exit Sub
    End If

    oDocument.body.innerHTML = oRequest.responseText
    Set oRequest = Nothing

    Dim Skip As Boolean
    Dim Current As String
    Dim RowIndex As Long
    Dim ColumnIndex As Long

    ' Indexing past the end of the collection yields Nothing.
    Set oTable = oDocument.getElementsByClassName("strat")(0)
    If oTable Is Nothing Then
        MsgBox "Error"
        Exit Sub
    End If
    Set oRows = oTable.getElementsByTagName("tr")

    Current = ""
    ' Worksheet rows are one-based; leaving this at its default of 0 makes
    ' the first Sheet.Cells(RowIndex, ...) call fail.
    RowIndex = 1

    Application.ScreenUpdating = False

    For Each oRow In oRows
        Skip = False
        If oRow.getElementsByTagName("th").Length > 0 Then
            ' Heading row: remember the contract name, write nothing.
            Current = oRow.innerText
            Skip = True
        End If

        If Not Current = "" And Skip = False Then
            If InStr(oRow.innerText, "Total Volume") = 0 Then
                Set oCells = oRow.getElementsByTagName("td")
                ColumnIndex = 2
                Sheet.Cells(RowIndex, 1).Value = Current
                For Each oCell In oCells
                    Sheet.Cells(RowIndex, ColumnIndex).Value = oCell.innerText
                    ColumnIndex = ColumnIndex + 1
                Next oCell
                RowIndex = RowIndex + 1
            End If
        End If
    Next oRow

    Application.ScreenUpdating = True
End Sub
I have looked at the solution provided in this link, "Extract Table from Webpage in Excel using VBA", and it was very helpful. However, I need to extract the values from the div class (cscore_score) rather than from a table. Please refer to the image below.
The URL is: https://www.espncricinfo.com/scores
The div class is: cscore_score
The scores to extract is in nested divs. The sample data for each nested div I want to extract is like Country and Score i.e INDIA and in the next column "416..." into the Excel sheet.
Here's a screenshot of the table structure:
' Downloads the ESPNcricinfo scores page and, for each "cscore_link" element
' inside the first "div.cscore" block, writes the text of its "cscore_score"
' elements across one row of ThisWorkbook's "Sheet1".
Public Sub GetInfo()
    Const URL As String = "https://www.espncricinfo.com/scores"
    Dim html As HTMLDocument, ws As Worksheet, hDiv As Object
    Dim ul As Object, div As Object, r As Long, c As Long

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .send
        html.body.innerHTML = .responseText
    End With

    ' querySelector returns Nothing when no element matches.
    Set hDiv = html.querySelector("div.cscore")
    If hDiv Is Nothing Then Exit Sub

    r = 1
    With ws
        For Each div In hDiv.getElementsByClassName("cscore_link")
            r = r + 1: c = 1
            ' NOTE(review): the first two links are skipped and output starts
            ' at sheet row 2 (r - 2) - confirm that this offset is intended.
            If r > 3 Then
                For Each ul In div.getElementsByClassName("cscore_score")
                    ' Write the score element's own text (not the text of the
                    ' whole enclosing link). The leading apostrophe in
                    ' column 2 stops Excel reinterpreting the score.
                    .Cells(r - 2, c) = IIf(c = 2, "'" & ul.innerText, ul.innerText)
                    c = c + 1
                Next
            End If
        Next
    End With
End Sub
I would be grateful to receive any help to extract those scores from each div into the sheet.
You could use faster css selectors (using only class is faster than tag/type) which if used as shown below will allow you to also reduce your code complexity and improve performance by having only a single loop. Results can then be stored in an array and written out in one go - again another efficiency gain.
Note I am ensuring scores remain correctly formatted on output by concatenating "'" in front.
If you want scores for same match on same row:
Option Explicit
' Downloads the ESPNcricinfo scores page and writes one row per match to
' ThisWorkbook's "Sheet1": home team, home score, away team, away score,
' under a header row. Scores are prefixed with an apostrophe so that Excel
' keeps them as text.
Public Sub GetData()
    Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long, r As Long
    Dim usable As Long, matchCount As Long

    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.espncricinfo.com/scores", False
        .send
        html.body.innerHTML = .responseText
    End With

    Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")

    ws.Cells(1, 1).Resize(1, 4) = Array("Home", "Score", "Away", "Score")

    ' Names and scores are paired by position, two per match. Only use whole
    ' pairs that exist in BOTH lists so Item() never returns Nothing.
    usable = countries.Length
    If scores.Length < usable Then usable = scores.Length
    matchCount = usable \ 2
    If matchCount = 0 Then Exit Sub 'nothing matched; ReDim (1 To 0) would fail

    ReDim results(1 To matchCount, 1 To 4)
    For i = 0 To matchCount * 2 - 1 Step 2
        results(r, 1) = countries.item(i).innerText: results(r, 2) = "'" & scores.item(i).innerText
        results(r, 3) = countries.item(i + 1).innerText: results(r, 4) = "'" & scores.item(i + 1).innerText
        r = r + 1
    Next

    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Scores on different rows for every team:
Option Explicit
' Downloads the ESPNcricinfo scores page and writes one row per team to
' ThisWorkbook's "Sheet1": team name and score, under a header row. Scores
' are prefixed with an apostrophe so that Excel keeps them as text.
Public Sub GetData()
    Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long
    Dim teamCount As Long

    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.espncricinfo.com/scores", False
        .send
        html.body.innerHTML = .responseText
    End With

    Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")

    ws.Cells(1, 1) = "Country": ws.Cells(1, 2) = "Score"

    ' Names and scores are paired by position; only use entries present in
    ' BOTH lists so Item() never returns Nothing.
    teamCount = countries.Length
    If scores.Length < teamCount Then teamCount = scores.Length
    If teamCount = 0 Then Exit Sub 'nothing matched; ReDim (1 To 0) would fail

    ReDim results(1 To teamCount, 1 To 2)
    For i = 0 To teamCount - 1
        results(i + 1, 1) = countries.item(i).innerText: results(i + 1, 2) = "'" & scores.item(i).innerText
    Next

    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Additional column:
' Downloads the ESPNcricinfo scores page and writes one row per match to
' ThisWorkbook's "Sheet1": match description, home team, home score, away
' team, away score, under a header row. Scores are prefixed with an
' apostrophe so that Excel keeps them as text.
Public Sub GetData()
    Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object
    Dim descs As Object, results(), i As Long, r As Long
    Dim usable As Long, matchCount As Long

    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.espncricinfo.com/scores", False
        .send
        html.body.innerHTML = .responseText
    End With

    Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
    Set descs = html.querySelectorAll(".cscore--watchNotes .cscore_info-overview")

    ws.Cells(1, 1).Resize(1, 5) = Array("Desc", "Home", "Score", "Away", "Score")

    ' Names and scores are paired by position (two per match), descriptions
    ' one per match. Only use matches for which all three lists have
    ' entries so Item() never returns Nothing.
    usable = countries.Length
    If scores.Length < usable Then usable = scores.Length
    matchCount = usable \ 2
    If descs.Length < matchCount Then matchCount = descs.Length
    If matchCount = 0 Then Exit Sub 'nothing matched; ReDim (1 To 0) would fail

    ReDim results(1 To matchCount, 1 To 5)
    For i = 0 To matchCount * 2 - 1 Step 2
        results(r, 1) = descs.Item(i \ 2).innerText
        results(r, 2) = countries.Item(i).innerText: results(r, 3) = "'" & scores.Item(i).innerText
        results(r, 4) = countries.Item(i + 1).innerText: results(r, 5) = "'" & scores.Item(i + 1).innerText
        r = r + 1
    Next

    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Your request seems to be just fine. Parsing the HTML is where your problem is.
You could do something like the following (you can ignore the request part):
Option Explicit
' Downloads the ESPNcricinfo scores page and writes one worksheet row per
' "cscore_competitors" list, starting at row 2: for every <li> in the list,
' the long team name followed by its score, in consecutive column pairs.
Sub espn()
    Const SCORES_URL As String = "https://www.espncricinfo.com/scores"

    Dim client As WinHttpRequest
    Dim page As HTMLDocument
    Dim target As Worksheet
    Dim matchList As HTMLUListElement
    Dim competitor As HTMLLIElement
    Dim outRow As Long
    Dim outCol As Long

    Set client = New WinHttpRequest
    Set page = New HTMLDocument

    ' Fetch the page synchronously and load the markup into the parser.
    client.Open "GET", SCORES_URL, False
    client.send
    page.body.innerHTML = client.responseText

    Set target = ThisWorkbook.Worksheets("Name of your worksheet")

    outRow = 2
    For Each matchList In page.getElementsByClassName("cscore_competitors")
        outCol = 1
        For Each competitor In matchList.getElementsByTagName("li")
            ' Name in the first column of the pair, score in the second.
            target.Cells(outRow, outCol) = competitor.getElementsByClassName("cscore_name cscore_name--long")(0).innerText
            target.Cells(outRow, outCol + 1) = competitor.getElementsByClassName("cscore_score")(0).innerText
            outCol = outCol + 2
        Next competitor
        outRow = outRow + 1
    Next matchList
End Sub
The results would look like so:
Basically, each game is represented by a ul (unordered list) element, which consists of two li elements that contain the team names and the scores.
I want to download the Excel file attached to html via Excel vba and output it in Excel sheet. This homepage is a list of the current status of the box office that has been popular in Korean movie theaters.
http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?loadEnd=0&searchType=search&sMultiMovieYn=&sRepNationCd=&sWideAreaCd=
Attached file is Excel. I understand that downloading can be done using the click method through Internet search. However, an alert window appears during downloading of the file, and the date is inserted in the name of the Excel file to be downloaded. As a beginner in Excel VBA it is very difficult. So I left this question, and what logic would it be useful to implement to spread this file on an Excel sheet? I am a beginner in Excel VBA so if you give me a detailed answer, it will be really helpful.
<p class="btn_regi">
<a href="#none" class="btn_type01" onclick="chkform('excel'); return false;">
<strong>Excel</strong></a>
</p>
The following logic I have coded myself until dawn. However, the logic was too inefficient and the results did not work so I asked for help.
' Downloads the KOBIS "former box office" list and writes the table
' "boardList03" to ThisWorkbook's "Sheet1", starting at cell B2.
'
' One worksheet row is written per table row with, in order: rank, movie
' title, opening date, accumulated sales, accumulated audience, screen count
' and show count. The values are picked from the <td> cells by their id.
'
' Screen updating and calculation are switched off while the sheet is filled
' and are restored even when an error occurs; the error is then re-raised.
'
' Requires references to "Microsoft WinHTTP Services" and
' "Microsoft HTML Object Library".
Sub program_()
    Const BASE_URL As String = "http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?"
    Const QUERY As String = "loadEnd=0&searchType=search&sMultiMovieYn=&sRepNationCd=&sWideAreaCd="
    Const FIRST_ROW As Long = 2
    Const FIRST_COL As Long = 2
    Const COL_COUNT As Long = 7

    Dim WinHttp As WinHttpRequest
    Dim sResponse As String, html As New HTMLDocument, hTable As HTMLTable
    Dim ws As Worksheet
    Dim tBodies As Object, tRow As Object, tCell As Object, tr As Object, td As Object
    Dim links As Object
    Dim Arr0() As Variant
    Dim r As Long, c As Long
    Dim errNumber As Long, errSource As String, errDescription As String

    Application.ScreenUpdating = False
    Application.Calculation = xlCalculationManual
    On Error GoTo Fail

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set WinHttp = New WinHttpRequest

    With WinHttp
        .Open "get", BASE_URL & QUERY
        .SetRequestHeader "Referer", "http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?"
        .SetRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .Send
        .WaitForResponse
        ' NOTE(review): StrConv decodes the bytes with the system ANSI code
        ' page; if the page is served as UTF-8 the Korean text will come out
        ' garbled - confirm the page's charset.
        sResponse = StrConv(.responseBody, vbUnicode)
    End With

    html.body.innerHTML = sResponse
    sResponse = ""

    ' Indexing past the end of a collection yields Nothing, so check each
    ' step before dereferencing.
    Set hTable = html.getElementsByClassName("boardList03")(0)
    If hTable Is Nothing Then GoTo Done
    Set tBodies = hTable.getElementsByTagName("tbody")
    If tBodies.Length = 0 Then GoTo Done
    Set tRow = tBodies(0).getElementsByTagName("tr")
    If tRow.Length = 0 Then GoTo Done

    ReDim Arr0(1 To tRow.Length, 1 To COL_COUNT)
    r = 0
    For Each tr In tRow
        r = r + 1
        Set tCell = tr.getElementsByTagName("td")
        For Each td In tCell
            ' Map the cell's id to its output column (0 = not wanted).
            Select Case td.ID
                Case "td_rank": c = 1
                Case "td_movie": c = 2
                Case "td_openDt": c = 3
                Case "td_salesAcc": c = 4
                Case "td_audiAcc": c = 5
                Case "td_scrnCnt": c = 6
                Case "td_showCnt": c = 7
                Case Else: c = 0
            End Select

            If c = 2 Then
                ' The title is the text of the link inside the cell; fall
                ' back to the cell text when there is no link.
                Set links = td.getElementsByTagName("a")
                If links.Length > 0 Then
                    Arr0(r, c) = links(0).innerText
                Else
                    Arr0(r, c) = td.innerText
                End If
            ElseIf c > 0 Then
                Arr0(r, c) = td.innerText
            End If
        Next td
    Next tr

    ' Write the whole block in one go instead of cell by cell.
    ws.Cells(FIRST_ROW, FIRST_COL).Resize(UBound(Arr0, 1), COL_COUNT).Value = Arr0

Done:
    Application.Calculation = xlCalculationAutomatic
    Application.ScreenUpdating = True
    Exit Sub

Fail:
    ' Capture the error before touching anything else, restore the
    ' application state, then hand the error on to the caller.
    errNumber = Err.Number: errSource = Err.Source: errDescription = Err.Description
    Application.Calculation = xlCalculationAutomatic
    Application.ScreenUpdating = True
    Err.Raise errNumber, errSource, errDescription
End Sub
You could just grab the table by its id and then loop the table rows and table cells within rows.
Option Explicit
' Opens the KOBIS former box office page in a visible Internet Explorer
' window, waits until the page has finished loading, and passes the table
' with id "table_former" to WriteTable for output on ThisWorkbook's "Sheet1"
' starting at row 1. The browser is closed afterwards.
Public Sub GetInfo()
    Const MAX_WAIT_SEC As Long = 5
    Const PAGE_URL As String = "http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?loadEnd=0&searchType=search&sMultiMovieYn=&sRepNationCd=&sWideAreaCd="

    Dim browser As InternetExplorer
    Dim boxOfficeTable As HTMLTable

    Application.ScreenUpdating = False

    Set browser = New InternetExplorer
    browser.Visible = True
    browser.navigate PAGE_URL

    ' Keep Excel responsive while the page loads.
    Do While browser.Busy Or browser.readyState < 4
        DoEvents
    Loop

    Set boxOfficeTable = browser.document.getElementById("table_former")
    WriteTable boxOfficeTable, 1, ThisWorkbook.Worksheets("Sheet1")

    browser.Quit
    Application.ScreenUpdating = True
End Sub
' Writes an HTML table to a worksheet.
'
' The text of every <th> in the table goes into row startRow, one per
' column from column A. The <td> cells of every <tr> inside every <tbody>
' are then written on the rows below, one worksheet row per table row.
'
' Parameters:
'   hTable   - the table element to write out.
'   startRow - worksheet row that receives the headers (default 1).
'   ws       - target worksheet; the active sheet is used when omitted.
Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
    Dim headCell As Object, bodySection As Object, bodyRow As Object, dataCell As Object
    Dim outRow As Long, outCol As Long

    If ws Is Nothing Then Set ws = ActiveSheet

    ' Header row.
    outCol = 0
    For Each headCell In hTable.getElementsByTagName("th")
        outCol = outCol + 1
        ws.Cells(startRow, outCol) = headCell.innerText
    Next headCell

    ' Data rows: the counter is advanced before each write, so the first
    ' data row lands directly under the headers.
    outRow = startRow
    For Each bodySection In hTable.getElementsByTagName("tbody")
        For Each bodyRow In bodySection.getElementsByTagName("tr")
            outRow = outRow + 1
            outCol = 1
            For Each dataCell In bodyRow.getElementsByTagName("td")
                ws.Cells(outRow, outCol).Value = dataCell.innerText
                outCol = outCol + 1
            Next dataCell
        Next bodyRow
    Next bodySection
End Sub
References (VBE> Tools > References):
Microsoft HTML Object Library
Microsoft Internet Controls
This code (like other codes) written by Precious #QHarr works well. However, when importing data, I want to retrieve the data saved in the connection. The output of the code and the data I want to receive are shown in the attached image. What kind of code can I solve? (Google Translate)
' For every id listed in column A of sheet "Y" (from A2 down to the row
' above the cell containing "boş"), downloads https://yenibeygir.com/at/<id>
' and collects the rows of the ".at_Yarislar" table. Each collected row
' holds the id followed by the row's cell texts. The first table row of each
' page is skipped and collection for a page stops once the table-row counter
' exceeds numberOfRaces. Everything is written to sheet "X" under a header
' row in one go.
Public Sub DYarislar()
    Const numTableRows As Long = 250
    Const numTableColumns As Long = 14
    Const numberOfRaces As Long = 22
    Const BASE_URL As String = "https://yenibeygir.com/at/"

    Dim asays(), headers(), results()
    Dim ws As Worksheet, html As HTMLDocument, http As clsHTTP
    Dim hTable As HTMLTable, tRows As Object, tRow As Object, tCells As Object, tCell As Object
    Dim url As String
    Dim asay As Long, numberOfRequests As Long
    Dim rowsSeen As Long, r As Long, c As Long

    headers = Array("Asay", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
    Set http = New clsHTTP
    Set ws = ThisWorkbook.Worksheets("X")
    Set html = New HTMLDocument

    ' Ids to request, read as a one-dimensional array.
    asays = Application.Transpose(Sheets("Y").Range("A2:A" & Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1).Value)
    numberOfRequests = UBound(asays)

    ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)

    Application.ScreenUpdating = False

    For asay = 1 To numberOfRequests
        url = BASE_URL & asays(asay)
        html.body.innerHTML = http.GetString(url)
        Set hTable = html.querySelector(".at_Yarislar")
        Set tRows = hTable.getElementsByTagName("tr")

        ' rowsSeen counts every table row, including the first (skipped) one.
        rowsSeen = 0
        For Each tRow In tRows
            rowsSeen = rowsSeen + 1
            If rowsSeen > 1 Then
                If rowsSeen > numberOfRaces Then Exit For
                r = r + 1
                results(r, 1) = asays(asay)
                c = 2
                Set tCells = tRow.getElementsByTagName("td")
                For Each tCell In tCells
                    results(r, c) = tCell.innerText
                    c = c + 1
                Next
            End If
        Next
    Next

    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results

    Application.ScreenUpdating = True
End Sub
You only need a few small changes. You use the same Class, clsHTTP, as before, then use the module 1 code below.
Notes:
Within each source page table row the jockey column contains an a tag link element
You can access this using:
tRow.getElementsByTagName("a")(1).href
As the link is relative you need to do a text replacement to add in the base part of the URL i.e.
Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
The id is part of the href and can be extracted using Split:
Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
To allow for these additional elements in the results you need to increase the number of output column count:
Const numTableColumns As Long = 16
And adapt your table rows loop to populate the additional columns:
results(r, 2) = Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
results(r, 3) = Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
Also, adjust in the loop to ensure other columns are populated from the 4th onwards (as 2 extra columns):
c = 4
Finally, adjust your headers to include the 2 new columns:
headers = Array("Asay", "JokeyId", "JokeyLink", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
VBA:
Module 1:
Option Explicit
' For every id listed in column A of sheet "Y" (from A2 down to the row
' above the cell containing "boş"), downloads https://yenibeygir.com/at/<id>
' and collects all rows of the ".at_Yarislar" table except the first.
'
' Each collected row holds: the id, the jockey id and absolute jockey link
' (both taken from the second <a> in the row, when present), followed by the
' row's cell texts. The result is written to sheet "X" from column C, under
' a header row. Pages without the table are skipped.
Public Sub DYarislar()
    Dim asays(), ws As Worksheet, asay As Long, html As HTMLDocument
    Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long

    headers = Array("Asay", "JokeyId", "JokeyLink", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
    Set http = New clsHTTP
    Set ws = ThisWorkbook.Worksheets("X")
    Set html = New HTMLDocument
    asays = Application.Transpose(Sheets("Y").Range("A2:A" & Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1).Value)

    Const numTableRows As Long = 250
    Const numTableColumns As Long = 16
    Const BASE_URL As String = "https://yenibeygir.com/at/"
    Const BASE_URL2 As String = "https://yenibeygir.com"

    numberOfRequests = UBound(asays)

    Dim results(), headerRow As Boolean, tRows As Object, tRow As Object
    Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
    Dim anchors As Object, href As String, hrefParts() As String

    ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)

    Application.ScreenUpdating = False

    For asay = 1 To numberOfRequests
        headerRow = True
        url = BASE_URL & asays(asay)
        html.body.innerHTML = http.GetString(url)
        Set hTable = html.querySelector(".at_Yarislar")

        ' querySelector returns Nothing when the page has no such table.
        If Not hTable Is Nothing Then
            Set tRows = hTable.getElementsByTagName("tr")
            For Each tRow In tRows
                If Not headerRow Then
                    c = 4: r = r + 1
                    results(r, 1) = asays(asay)

                    ' The jockey link is the second <a> in the row. Rows
                    ' without it simply leave the two jockey columns empty.
                    Set anchors = tRow.getElementsByTagName("a")
                    If anchors.Length > 1 Then
                        href = anchors(1).href
                        ' The href starts with "about:" because the markup
                        ' was loaded without a base address; the id is its
                        ' third "/"-separated segment.
                        hrefParts = Split(href, "/")
                        If UBound(hrefParts) >= 2 Then results(r, 2) = hrefParts(2)
                        results(r, 3) = Replace$(href, "about:", BASE_URL2)
                    End If

                    Set tCells = tRow.getElementsByTagName("td")
                    For Each tCell In tCells
                        results(r, c) = tCell.innerText
                        c = c + 1
                    Next
                End If
                headerRow = False
            Next
        End If
    Next

    With ws
        .Cells(1, 3).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 3).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With

    Application.ScreenUpdating = True
End Sub
Sample results:
I have a program that parses a certain table from the site, and it works great. I want to parse another table from the same site. Both tables have the same "table" tag index, so I am trying to use the same program, but it gives an error: Run-time error 91 on the line:
If oRow.Cells(y).Children.Length > 0 Then
New table : http://allscores.ru/soccer/fstats.php?champ=2604&team=439&team2=420&tour=110
Old table : http://allscores.ru/soccer/new_ftour.php?champ=2604&f_team=439
New table : in the attached picture
' Downloads the page at Ssilka, takes its fourth <table> (index 3) and
' writes two blocks to the active sheet of book1:
'   - the link targets of the cells, starting at row 110;
'   - the visible text of the cells, starting at row 34.
' Both blocks start at column 26 + iLoop * 21. The first row and the first
' column of the table are skipped, and only cells that contain child
' elements are copied.
'
' Parameters:
'   Ssilka - address of the page to download.
'   book1  - workbook whose active sheet receives the output.
'   iLoop  - zero-based block index used to offset the output columns.
'
' Nothing is written when the page has fewer than four tables or the table
' has fewer than two rows or columns.
Function extractTable(Ssilka As String, book1 As Workbook, iLoop As Long)
    Dim oDom As Object, oTables As Object, oTable As Object
    Dim oRow As Object, oCell As Object, oLinks As Object
    Dim iRows As Long, iCols As Long, lastCol As Long
    Dim x As Long, y As Long
    Dim docStart As Long
    Dim href As Variant
    Dim data()
    Dim vata()
    Dim oHttp As Object
    Dim oRegEx As Object
    Dim sResponse As String
    Dim oRange As Range
    Dim odRange As Range

    ' get page
    Set oHttp = CreateObject("MSXML2.XMLHTTP")
    oHttp.Open "GET", Ssilka, False
    oHttp.Send

    ' cleanup response: drop anything before the doctype (Mid$ fails with a
    ' start position of 0, so only trim when the marker is present) ...
    sResponse = StrConv(oHttp.responseBody, vbUnicode)
    Set oHttp = Nothing
    docStart = InStr(1, sResponse, "<!DOCTYPE ")
    If docStart > 0 Then sResponse = Mid$(sResponse, docStart)

    ' ... and strip all script blocks.
    Set oRegEx = CreateObject("vbscript.regexp")
    With oRegEx
        .MultiLine = True
        .Global = True
        .IgnoreCase = False
        .Pattern = "<(script|SCRIPT)[\w\W]+?</\1>"
        sResponse = .Replace(sResponse, "")
    End With
    Set oRegEx = Nothing

    ' create Document from response
    Set oDom = CreateObject("htmlFile")
    oDom.Write sResponse
    DoEvents

    ' table with results, indexes start at zero
    Set oTables = oDom.getelementsbytagname("table")
    If oTables.Length < 4 Then Exit Function
    Set oTable = oTables(3)
    DoEvents

    iRows = oTable.Rows.Length
    If iRows < 2 Then Exit Function
    iCols = oTable.Rows(1).Cells.Length
    If iCols < 2 Then Exit Function

    ' first row and first column contain no interesting data
    ReDim data(1 To iRows - 1, 1 To iCols - 1)
    ReDim vata(1 To iRows - 1, 1 To iCols - 1)

    ' fill in data arrays
    For x = 1 To iRows - 1
        Set oRow = oTable.Rows(x)

        ' Rows may have fewer cells than row 1 (e.g. merged cells). Indexing
        ' past the end yields Nothing and raises run-time error 91, so clamp
        ' the column loop to this row's own cell count.
        lastCol = oRow.Cells.Length - 1
        If lastCol > iCols - 1 Then lastCol = iCols - 1

        For y = 1 To lastCol
            Set oCell = oRow.Cells(y)
            If oCell.Children.Length > 0 Then
                ' A cell can have children without containing a link.
                Set oLinks = oCell.getelementsbytagname("a")
                If oLinks.Length > 0 Then
                    href = oLinks(0).getattribute("href")
                    If Not IsNull(href) Then
                        data(x, y) = Replace(href, "about:", "http://allscores.ru/soccer/")
                    End If
                End If
                vata(x, y) = oCell.innerText
            End If
        Next y
    Next x

    Set oRow = Nothing
    Set oTable = Nothing
    Set oDom = Nothing

    Set oRange = book1.ActiveSheet.Cells(110, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1)
    oRange.NumberFormat = "#"
    oRange.Value = data

    Set odRange = book1.ActiveSheet.Cells(34, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1)
    odRange.NumberFormat = "#"
    odRange.Value = vata

    Set oRange = Nothing
    Set odRange = Nothing
End Function
This is not particularly robust but does grab the values from the table. iLoop is not used.
Option Explicit
' Runs extractTable against one sample statistics page, writing into this
' workbook. Only the address and the workbook are passed: the column-block
' index (iLoop) is not used by the Sub version of extractTable.
Public Sub test()
    extractTable "http://allscores.ru/soccer/fstats.php?champ=2604&team=439&team2=420&tour=110", ThisWorkbook
End Sub
' Downloads the page at Ssilka, takes its fourth <table> (index 3) and
' copies table rows 3 to 17 to the active sheet of book1, starting at A1.
' For each row, the text of every child node except the first is written
' across the worksheet row.
'
' Parameters:
'   Ssilka - address of the page to download.
'   book1  - workbook whose active sheet receives the output.
'   iLoop  - accepted for compatibility with three-argument callers;
'            not used.
'
' Nothing is written when the page has fewer than four tables. If the table
' has fewer than 18 rows, only the rows that exist are copied.
Public Sub extractTable(Ssilka As String, book1 As Workbook, Optional iLoop As Long = 0)
    Const FIRST_TABLE_ROW As Long = 3  'skips the header and the empty row
    Const LAST_TABLE_ROW As Long = 17  'rows 3..17 are the 15 rows of interest

    Dim oDom As Object, oTables As Object, oTable As Object
    Dim oHttp As Object
    Dim oRegEx As Object
    Dim sResponse As String
    Dim docStart As Long
    Dim b As Object, a As Object
    Dim i As Long, y As Long, lastRow As Long

    Set oHttp = CreateObject("MSXML2.XMLHTTP")
    oHttp.Open "GET", Ssilka, False
    oHttp.send
    sResponse = StrConv(oHttp.responseBody, vbUnicode)
    Set oHttp = Nothing

    ' Drop anything before the doctype. Mid$ fails with a start position of
    ' 0, so only trim when the marker is present.
    docStart = InStr(1, sResponse, "<!DOCTYPE ")
    If docStart > 0 Then sResponse = Mid$(sResponse, docStart)

    ' Strip all script blocks before parsing.
    Set oRegEx = CreateObject("vbscript.regexp")
    With oRegEx
        .MultiLine = True
        .Global = True
        .IgnoreCase = False
        .Pattern = "<(script|SCRIPT)[\w\W]+?</\1>"
        sResponse = .Replace(sResponse, "")
    End With
    Set oRegEx = Nothing

    Set oDom = CreateObject("htmlFile")
    oDom.Write sResponse

    Set oTables = oDom.getElementsByTagName("table")
    If oTables.Length < 4 Then Exit Sub
    Set oTable = oTables(3)

    Set b = oTable.getElementsByTagName("TR") 'DispHTMLElementCollection

    ' Do not index past the last row the table actually has.
    lastRow = LAST_TABLE_ROW
    If lastRow > b.Length - 1 Then lastRow = b.Length - 1

    ' Write to the workbook that was passed in rather than to whichever
    ' sheet happens to be active in Excel.
    With book1.ActiveSheet
        For i = FIRST_TABLE_ROW To lastRow
            Set a = b(i).ChildNodes
            For y = 1 To a.Length - 1
                .Cells(i - 2, y) = a(y).innerText
            Next y
        Next i
    End With
End Sub