I would like to extract a table from HTML into Excel using VBA.
I have tried the following code several times, changing parts of it, but I keep getting an error.
Sub GrabTable()
'dimension (set aside memory for) our variables
Dim objIE As InternetExplorer
Dim ele As Object
Dim y As Integer
'start a new browser instance
Set objIE = New InternetExplorer
'make browser visible
objIE.Visible = False
'navigate to page with needed data
objIE.navigate "http://www.bursamalaysia.com/market/listed-companies/company-announcements/5923061"
'wait for page to load
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
'we will output data to excel, starting on row 1
y = 1
'look at all the 'tr' elements in the 'table' with id 'InputTable2',
'and evaluate each, one at a time, using 'ele' variable
For Each ele In objIE.document.getElementByClassName("InputTable2").getElementsByTagName("tr")
'show the text content of 'td' element being looked at
Debug.Print ele.textContent
'each 'tr' (table row) element contains 2 children ('td') elements
'put text of 1st 'td' in col A
Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
'put text of 2nd 'td' in col B
Sheets("Sheet1").Range("B" & y).Value = ele.Children(1).textContent
y = y + 1
'repeat until last ele has been evaluated
Next
End Sub
I show you two methods:
Using IE: The data is inside an iframe which needs to be negotiated
Using XMLHTTP request - much faster and without browser opening. It uses the first part of the iframe document URL which is what the iframe is navigating to.
In both cases I access the element containing the company name and then the disclosure info table. For the main disclosure info table I copy the outerHTML to the clipboard and paste it into Excel to avoid looping over all the rows and columns. You could simply loop the tr (table rows) and td (table cells) within it instead; a sketch of that follows the IE code below.
IE:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, clipboard As Object
With IE
.Visible = True
.navigate "http://www.bursamalaysia.com/market/listed-companies/company-announcements/5923061"
While .Busy Or .readyState < 4: DoEvents: Wend
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
With .document.getElementById("bm_ann_detail_iframe").contentDocument
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1) = .getElementsByClassName("company_name")(0).innerText
clipboard.SetText .getElementsByTagName("table")(1).outerHTML
clipboard.PutInClipboard
End With
ThisWorkbook.Worksheets("Sheet1").Cells(2, 1).PasteSpecial
.Quit
End With
End Sub
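If you prefer to loop the rows and cells rather than use the clipboard, here is a minimal, untested sketch you could drop into the sub above in place of the clipboard lines (it assumes the disclosure table is still the table at index 1 inside the iframe document, and it writes below the company name placed in row 1):
Dim tbl As Object, tr As Object, td As Object, r As Long, c As Long
Set tbl = IE.document.getElementById("bm_ann_detail_iframe").contentDocument.getElementsByTagName("table")(1)
r = 2 'start writing below the company name in row 1
For Each tr In tbl.getElementsByTagName("tr") 'each table row
c = 1
For Each td In tr.getElementsByTagName("td") 'each cell in that row
ThisWorkbook.Worksheets("Sheet1").Cells(r, c) = td.innerText
c = c + 1
Next td
r = r + 1
Next tr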
XMLHTTP:
You can extract a different URL from the front-end of the iframe URL and use that as shown below.
Here is the section of your original HTML that shows the iframe and the associated new URL info:
Option Explicit
Public Sub GetTable()
Dim sResponse As String, html As HTMLDocument, clipboard As Object
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://disclosure.bursamalaysia.com/FileAccess/viewHtml?e=2891609", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
html.body.innerHTML = sResponse
With html
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1) = .querySelector(".company_name").innerText
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
clipboard.SetText .querySelector(".InputTable2").outerHTML
clipboard.PutInClipboard
End With
ThisWorkbook.Worksheets("Sheet1").Cells(2, 1).PasteSpecial
End Sub
Try it this way.
Sub Web_Table_Option_Two()
Dim HTMLDoc As New HTMLDocument
Dim objTable As Object
Dim lRow As Long
Dim lngTable As Long
Dim lngRow As Long
Dim lngCol As Long
Dim ActRw As Long
Dim objIE As InternetExplorer
Set objIE = New InternetExplorer
objIE.Navigate "http://www.bursamalaysia.com/market/listed-companies/company-announcements/5923061"
Do Until objIE.ReadyState = 4 And Not objIE.Busy
DoEvents
Loop
Application.Wait (Now + TimeValue("0:00:03")) 'wait for java script to load
HTMLDoc.body.innerHTML = objIE.Document.body.innerHTML
With HTMLDoc.body
Set objTable = .getElementsByTagName("table")
For lngTable = 0 To objTable.Length - 1
For lngRow = 0 To objTable(lngTable).Rows.Length - 1
For lngCol = 0 To objTable(lngTable).Rows(lngRow).Cells.Length - 1
ThisWorkbook.Sheets("Sheet1").Cells(ActRw + lngRow + 1, lngCol + 1) = objTable(lngTable).Rows(lngRow).Cells(lngCol).innerText
Next lngCol
Next lngRow
ActRw = ActRw + objTable(lngTable).Rows.Length + 1
Next lngTable
End With
objIE.Quit
End Sub
I'm trying to write a simple code for studying vocabulary and want this code to look up the words in column "A" using my favorite online dictionary "Cambridge" automatically and then print the definitions to the cells next to the words. I have written the code below so far and it goes to the site and searches the word. The question is what code is needed to get the definitions and print them to the cells?
Sub SearchWords()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
Dim HTMLButtons As MSHTML.IHTMLElementCollection
Dim HTMLButton As MSHTML.IHTMLElement
IE.Visible = True
IE.Navigate "www.dictionary.cambridge.org"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDoc = IE.Document
Set HTMLInput = HTMLDoc.getElementById("cdo-search-input")
HTMLInput.Value = ThisWorkbook.Sheets(1).Range("A1").Value
Set HTMLButtons = HTMLDoc.getElementsByClassName("cdo-search__button")
HTMLButtons(0).Click
End Sub
Thanks in advance.
The result appears to be in an element with the classname entry. I read your column A search words into an array and loop that to look up each word. The result is written back out to the sheet. I use CSS selectors mostly as a more flexible and faster method for selecting elements. CSS selectors, in this instance, are applied via the querySelector method of HTMLDocument (i.e. IE.document).
Proper page-load waits are used throughout.
Option Explicit
'entry
Public Sub SearchWords()
Dim IE As SHDocVw.InternetExplorer, lookups(), dataSheet As Worksheet, iRow As Long
Set dataSheet = ThisWorkbook.Worksheets("Sheet1")
Set IE = New SHDocVw.InternetExplorer
lookups = Application.Transpose(dataSheet.Range("A2:A3").Value) '<Read words to lookup into a 2d array and transpose into 1D
With IE
.Visible = True
.Navigate2 "www.dictionary.cambridge.org"
While .Busy Or .readyState <> 4: DoEvents: Wend
For iRow = LBound(lookups) To UBound(lookups)
.document.getElementById("cdo-search-input").Value = lookups(iRow) 'work off .document to avoid stale elements
.document.querySelector(".cdo-search__button").Click
While .Busy Or .readyState <> 4: DoEvents: Wend 'wait for page reload
Application.Wait Now + TimeSerial(0, 0, 1)
Do
DoEvents
Loop While .document.querySelectorAll(".entry").Length = 0
dataSheet.Cells(iRow + 1, 2) = .document.querySelector(".entry").innerText
Next
.Quit
End With
End Sub
Done! Working perfectly. (Since this post is too long for a comment, I had to post this as an answer.) Now I am trying to get some more data from the page (since I need the other explanations and the Turkish definitions as well). When I inspect the page, I see that the full descriptions are placed in the "di $ entry-body__el entry-body__el--smalltop clrd js-share-holder" class. I added "/turkish" to the URL and tried to get the related element using the class name I mentioned instead of ".def-block", but it didn't work. Then I tried a different way using this code:
Sub GetMeaningsFromCambridgeDictionary()
Dim ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Meanings")
Dim sourceWord As String
sourceWord = ws.Range("A2").Value
Dim i As Integer
Dim çeviri As String
Dim ilkSatir As Integer
ilkSatir = ws.Cells(ws.Rows.Count, "B").End(xlUp).Row + 1
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
Dim URL As String
Dim countElement As Integer
Range("B2:B1000").Delete
IE.Visible = False
URL = "https://dictionary.cambridge.org/dictionary/turkish/" & sourceWord
IE.Navigate URL
Do While IE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("0:00:01"))
Do While IE.readyState <> 4
Application.Wait (Now + TimeValue("0:00:01"))
Loop
countElement = IE.document.getElementsByClassName("di $ entry-body__el entry-body__el--smalltop clrd js-share-holder").Length
For i = 0 To countElement - 1
çeviri = IE.document.getElementsByClassName("di $ entry-body__el entry-body__el--smalltop clrd js-share-holder")(i).innerText
Range("B" & i + 2).Value = çeviri
Range("B" & i + 2).Rows.AutoFit
Next i
Columns(2).AutoFit
IE.Quit
MsgBox "All meanings have been copied."
End Sub
This code is also working, and I see all the definitions in detail, but this time the problem is that only the first word is processed. What should I do to do the same thing for the other words?
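One way, sketched below, is to wrap the lookup in a loop over the words in column A, following the same pattern as the earlier answer. The URL, sheet name and class name are taken from the code above; the rest is illustrative and untested:
Sub GetMeaningsForAllWords()
Dim ws As Worksheet, IE As Object, ele As Object
Dim lastRow As Long, r As Long, result As String
Set ws = ThisWorkbook.Worksheets("Meanings")
lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = False
For r = 2 To lastRow 'repeat the single-word routine for every word in column A
IE.Navigate "https://dictionary.cambridge.org/dictionary/turkish/" & ws.Cells(r, "A").Value
Do While IE.Busy Or IE.readyState <> 4: DoEvents: Loop
result = ""
'concatenate every matching entry block for this word
For Each ele In IE.document.getElementsByClassName("di $ entry-body__el entry-body__el--smalltop clrd js-share-holder")
result = result & ele.innerText & vbNewLine
Next
ws.Cells(r, "B").Value = result
Next r
IE.Quit
End Sub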
The site I am browsing has several pages. I want to click on those page elements, or play with the URLs, and thus be able to copy the data. The initial URL ends in =1&playerType=ALL&ts=1558502019375; in my code there is a loop that is supposed to go page after page to get the data, but I cannot get it to work.
Sub UPDATE_DATA_MLB()
Application.ScreenUpdating = False
'dimension (set aside memory for) our variables
Dim objIE As InternetExplorer
Dim ele As Object
Dim y As Integer
Dim EstaPagina As Byte
Dim EstaURL As String
'Starting page
EstaPagina = 1
'we will output data to excel, starting on row 1
y = 1
EstaURL = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Player+hitting&game_type='R'&season=2018&season_type=ANY&league_code='MLB'&sectionType=sp&statType=hitting&page=1&playerType=ALL&ts=1558502019375" '&ts=1526432697176"
'start a new browser instance
Set objIE = New InternetExplorer
'make browser visible
objIE.Visible = False
Do Until EstaPagina = 255
'navigate to page with needed data
objIE.navigate EstaURL & EstaPagina
'wait for page to load
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
'If UCase(Left(EstaURL, 211) & "1") = UCase(Left(objIE.LocationURL, (211 + Len(EstaPagina)))) And y > 1 Then Exit Do
'look at all the 'tr' elements in the 'table' with id 'myTable',
'and evaluate each, one at a time, using 'ele' variable
For Each ele In objIE.document.getElementById("datagrid").getElementsByTagName("tr")
'show the text content of 'tr' element being looked at
'Debug.Print ele.textContent
'each 'tr' (table row) element contains 4 children ('td') elements
'put text of 1st 'td' in col A
Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
'put text of 2nd 'td' in col B
Sheets("Sheet1").Range("B" & y).Value = ele.Children(1).textContent
'put text of 3rd 'td' in col C
Sheets("Sheet1").Range("C" & y).Value = ele.Children(2).textContent
'put text of 4th 'td' in col D
Sheets("Sheet1").Range("D" & y).Value = ele.Children(5).textContent
'put text of 4th 'td' in col f
Sheets("Sheet1").Range("E" & y).Value = ele.Children(22).textContent
'increment row counter by 1
y = y + 1
Next
EstaPagina = EstaPagina + 1
Loop
objIE.Quit
Set objIE = Nothing
Set ele = Nothing
Range("A1").Select
Range(Selection, Selection.End(xlToRight)).Select
Range(Selection, Selection.End(xlDown)).Select
Selection.RemoveDuplicates Columns:=Array(1, 2, 3, 4, 5), _
Header:=xlNo
Application.ScreenUpdating = True
MsgBox "Volcado terminado", vbInformation
Range("A1").Select
'save the Excel workbook
ActiveWorkbook.Save
End Sub
XMLHTTP:
The page does ajax calls to get json which it uses to update the content for each page. It does this using query string parameters, one of which is the records per page (default 50). You can view this activity in the browser network tab via dev tools F12
You can avoid using a browser and issue xmlhttp requests to the same endpoint, altering the query string parameters to get all the results. It seems, in my testing, you cannot request all the results in one go. Instead, I issue a request for 1000 results, then check if there are any additional results to obtain and issue further requests, in batches of 1000, to get them.
I am using jsonconverter.bas to parse the json. I extract the info from the json and load it into an array, results, so I can write all results out to the sheet in one go - much more efficient this way as it reduces i/o with the sheet.
After copying the code from the link above into a module called jsonConverter, you need to go VBE > Tools > References and add a reference to Microsoft Scripting Runtime.
TODO:
Add error handling for failed request
VBA:
Option Explicit
Public Sub GetResults()
'VBE > Tools > References > Microsoft Scripting Runtime
Dim ws As Worksheet, results(), i As Long, totalResults As Long
Dim headers(), columnCount As Long, pageNumber As Long
Dim numberOfPages As Long, resultsPerPage As Long, json As Object
resultsPerPage = 1000
pageNumber = 1
Set ws = ThisWorkbook.Worksheets("Sheet1")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://mlb.mlb.com/pubajax/wf/flow/stats.splayer?season=2018&sort_order=%27desc%27&sort_column=%27avg%27&stat_type=hitting&page_type=SortablePlayer&game_type=%27R%27&player_pool=ALL&season_type=ANY&sport_code=%27mlb%27&results=" & resultsPerPage & "&recSP=" & pageNumber & "&recPP=" & resultsPerPage, False
.send
Set json = JsonConverter.ParseJson(.responseText)
totalResults = json("stats_sortable_player")("queryResults")("totalSize")
headers = json("stats_sortable_player")("queryResults")("row").item(1).keys
numberOfPages = json("stats_sortable_player")("queryResults")("totalP")
columnCount = UBound(headers) + 1
ReDim results(1 To totalResults, 1 To columnCount)
Dim r As Long, c As Long, dict As Object, key As Variant
For pageNumber = 1 To numberOfPages
If pageNumber > 1 Then
.Open "GET", "http://mlb.mlb.com/pubajax/wf/flow/stats.splayer?season=2018&sort_order=%27desc%27&sort_column=%27avg%27&stat_type=hitting&page_type=SortablePlayer&game_type=%27R%27&player_pool=ALL&season_type=ANY&sport_code=%27mlb%27&results=" & resultsPerPage & "&recSP=" & pageNumber & "&recPP=" & resultsPerPage, False
.send
Set json = JsonConverter.ParseJson(.responseText)
End If
For Each dict In json("stats_sortable_player")("queryResults")("row")
r = r + 1: c = 1
For Each key In dict.keys
results(r, c) = dict(key)
c = c + 1
Next
Next
Next
End With
With ws
.Cells(1, 1).Resize(1, columnCount) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
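For the error-handling TODO above, one minimal option is to check the HTTP status immediately after each .send inside the With block (Status and statusText are standard properties of the XMLHTTP object); for example:
If .Status <> 200 Then 'anything other than HTTP 200 indicates a failed request
MsgBox "Request failed: " & .Status & " " & .statusText, vbExclamation
Exit Sub
End If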
Sample of output (appreciate it can't really be read as is, but it gives an idea of the layout):
image link: https://i.stack.imgur.com/jiDTP.png
Internet Explorer:
If you want to use the slower browser solution, you can concatenate the page number into the URL and loop to cover all pages. The number of pages can be extracted from the pagination on page 1.
You can see how to write the tables out below each other by looking at this answer. Change the lines with GetLastRow(ws, 1) + 2 to GetLastRow(ws, 1) + 1.
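GetLastRow in that linked answer is simply a last-used-row helper; a hypothetical reconstruction (not copied from that answer) might look like this:
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
'return the last used row in the given column of the supplied worksheet
With ws
GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
End With
End Function
Pasting each new table at GetLastRow(ws, 1) + 1 then places it directly below the previous one.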
Option Explicit
'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
Dim ie As New InternetExplorer, numberOfPages As Long
Dim url As String, i As Long
Const PLAYERS_PER_PAGE = 50
url = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Player+hitting&game_type='R'&season=2018&season_type=ANY&league_code='MLB'&sectionType=sp&statType=hitting&page=1&playerType=ALL&ts="
With ie
.Visible = True
.Navigate2 url
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
numberOfPages = CLng(.querySelector(".paginationWidget-last").innerText)
'do something with page 1
If numberOfPages > 1 Then
For i = 2 To numberOfPages
ie.Navigate2 Replace$(url, "page=1", "page=" & CStr(i))
While ie.Busy Or ie.readyState < 4: DoEvents: Wend
'do something with other pages
Next
Stop 'delete me later
End If
End With
.Quit
End With
End Sub
I have managed to combine his second code with mine, although I have not been very professional about it. I am interested in how to convert the URL of the "official page" into this "official page for JSON".
Option Explicit
'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
Dim ele As Object
Dim y As Integer
Dim EstaPagina As Byte
EstaPagina = 1
'we will output data to excel, starting on row 1
y = 1
Dim ie As New InternetExplorer, numberOfPages As Long
Dim url As String, i As Long
Const PLAYERS_PER_PAGE = 50
url = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Player+pitching&game_type='R'&season=2018&season_type=ANY&league_code='MLB'&sectionType=sp&statType=pitching&page=1&playerType=ALL&ts="
With ie
.Visible = True
.Navigate2 url
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
numberOfPages = CLng(.querySelector(".paginationWidget-last").innerText)
'do something with page 1
If numberOfPages > 1 Then
For i = 1 To numberOfPages
ie.Navigate2 Replace$(url, "page=1", "page=" & CStr(i))
For Each ele In ie.document.getElementById("datagrid").getElementsByTagName("tr")
'show the text content of 'tr' element being looked at
'Debug.Print ele.textContent
'each 'tr' (table row) element contains 4 children ('td') elements
'put text of 1st 'td' in col A
Sheets("Sheet1").Range("A" & y).value = ele.Children(1).textContent
'put text of 2nd 'td' in col B
Sheets("Sheet1").Range("B" & y).value = ele.Children(2).textContent
'put text of 3rd 'td' in col C
Sheets("Sheet1").Range("C" & y).value = ele.Children(3).textContent
'put text of 4th 'td' in col D
Sheets("Sheet1").Range("D" & y).value = ele.Children(4).textContent
'put text of 4th 'td' in col f
Sheets("Sheet1").Range("E" & y).value = ele.Children(5).textContent
'increment row counter by 1
y = y + 1
Next
While ie.Busy Or ie.readyState < 4: DoEvents: Wend
' do something with other pages
Next
' Stop 'delete me later
End If
End With
.Quit
End With
On Error Resume Next
Range("A1").Select
Range(Selection, Selection.End(xlToRight)).Select
Range(Selection, Selection.End(xlDown)).Select
Selection.RemoveDuplicates Columns:=Array(1, 2, 3, 4), _
header:=xlNo
On Error Resume Next
Application.ScreenUpdating = True
MsgBox "Volcado terminado", vbInformation
Range("A1").Select
'save the Excel workbook
ActiveWorkbook.Save
End Sub
I had been successfully pulling mutual fund performance data from Marketwatch.com using the following code:
Dim A As Long
Dim B As Long
Dim C As Long
Dim Z As Long
For Z = 1 To 35
Range("A1").Select
ActiveCell.Offset((37 + (Z * 10)), 0).Select
If ActiveCell.Value = "" Then
Exit For
Else
End If
Dim oHTML As Object
Dim oTable As Object
Dim x As Long
Dim Y As Long
Dim vData As Variant
Set oHTML = CreateObject("HTMLFile")
With CreateObject("WinHTTP.WinHTTPRequest.5.1")
.Open "GET", "http://www.marketwatch.com/investing/fund/vfinx", False
.send
oHTML.body.innerhtml = .responsetext
End With
For Each oTable In oHTML.Getelementsbytagname("table")
If oTable.classname = "fundstable" Then
ReDim vData(1 To oTable.Rows.Length, 1 To oTable.Rows(1).Cells.Length)
For x = 1 To UBound(vData)
For Y = 1 To UBound(vData, 2)
vData(x, Y) = oTable.Rows(x - 1).Cells(Y - 1).innertext
Next Y
Next x
With ActiveCell.Offset(1, 0)
.Resize(UBound(vData), UBound(vData, 2)).Value = vData
End With
Exit For
End If
Next oTable
Next Z
Unfortunately, Marketwatch has added a Captcha to stop bots (i.e. me) from scraping their data. I don't know of any way around this, so I figured I'd try another site.
I looked at Morningstar: http://performance.morningstar.com/fund/performance-return.action?t=VFINX&region=usa&culture=en_US
It appears that the table I want on that page would be: "table.r_table3 width955px print97" or just "r_table3 width955px print97", but neither one seems to work for me.
Any ideas?
Thanks!
The data is loaded by javascript and won't be available via XMLHTTP request as scripts won't have run to load content.
You can use that second link, for example, with IE and introduce a wait to ensure info is loaded. I show getting the table with that class name at index 1. You can change the index here:
ele.item(1).outerHTML
So, for the next table use clipboard.SetText ele.item(2).outerHTML.
You can also loop the .Length of ele to get each table but ensure you write out to a different cell when you paste:
Dim i As Long, pasteRow As Long
pasteRow = 1
For i = 0 To ele.Length - 1
clipboard.SetText ele.item(i).outerHTML
clipboard.PutInClipboard
ws.Cells(pasteRow, 1).PasteSpecial 'paste each table to a different location
pasteRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row + 2 'next paste goes below what was just written
Next
VBA:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, clipboard As Object
Dim ele As Object, ws As Worksheet, t As Date, tableCount As Long
Const MAX_WAIT_SEC As Long = 5
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
With IE
.Visible = True
.navigate "http://performance.morningstar.com/fund/performance-return.action?t=VFINX®ion=usa&culture=en_US"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
t = Timer
Do
DoEvents
On Error Resume Next
Set ele = .querySelectorAll(".r_table3.print97")
tableCount = ele.Length
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While tableCount < 3
If Not ele Is Nothing Then
clipboard.SetText ele.item(1).outerHTML
clipboard.PutInClipboard
ws.Cells(1, 1).PasteSpecial
End If
End With
.Quit
End With
End Sub
How do I extract specific data (name, details) from the website into Excel with Excel VBA?
Below I am trying to get the processor and warranty:
Option Explicit
Sub GetData()
Dim objIE As InternetExplorer
Dim itemELE As Object
Dim html As IHTMLDocument
Dim Processor As String
Dim warranty As String
Dim y As Integer
'start a new browser instance
Set objIE = New InternetExplorer
'make browser visible
objIE.Visible = True
'navigate to page with needed data
objIE.navigate "https://www.harveynorman.com.sg/computers-tablets-and-gaming/computers/laptops/"
'wait for page to load
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
y = 1
For Each itemELE In objIE.document.getElementsByClassName("expandabaleContent")
Processor = itemELE.getElementsByTagName("d1")(0).innerText
warranty = itemELE.getElementsByClassName("d1")(0).getElementsByTagName("a")(0).textContent
Sheets("Sheet1").Range("A" & y).Value = Processor
Sheets("Sheet1").Range("B" & y).Value = warranty
y = y + 1
Next
End Sub
Screenshot of the page:
For the page shown (in your image) you can issue an XMLHTTP (XHR) GET request to grab the product info without opening a slow IE browser instance.
For the specific information:
Processor and warranty info:
If you inspect the page, the info about processor and warranty appears to be associated with the classname facetedResults-feature-list.
You can see the classname and then a dl tag housing a dt tag which has sibling dd tags. Two of these sibling dd tags are associated with the info for processor and warranty.
I use a CSS selector to grab all these dd tags, which can be simplified, in this instance, to ignore the sibling dt and parent dl tags and use just:
.facetedResults-feature-list dd
The "." is a class selector. The CSS combination selection above says get the dd tags within elements with class facetedResults-feature-list
Product titles info:
The titles I get using another CSS selector of:
.facetedResults-title
These are elements with the class facetedResults-title, which contain the product titles.
Writing out product titles, processor and warranty info to the sheet:
A little maths shows me that the processor info repeats every 14, and that if I add 8 to the index for the processor I get the warranty info. You can see how you could write out each of the details as they occur at indices that repeat every 14. I combine the loop over the nodeList of dd elements with the titles to write out to the sheet.
VBA:
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, i As Long, html As New HTMLDocument
Application.ScreenUpdating = False
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.lenovo.com/sg/en/laptops/c/LAPTOPS", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Dim titles As Object, targetedInfo As Object, rowCounter As Long
With html
.body.innerHTML = sResponse
Set titles = .querySelectorAll(".facetedResults-title")
Set targetedInfo = .querySelectorAll(".facetedResults-feature-list dd")
End With
With Worksheets("Sheet1")
For i = 0 To targetedInfo.Length - 1
If i Mod 14 = 0 Then
rowCounter = rowCounter + 1
.Cells(rowCounter, 1) = titles(rowCounter - 1).innerText
.Cells(rowCounter, 2) = targetedInfo(i).innerText
.Cells(rowCounter, 3) = targetedInfo(i + 8).innerText
End If
Next i
End With
Application.ScreenUpdating = True
End Sub
Output sample:
More general info:
CSS selectors:
The product info is associated with an 'expandableContent facetedResults-expandableContent-features expandableContent-is-collapsed facetedResults-expandableContent-69' class name
The prices are associated with an 'expandableContent facetedResults-expandableContent-price expandableContent-is-collapsed' class name.
You can select these by the traditional .getElementsByClassName and then loop over the collection, or, in my case, use a CSS selector for class to do the same thing, and then traverse the length of the returned nodeList.
.getElementsByClassName("expandableContent facetedResults-expandableContent-features expandableContent-is-collapsed facetedResults-expandableContent-69")
is the same as
.querySelectorAll(".expandableContent.facetedResults-expandableContent-features.expandableContent-is-collapsed.facetedResults-expandableContent-69")
The "." is the class selector.
Titles are associated with a class facetedResults-title
VBA:
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, i As Long, html As New HTMLDocument
Application.ScreenUpdating = False
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.lenovo.com/sg/en/laptops/c/LAPTOPS", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Dim info As Object, prices As Object, titles As Object
With html
.body.innerHTML = sResponse
Set titles = .querySelectorAll(".facetedResults-title")
Set info = .querySelectorAll(".expandableContent.facetedResults-expandableContent-features.expandableContent-is-collapsed.facetedResults-expandableContent-69")
Set prices = .querySelectorAll(".expandableContent.facetedResults-expandableContent-price.expandableContent-is-collapsed")
End With
With Worksheets("Sheet1")
For i = 0 To titles.Length - 1
.Cells(i + 1, 1) = titles(i).innerText
.Cells(i + 1, 2) = info(i).innerText
.Cells(i + 1, 3) = prices(i).innerText
Next i
End With
Application.ScreenUpdating = True
End Sub
References required (VBE>Tools>References):
Microsoft HTML Object Library
Qharr has already provided some good options, but in case you still want to try IE, see the code below.
Option Explicit
Sub GetData()
Dim objIE As InternetExplorer
Dim itemELE As Object
Dim html As IHTMLDocument
Dim Processor As String
Dim warranty As String
Dim y As Integer
'start a new browser instance
Set objIE = New InternetExplorer
'make browser visible
objIE.Visible = True
'navigate to page with needed data
objIE.navigate "https://www.lenovo.com/sg/en/laptops/c/LAPTOPS"
'wait for page to load
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
Application.Wait Now + TimeSerial(0, 0, 3)
y = 1
For Each itemELE In objIE.document.getElementsByClassName("facetedResults-feature-list")
If InStr(1, itemELE.className, "bundleList", vbTextCompare) = 0 Then
Application.Wait Now + TimeSerial(0, 0, 2)
Processor = itemELE.getElementsByTagName("dl")(0).innerText
warranty = itemELE.getElementsByTagName("dl")(4).innerText
Sheets("Sheet1").Range("A" & y).Value = Processor
Sheets("Sheet1").Range("B" & y).Value = warranty
y = y + 1
End If
Next
End Sub
Results
I'm trying to download a table from this page
to Excel with VBA: http://www.merval.sba.com.ar/Vistas/Cotizaciones/Acciones.aspx --> table "Panel General"
I can download the table "Panel Merval" but I couldn't download the other table.
I use this code for table "Panel Merval":
Sub GetTable()
Dim ieApp As InternetExplorer
Dim ieDoc As Object
Dim ieTable As Object
Dim clip As DataObject
'create a new instance of ie
Set ieApp = New InternetExplorer
'you don’t need this, but it’s good for debugging
ieApp.Visible = False
'now that we’re in, go to the page we want
ieApp.Navigate "http://www.merval.sba.com.ar/Vistas/Cotizaciones/Acciones.aspx"
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop
'get the table based on the table’s id
Set ieDoc = ieApp.Document
Set ieTable = ieDoc.all.Item("ctl00_ContentCentral_tcAcciones_tpMerval_grdMerval")
'copy the table's html to the clipboard and paste to the sheet
If Not ieTable Is Nothing Then
Set clip = New DataObject
clip.SetText "" & ieTable.outerHTML & ""
clip.PutInClipboard
Sheet1.Select
Sheet1.Range("b2").Select
Sheet1.PasteSpecial "Unicode Text"
End If
'close 'er up
ieApp.Quit
Set ieApp = Nothing
End Sub
or this one
Public Sub PanelLider()
Dim oDom As Object: Set oDom = CreateObject("htmlFile")
Dim x As Long, y As Long
Dim oRow As Object, oCell As Object
Dim vData As Variant
Dim link As String
link = "http://www.merval.sba.com.ar/Vistas/Cotizaciones/Acciones.aspx"
y = 1: x = 1
With CreateObject("msxml2.xmlhttp")
.Open "GET", link, False
.Send
oDom.body.innerHTML = .ResponseText
End With
With oDom.getElementsByTagName("table")(27)
Dim dataObj As Object
Set dataObj = CreateObject("new:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
dataObj.SetText "<table>" & .innerHTML & "</table>"
dataObj.PutInClipboard
End With
Sheets(2).Paste Sheets(2).Cells(1, 1)
End Sub
Could someone help me to download the table "Panel General"?
Many thanks.
Selenium
The following gets the table using Selenium Basic.
Option Explicit
Public Sub GetTable()
Dim html As New HTMLDocument, htable As HTMLTable, headers()
headers = Array("Especie", "Hora Cotización", "Cierre Anterior", "Precio Apertura", "Precio Máximo", _
"Precio Mínimo", "Último Precio", "Variación Diaria", "Volumen Efectivo ($)", "Volumen Nominal", "Precio Prom. Pon")
With New ChromeDriver
.get "http://www.merval.sba.com.ar/Vistas/Cotizaciones/Acciones.aspx"
.FindElementById("__tab_ctl00_ContentCentral_tcAcciones_tpGeneral").Click
Do
DoEvents
Loop While .FindElementById("ctl00_ContentCentral_tcAcciones_tpGeneral_dgrGeneral", timeout:=7000).Text = vbNullString
html.body.innerHTML = .PageSource
Set htable = html.getElementById("ctl00_ContentCentral_tcAcciones_tpGeneral_dgrGeneral")
WriteTable2 htable, headers, 1, ActiveSheet
.Quit
End With
End Sub
Public Sub WriteTable2(ByVal htable As HTMLTable, ByRef headers As Variant, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, tCell As Object, tr As Object, td As Object, R As Long, c As Long, tBody As Object
R = startRow: c = 1
With ws
Set tRow = htable.getElementsByTagName("tr")
For Each tr In tRow
Set tCell = tr.getElementsByTagName("td")
For Each td In tCell
.Cells(R, c).Value = td.innerText
c = c + 1
Next td
R = R + 1: c = 1
Next tr
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
End With
End Sub
References:
HTML Object Library
Selenium Type Library
With IE (Using WriteTable2 sub from above):
Option Explicit
Public Sub GetInfo()
Dim ie As New InternetExplorer, html As HTMLDocument, hTable As HTMLTable, headers(), a As Object
headers = Array("Especie", "Hora Cotización", "Cierre Anterior", "Precio Apertura", "Precio Máximo", _
"Precio Mínimo", "Último Precio", "Variación Diaria", "Volumen Efectivo ($)", "Volumen Nominal", "Precio Prom. Pon")
Application.ScreenUpdating = False
With ie
.Visible = True
.navigate "http://www.merval.sba.com.ar/Vistas/Cotizaciones/Acciones.aspx"
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("__tab_ctl00_ContentCentral_tcAcciones_tpGeneral").Click
Do
DoEvents
On Error Resume Next
Set hTable = .document.getElementById("ctl00_ContentCentral_tcAcciones_tpGeneral_dgrGeneral")
On Error GoTo 0
Loop While hTable Is Nothing
WriteTable2 hTable, headers, 1, ActiveSheet
.Quit '<== Remember to quit application
Application.ScreenUpdating = True
End With
End Sub
References:
Microsoft Internet Controls