I'm trying to pull some info from a website that provides oil well data by API number (API is a unique number for every well in the US)
Website: http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1
API example: 1708300502
The issue is, when I get to the 2nd page, IE.document.getElementsByTagName("body")(0).innerText still returns data from the initial page. How do I fetch the updated page data?
The ultimate goal is to get to the 2nd page, click on "30570" via IE.document.getElementsByTagName("a")(0).Click and then read the final 3rd page. I just cannot figure out how to read the updated page :(
Option Explicit
Sub sonris_WellData()
Dim IE As InternetExplorer
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
Dim i As Integer
'Open SONRIS website
Application.StatusBar = "Opening Website"
IE.navigate "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1"
Do While IE.readyState <> 4: DoEvents: Loop
Application.Wait Now() + TimeValue("00:00:01")
Application.StatusBar = False
IE.document.forms(0).p_apinum.Value = "1708300502" 'plug-in API
IE.document.forms(0).submit
' Wait until the next page opens
Application.StatusBar = "Opening Website"
Do While IE.readyState <> 4: DoEvents: Loop
Application.Wait Now() + TimeValue("00:00:01")
Application.StatusBar = False
' Read the page - this is where the issue occurs, MsgBox keeps returning text from the very 1st page
MsgBox IE.document.getElementsByTagName("body")(0).innerText
IE.Quit
End Sub
This seems to be working. Rather than DoEvents use the WinAPI Sleep function. I also added a call to the Sleep function after the form submit.
MOre often we are seeing sites that are dynamically served by some javascript/etc., in these cases the browser may appear to be READYSTATE_COMPLETE or not Busy but the page has not yet rendered the "new" results.
Option Explicit
Public Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
Sub sonris_WellData()
Dim IE As Object 'InternetExplorer
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
Dim i As Integer
'Open SONRIS website
Application.StatusBar = "Opening Website"
IE.navigate "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1"
Do While IE.readyState <> 4
Sleep 1000
Loop
Application.StatusBar = False
IE.document.forms(0).p_apinum.Value = "1708300502" 'plug-in API
IE.document.forms(0).submit
Sleep 1000
' Wait until the next page opens
Application.StatusBar = "Opening Website"
Do While IE.readyState <> 4
Sleep 1000
Loop
Application.StatusBar = False
' Read the page - this is where the issue occurs, MsgBox keeps returning text from the very 1st page
MsgBox IE.document.getElementsByTagName("body")(0).innerText
IE.Quit
End Sub
You can experiment maybe with a slightly longer Sleep after the .submit.
Alternatively, I notice that after you submit, the URL changes, so you could also try changing the second waiting loop to:
Do While IE.LocationURL ="http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1"
Sleep 1000
Loop
This should put the Excel.Application to wait until the URL has changed.
Alternatively, you may have better luck using an XMLHTTPRequest (there are many examples of this here on SO and elsewhere on the internet). This allows you to send a request just like the browser, without actually using a web browser. Then you can simply parse the return text as HTML or XML. I would use the Microsoft XML, v6.0 library reference for this.
POST requests:
① Entering the Well API number
I examined the web page making the selections you mention. I inspected the web traffic using fiddler and noticed that the initial request, when you submit the API number is handled by a POST request.
② POST request:
The POST body has the following parameter:
p_apinum is the key and the associated value is the original Well API number.
Using this info I formulated a POST request direct thus avoiding your first landing page.
③ Pressing the hyperlink:
Next, I noticed that the element you wanted to press:
Looking at the associated HTML it has an associated relative hyperlink:
I use a helper function to parse the page HTML to get this relative link and construct the absolute path: GetNextURL(page.body.innerHTML).
④ Making a new request:
I re-use my HTTPRequest function GetPage to send a second request, with an empty body, and grab all the tables from the HTML document returned via: page.getElementsByTagName("table").
⑤ Writing the tables to the Excel worksheet:
I loop all the tables on the page using helper function AddHeaders to write out the table headers, and WriteTables to write the current table to the sheet.
Example page content:
Example code output:
VBA:
Option Explicit
Public Sub GetWellInfo()
Dim ws As Worksheet, page As HTMLDocument, targetTable As HTMLTable, apiNumbers(), currNumber As Long
Const PARAM1 As String = "p_apinum"
Const BASESTRING As String = "http://sonlite.dnr.state.la.us/sundown/cart_prod/"
apiNumbers = Array(1708300502, 1708300503)
Application.ScreenUpdating = False
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ws
.Cells.ClearContents
For currNumber = LBound(apiNumbers) To UBound(apiNumbers)
Set page = GetPage(BASESTRING & "cart_con_wellapi2", apiNumbers(currNumber), PARAM1)
Set page = GetPage(BASESTRING & GetNextURL(page.body.innerHTML))
Dim allTables As Object
Set allTables = page.getElementsByTagName("table")
For Each targetTable In allTables
AddHeaders targetTable, GetLastRow(ws, 1) + 2, ws
WriteTables targetTable, GetLastRow(ws, 1), ws
Next targetTable
Next currNumber
End With
Application.ScreenUpdating = True
End Sub
Public Function GetPage(ByVal url As String, Optional ByVal apiNumber As Long, Optional ByVal paramN As String = vbNullString) As HTMLDocument
Dim objHTTP As Object, html As New HTMLDocument
Set objHTTP = CreateObject("WinHttp.WinHttpRequest.5.1")
Dim sBody As String
If Not paramN = vbNullString Then sBody = paramN & "=" & apiNumber
With objHTTP
.SetTimeouts 10000, 10000, 10000, 10000
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
On Error Resume Next
.send (sBody)
If Err.Number = 0 Then
If .Status = "200" Then
html.body.innerHTML = .responseText
Set GetPage = html
Else
Debug.Print "HTTP " & .Status & " " & .statusText
Exit Function
End If
Else
Debug.Print "Error " & Err.Number & " " & Err.Source & " " & Err.Description
Exit Function
End If
On Error GoTo 0
End With
End Function
Public Function GetNextURL(ByVal inputString As String)
GetNextURL = Replace$(Replace$(Split(Split(inputString, "href=")(1), ">")(0), Chr$(34), vbNullString), "about:", vbNullString)
End Function
Public Sub AddHeaders(ByVal hTable As Object, ByVal startRow As Long, ByVal ws As Worksheet)
Dim headers As Object, header As Object, columnCounter As Long
Set headers = hTable.getElementsByTagName("th")
For Each header In headers
columnCounter = columnCounter + 1
ws.Cells(startRow, columnCounter) = header.innerText
Next header
End Sub
Public Sub WriteTables(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByRef ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
r = startRow
With ActiveSheet
Set tRow = hTable.getElementsByTagName("tr")
For Each tr In tRow
Set tCell = tr.getElementsByTagName("td")
For Each td In tCell
.Cells(r, c).Value = td.innerText
c = c + 1
Next td
r = r + 1: c = 1
Next tr
End With
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
End With
End Function
References:
VBE > Tools > References > HTML Object Library.
Related
The code below navigates to a webpage, fills search boxes with queries, and submits to the results page. However, the final element collection in the script, tdtags, which is defined after the redirect, is pulling data from the original search page, rather than the results page. I currently have the while ie.busy loop and a timed delay in the script, neither of which works. I have also tried waiting until an element only present in the results page becomes available in the html, but this also does not work.
Dim twb As Workbook
Dim ie As Object
Set twb = ThisWorkbook
twb.Activate
Set ie = CreateObject("internetexplorer.application")
'church = Sheets("Control").Range("A2").Value
'minister = Sheets("Control").Range("A4").Value
location = "London" 'Sheets("Control").Range("A6").Value
'denomination = Sheets("Control").Range("A8").Value
With ie
.navigate "http://www.ukchurch.org/index.php"
.Visible = True
Do While .Busy Or .ReadyState <> 4
DoEvents
Loop
End With
Application.Wait (Now + TimeValue("00:00:02"))
Set intags = ie.document.getelementsbytagname("input")
For Each intag In intags
If intag.getattribute("name") = "name" Then
If church <> "" Then
intag.Value = church
End If
ElseIf intag.getattribute("name") = "minister" Then
If minister <> "" Then
intag.Value = minister
End If
ElseIf intag.getattribute("name") = "location" Then
If location <> "" Then
intag.Value = location
End If
Else
End If
Next intag
Set dropopt = ie.document.getelementsbytagname("select")
For Each dropo In dropopt
If dropo.classname = "DenominationDropDown" Then
Set opttags = dropo.getelementsbytagname("option")
For Each opt In opttags
If opt.innertext = denomination Then
opt.Selected = True
End If
Next opt
End If
Next dropo
On Error Resume Next
For Each intag In intags
If intag.getattribute("src") = "images/ukchurch/button-go.jpg" Then
intag.Click
Do While ie.Busy Or ie.ReadyState <> 4
DoEvents
Loop
Application.Wait (Now + TimeValue("00:00:03"))
Exit For
End If
Next intag
Application.Wait (Now + TimeValue("00:00:03"))
Set tdtags = ie.document.getelementsbytagname("td")
For Each td In tdtags
If td.classname = "pText" Then
Debug.Print td.innertext
Debug.Print ie.locationURL
pagecount = Right(td.innertext, InStr(td.innertext, ":"))
End If
Next td
Debug.Print pagecount
End Sub
Any diagnosis would be appreciated.
Automating IE is a pain, so avoid it.
The following function requests the results page directly.
Public Function GetSearchResult(Optional ByVal ResultPage As Integer = 0, Optional ByVal ChurchName As String = "", Optional ByVal Minister As String = "", Optional ByVal ChurchLocation As String = "", Optional ByVal Denomination As String = "") As Object
Dim Request As Object: Set Request = CreateObject("MSXML2.serverXMLHTTP")
Dim Result As Object: Set Result = CreateObject("htmlfile")
Request.Open "POST", "http://www.ukchurch.org/searchresults1.php", False
Request.setRequestHeader "content-type", "application/x-www-form-urlencoded"
Request.send IIf(ResultPage = 0, "", "page=" & ResultPage & "&") & "name=" & ChurchName & "&minister=" & Minister & "&location=" & ChurchLocation & "&denomination=" & Denomination
Result.body.innerHTML = Request.responseText
Set GetSearchResult = Result
End Function
An example which prints the contents of the td with classname pText inside the table containing the search results
Sub Main()
Dim Document As Object
Set Document = GetSearchResult(ChurchLocation:="London")
Dim ResultRows as Object
Dim ResultRow As Object
Set ResultRows = Document.getElementsByTagName("table")(8).getElementsByTagName("td")
For Each ResultRow in ResultRows
If ResultRow.Classname = "pText" Then
Debug.print ResultRow.innerText
End If
Next
End Sub
Update
You need to add a couple of References to your VBA project to make the following code work.
In the VBA Editor, Goto the Tools Menu, Click References and in the dialog that opens add a check next to the following two items: Microsoft XML, v6.0 and Microsoft HTML Object Library (
Public Function GetChurchDetails(ByVal ChurchID As String) As MSHTML.HTMLDocument
Dim Request As New MSXML2.ServerXMLHTTP60
Dim Result As New MSHTML.HTMLDocument
Request.Open "GET", "http://www.ukchurch.org/churchdetails.php?churchid=" & ChurchID, False
Request.send
Result.body.innerHTML = Request.responseText
Set GetChurchDetails = Result
End Function
Sub Main2()
Dim Document As MSHTML.HTMLDocument
Dim Church As MSHTML.HTMLDocument
Set Document = GetSearchResult(ChurchLocation:="London")
Dim ResultRows As MSHTML.IHTMLElementCollection
Dim ResultRow As MSHTML.IHTMLElement
Dim ChurchID As String
'Set ResultRows = Document.getElementsByTagName("table")(8).getElementsByTagName("td")
' all result links on searchresults1.php have a classname of resultslink which makes getting them much easier
Set ResultRows = Document.getElementsByClassName("resultslink")
For Each ResultRow In ResultRows
ChurchID = ResultRow.getAttribute("href")
ChurchID = Mid(ChurchID, InStr(1, ChurchID, "=") + 1)
Set Church = GetChurchDetails(ChurchID)
' code to read data from the page using Church as the Document
' eg: Church.getElemenetsByTagName("td").....
Next
End Sub
You only need to use the "post" mode when your submitting data, for everything else you can use "get"
I am not a programmer but I have managed to learn just a few things in VBA but now on a certain website I face a problem that does not exist on some other.
What should happen is that a page form should be completed with data, submit button clicked and then I want to get some data from the result page.
The first phase works fine but it seems that no matter what I do the VBA still reads data from the page before submit was clicked.
The code is:
Sub VIES2()
'Uruchomienie Internet Explorera i wstrzymanie dalszej akcji aż uzyska stan gotowości
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.Navigate "http://ec.europa.eu/taxation_customs/vies/?locale=pl"
Do While IE.ReadyState <> 4: DoEvents: Loop
'Wypełnienie formularza odpowiednimi wartościami i kliknięcie przycisku sprawdzenia
IE.document.getElementbyId("countryCombobox").Value = "IT"
IE.document.getElementbyId("number").Value = "01802840023"
IE.document.getElementbyId("requesterCountryCombobox").Value = "IT"
IE.document.getElementbyId("requesterNumber").Value = "01802840023"
IE.document.getElementbyId("submit").Click
'Test uzyskiwania opisu i identyfikatora zapytania
For t = 1 To 999999
Next t
Application.Wait Now + TimeValue("00:00:10")
Do While IE.ReadyState <> 4: DoEvents: Loop
For t = 1 To 999999
Next t
Application.Wait Now + TimeValue("00:00:10")
MsgBox IE.LocationURL
Set Text = IE.document.getElementsbyClassName("layout-content")
For Each Element In Text
MsgBox Element.innerText
Next
Set Test = IE.document.getElementsbyTagName("TABLE")
For Each Element In Test
MsgBox Element.innerText
Next
End Sub
I have tried putting break, various wait loops and Application.Wait as suggested in similar questions where it seems to have worked. Here, even after the page is long after fully loaded the code still reads the old page - at least pulling the URL and some data seems to point that it is the case.
UPDATE: I should also add that I have tried to make the macro refresh the page but it clears the input content. What is interesting that target URL is:
http://ec.europa.eu/taxation_customs/vies/vatResponse.html
If I change the initial page to this the browser instantly redirects to the original page with notification that initial data is needed. The macro then completes the data and clicks submit button. In this case IE.LocationURL indicates this URL:
http://ec.europa.eu/taxation_customs/vies/vatResponse.html
but according to the content I get with getElementsbyClassName still reads elements from the initial page:
http://ec.europa.eu/taxation_customs/vies/?locale=pl
This worked to print out the VAT response table
Note:
If on 32-bit remove the PtrSafe.
Code:
Option Explicit
Declare PtrSafe Sub sleep Lib "kernel32" Alias "Sleep" (ByVal dwmilliseconds As Long)
Public Sub VIES2()
Application.ScreenUpdating = False
Dim IE As Object
'Uruchomienie Internet Explorera i wstrzymanie dalszej akcji az uzyska stan gotowosci
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.navigate "http://ec.europa.eu/taxation_customs/vies/?locale=pl"
Do While IE.ReadyState <> 4: DoEvents: Loop
'Wypelnienie formularza odpowiednimi wartosciami i klikniecie przycisku sprawdzenia
IE.document.getElementById("countryCombobox").Value = "IT"
IE.document.getElementById("number").Value = "01802840023"
IE.document.getElementById("requesterCountryCombobox").Value = "IT"
IE.document.getElementById("requesterNumber").Value = "01802840023"
IE.document.getElementById("submit").Click
sleep (5000) 'or increase to 10000
Dim tbl As Object
Set tbl = IE.document.getElementById("vatResponseFormTable")
Dim ws As Worksheet
Set ws = ActiveWorkbook.Worksheets.Add
ws.Name = "Results"
Dim rng As Range, currentRow As Object, currentColumn As Object, i As Long, outputRow As Long
outputRow = outputRow + 1
Set rng = ws.Range("B" & outputRow)
For Each currentRow In tbl.Rows
For Each currentColumn In currentRow.Cells
rng.Value = currentColumn.outerText
Set rng = rng.Offset(, 1)
i = i + 1
Next currentColumn
outputRow = outputRow + 1
Set rng = rng.Offset(1, -i)
i = 0
Next currentRow
Application.ScreenUpdating = True
End Sub
Output:
Although QHarr's solution is working in my end, I'm providing with another with no hardcoded delay within the script.
Using IE as your question was:
Sub Get_Data()
Dim HTML As HTMLDocument, post As Object, elems As Object
Dim elem As Object, r&, c&
With New InternetExplorer
.Visible = False
.navigate "http://ec.europa.eu/taxation_customs/vies/?locale=pl"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set HTML = .document
With HTML
.getElementById("countryCombobox").Value = "IT"
.getElementById("number").Value = "01802840023"
.getElementById("requesterCountryCombobox").Value = "IT"
.getElementById("requesterNumber").Value = "01802840023"
.getElementById("submit").Click
Do: Set post = .getElementById("vatResponseFormTable"): DoEvents: Loop While post Is Nothing
For Each elems In post.Rows
For Each elem In elems.Cells
c = c + 1: Cells(r + 1, c) = elem.innerText
Next elem
c = 0: r = r + 1
Next elems
End With
.Quit
End With
End Sub
Reference to add to the library:
1. Microsoft Internet Controls
2. Microsoft HTML Object Library
Using xmlhttp request (It is way faster than IE):
Sub Get_Data()
Dim elems, elem As Object
Dim QueryString$, S$, r&, c&
QueryString = "memberStateCode=IT&number=01802840023&traderName=&traderStreet=&traderPostalCode=&traderCity=&requesterMemberStateCode=IT&requesterNumber=01802840023&action=check&check=Weryfikuj"
With New XMLHTTP
.Open "POST", "http://ec.europa.eu/taxation_customs/vies/vatResponse.html", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.send QueryString
S = .responseText
End With
With New HTMLDocument
.body.innerHTML = S
For Each elems In .getElementById("vatResponseFormTable").Rows
For Each elem In elems.Cells
c = c + 1: Cells(r + 1, c) = elem.innerText
Next elem
c = 0: r = r + 1
Next elems
End With
End Sub
Reference to add to the library:
1. Microsoft XML, V6
2. Microsoft HTML Object Library
Most of the time you should search if there isn't a REST/SOAP available to achieve that kind of task.
Using an Internet Explorer instance for this is a total overkill.
Try this simple function, that uses the SOAP service to validate VAT numbers:
Function IsVatValid(country_code, vat_number)
Dim objHTTP As Object
Dim xmlDoc As Object
Set objHTTP = CreateObject("WinHttp.WinHttpRequest.5.1")
sURL = "http://ec.europa.eu/taxation_customs/vies/services/checkVatService"
sEnv = "<s11:Envelope xmlns:s11='http://schemas.xmlsoap.org/soap/envelope/'>" & _
"<s11:Body>" & _
"<tns1:checkVat xmlns:tns1='urn:ec.europa.eu:taxud:vies:services:checkVat:types'>" & _
"<tns1:countryCode>" & country_code & "</tns1:countryCode>" & _
"<tns1:vatNumber>" & vat_number & "</tns1:vatNumber>" & _
"</tns1:checkVat>" & _
"</s11:Body>" & _
"</s11:Envelope>"
objHTTP.Open "Post", sURL, False
objHTTP.setRequestHeader "Content-Type", "text/xml"
objHTTP.setRequestHeader "SOAPAction", "checkVatService"
objHTTP.send (sEnv)
objHTTP.waitForResponse
Set xmlDoc = CreateObject("HTMLFile")
xmlDoc.body.innerHTML = objHTTP.responsetext
IsVatValid = CBool(xmlDoc.getElementsByTagName("valid")(0).innerHTML)
Set xmlDoc = Nothing
Set objHTTP = Nothing
End Function
And then you can simply validate all your vat numbers:
Debug.Print IsVatValid("IT", "01802840023")
>>> True
My goal is to scrape the source code of a web page.
The site seems to have different Frames which is why my code won't work properly.
I tried to modify a code which I found online which should solve the Frame issue.
The following code creates an error (object required) at:
Set profileFrame .document.getElementById("profileFrame")
Public Sub IE_Automation()
'Needs references to Microsoft Internet Controls and Microsoft HTML Object Library
Dim baseURL As String
Dim IE As InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim profileFrame As HTMLIFrame
Dim slotsDiv As HTMLDivElement
'example URL with multiple frames
baseURL = "https://www.xing.com/search/members?section=members&keywords=IT&filters%5Bcontact_level%5D=non_contact"
Set IE = New InternetExplorer
With IE
.Visible = True
'Navigate to the main page
.navigate baseURL & "/publictrophy/index.htm?onlinename=ace_anubis"
While .Busy Or .readyState <> READYSTATE_COMPLETE: DoEvents: Wend
'Get the profileFrame iframe and navigate to it
Set profileFrame = .document.getElementById("profileFrame")
.navigate baseURL & profileFrame.src
While .Busy Or .readyState <> READYSTATE_COMPLETE: DoEvents: Wend
Set HTMLdoc = .document
End With
'Display all the text in the profileFrame iframe
MsgBox HTMLdoc.body.innerText
'Display just the text in the slots_container div
Set slotsDiv = HTMLdoc.getElementById("slots_container")
MsgBox slotsDiv.innerText
End Sub
Hummmm, I'm not exactly sure what you are doing here, but can you try the code below?
Option Explicit
Sub Sample()
Dim ie As Object
Dim links As Variant, lnk As Variant
Dim rowcount As Long
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.navigate "https://www.xing.com/search/members?section=members&keywords=IT&filters%5Bcontact_level%5D=non_contact"
'Wait for site to fully load
'ie.Navigate2 URL
Do While ie.Busy = True
DoEvents
Loop
Set links = ie.document.getElementsByTagName("a")
rowcount = 1
With Sheets("Sheet1")
For Each lnk In links
'Debug.Print lnk.innerText
'If lnk.classname Like "*Real Statistics Examples Part 1*" Then
.Range("A" & rowcount) = lnk.innerText
rowcount = rowcount + 1
'Exit For
'End If
Next
End With
End Sub
General:
I think in your research you may have come across this question and misunderstood how it relates/doesn't relate to your circumstance.
I don't think iFrames are relevant to your query. If you are after the list of names, their details and the URLs to their pages you can use the code below.
CSS Selectors
To target the elements of interest I use the following two CSS selectors. These use style infomation on the page to target the elements:
.SearchResults-link
.SearchResults-item
"." means class, which is like saying .getElementsByClassName. The first gets the links, and the second gets the description information on the first page.
With respect to the first CSS selector: The actual link required is dynamically constructed, but we can use the fact that the actual profile URLs have a common base string of "https://www.xing.com/profile/", which is then followed by the profileName. So, in function GetURL, we parse the outerHTML returned by the CSS selector to get the profileName and concatenate it with the BASESTRING constant to get our actual profile link.
Code:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer
With IE
.Visible = True
.navigate "https://www.xing.com/publicsearch/query?search%5Bq%5D=IT"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim a As Object, exitTime As Date, linksNodeList As Object, profileNodeList As Object
' exitTime = Now + TimeSerial(0, 0, 5) '<== uncomment this section if timing problems
'
' Do
' DoEvents
' On Error Resume Next
' Set linksNodeList = .document.querySelectorAll(".SearchResults-link")
' On Error GoTo 0
' If Now > exitTime Then Exit Do
' Loop While linksNodeList Is Nothing
Set linksNodeList = .document.querySelectorAll(".SearchResults-link") '<== comment this out if uncommented section above
Set profileNodeList = .document.querySelectorAll(".SearchResults-item")
Dim i As Long
For i = 0 To profileNodeList.Length - 1
Debug.Print "Profile link: " & GetURL(linksNodeList.item(i).outerHTML)
Debug.Print "Basic info: " & profileNodeList.item(i).innerText
Next i
End With
End Sub
Public Function GetURL(ByVal htmlSection As String) As String
Const BASESTRING As String = "https://www.xing.com/profile/"
Dim arr() As String
arr = Split(htmlSection, "/")
GetURL = BASESTRING & Replace$(Split((arr(UBound(arr) - 1)), ">")(0), Chr$(34), vbNullString)
End Function
Example return information:
I've been banging my head against he wall trying to figure out why this VBA code will not work :(
I am simply trying to insert the value entered into the excel input box into a website's input box. I am a novice when it comes to HTML so I'm sure that has something to do with it.
Here is the HTML element from the website Zomato.com:
<input class="dark" id="location_input" role="combobox" aria-expanded="true" aria-labelledby="label_search_location" aria-owns="explore-location-suggest" aria-autocomplete="list" placeholder="Please type a location...">
Here is my VBA code:
Sub Merchant_Extraction()
Dim IE As Object
Dim form As Variant
Dim button As Variant
Set IE = CreateObject("internetexplorer.application")
merchantzip = InputBox("Enter Zip Code")
With IE
.Visible = True
.navigate ("http://www.zomato.com")
While IE.readystate <> 4
DoEvents
Wend
IE.Document.GetElementByID(“location_input_sp”).Item.innertext = merchantzip
Set form = IE.Document.getelementsbytagname("form")
Set button = form(0).onsubmit
form(0).get
End With
Set IE = Nothing
End Sub
I am unclear why it's not working - any help would be incredible!
API XMLHTTP GET request
The API was mentioned. The documention is here.
The basic free account allows access to restaurant information and search APIs (up to 1000 calls/day).
An example first 0-20 results call, with a city id specified (68 for Manchester,UK), is as follows; a JSON response is received. The response it parsed into a JSON object with JSONConverter.bas
Option Explicit
Public Sub GetInfo()
Dim URL As String, strJSON As String, json As Object
URL = "https://developers.zomato.com/api/v2.1/search?entity_id=68&entity_type=city&start=0&count=20"
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.setRequestHeader "Content-Type", "application/json"
.setRequestHeader "user-key", "yourAPIkey"
.send
strJSON = .responseText
End With
Set json = JsonConverter.ParseJson(strJSON)
'other stuff with JSON object
End Sub
Example JSON response:
Zomato - Common APIs:
Finding your city ID:
The quickest way for me was to go to concatenate the city onto a base URL string e.g. https://www.zomato.com/manchester, then click search and right-click inspect HTML on first result. Then Ctrl+F to bring up search box, search for CITY_ID, and scan through find results for the HTML until city id found e.g.
As far as entering values into webpages the correct syntax would be:
IE.Document.all.Item("location_input").Value = ""
I've combined your routine with some code that I use so you can see an example. I have not been able to test however. In my environment, the IE object disconnects after the .navigate portion so I added in a loop to find and re-assign the object...
Option Explicit
Public Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
Sub Merchant_Extraction()
Dim IE As Object, objShellWindows As Object
Dim MerchantZip As String, strWebPath As String
Dim Form As Variant, Button As Variant
Dim X As Long
strWebPath = "http://www.zomato.com"
MerchantZip = InputBox("Enter Zip Code")
If MerchantZip = vbNullString Then Exit Sub
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Visible = True
.Navigate strWebPath
End With
Do
Sleep 250
DoEvents
Loop While IE.Busy Or IE.ReadyState <> 4
If TypeName(IE) <> "IWebBrowser2" Or IE.Name <> "Internet Explorer" Then
Set objShellWindows = CreateObject("Shell.Application").Windows
For X = 0 To objShellWindows.Count - 1
Set IE = objShellWindows.Item(X)
If Not IE Is Nothing Then
If IE.Name = "Internet Explorer" Then
If InStr(1, IE.LocationURL, strWebPath, 1) > 0 Then
Do While IE.Busy Or IE.ReadyState <> 4
Sleep 250
DoEvents
Loop
Exit For
End If
End If
End If
Set IE = Nothing
Next
Set objShellWindows = Nothing
End If
If Not IE Is Nothing Then
IE.Document.all.Item("location_input").Value = MerchantZip
Sleep 250
For Each Button In IE.Document.getelementsbytagname("form")
If StrComp(Button.Type, "Button", 1) = 0 Then
Button.Click
End If
Next
Set IE = Nothing
End If
End Sub
I created a macro, in which I can fetch each URL from any webpage.
Now, I have each URL in column.
How can I check if a URL is working.
If any one of these URL is not working then it should show me error not working next to URL in next column.
Below is the code I wrote:
Sub CommandButton1_Click()
Dim ie As Object
Dim html As Object
Dim j As Integer
j = 1
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
url = "www.mini.co.uk"
ie.navigate url
Do While ie.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "Trying to go to website ..."
Loop
Application.StatusBar = " "
Set html = ie.document
'Dim htmltext As Collection
Dim htmlElements As Object
Dim htmlElement As Object
Set htmlElements = html.getElementsByTagName("*")
For Each htmlElement In htmlElements
'If htmlElement.getAttribute("href") <> "" Then Debug.Print htmlElement.getAttribute("href")
If htmlElement.getAttribute("href") <> "" Then Cells(j, 1).Value = htmlElement.getAttribute("href")
j = j + 1
Next
ActiveSheet.Range("$A$1:$A$2752").removeDuplicates Columns:=1, Header:=xlNo
End Sub
This code is to fetch the URL from web-page.
Below is the code to check the status of URL, if it is working or not.
Sub CommandButton2_Click()
Dim k As Integer
Dim j As Integer
k = 1
j = 1
'Dim Value As Object
'Dim urls As Object
'urls.Value = Cells(j, 1)
For Each url In Cells(j, 1)
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False
url = Cells(j, 1)
ie.navigate url
Do While ie.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "checking the Data. Please wait..."
Loop
Cells(k, 2).Value = "OK"
'Set html = ie.document
ie.Quit
j = j + 1
k = k + 1
Next
End Sub
Public Function IsURLGood(url As String) As Boolean
Dim request As New WinHttpRequest
On Error GoTo IsURLGoodError
request.Open "HEAD", url
request.Send
If request.Status = 200 Then
IsURLGood = True
Else
IsURLGood = False
End If
Exit Function
IsURLGoodError:
IsURLGood = False
End Function
Sub testLink()
Dim source As Range, req As Object, url$
Set source = Range("A2:B2")
source.Columns(2).Clear
For i = 1 To source.Rows.Count
url = source.Cells(i, 1)
If IsURLGood(url) Then
source.Cells(i, 2) = "OK"
Else
source.Cells(i, 2) = "Down"
End If
Next
MsgBox "Done"
End Sub
Since you are interested to know whether the link is working, xmlhttp may be one solution.
Set sh = ThisWorkBook.Sheets("Sheet1")
Dim column_number: column_number = 2
'Row starts from 2
For i=2 To 100
strURL = sh.cells(i,column_number)
sh.cells(i, column_number+1) = CallHTTPRequest(strURL)
Next
Function CallHTTPRequest(strURL)
Set objXMLHTTP = CreateObject("MSXML2.XMLHTTP")
objXMLHTTP.Open "GET", strURL, False
objXMLHTTP.send
status = objXMLHTTP.Status
'strContent = ""
'If objXMLHTTP.Status = 200 Then
' strContent = objXMLHTTP.responseText
'Else
' MsgBox "HTTP Request unsuccessfull!", vbCritical, "HTTP REQUEST"
' Exit Function
'End If
Set objXMLHTTP = Nothing
CallHTTPRequest = status
End Function
You can actually get the status codes using IE automation, but it requires working with events and a reference to the Microsoft Internet Controls library.
Private Declare PtrSafe Sub SleepEx Lib "Kernel32.dll" (ByVal dwMilliseconds As Long, Optional ByVal bAlertable As Boolean = True)
Private WithEvents ie As SHDocVw.InternetExplorer
Private LastStatusCode As Long
Private Sub ie_NavigateError(ByVal pDisp As Object, URL As Variant, TargetFrameName As Variant, StatusCode As Variant, Cancel As Boolean)
LastStatusCode = StatusCode
End Sub
Public Sub NavigateReturnStatus(url As String) As Long
Set ie = CreateObject("InternetExplorer.Application")
Status = 0
ie.Navigate url
Do While IEObject.ReadyState <> READYSTATE_COMPLETE Or IEObject.Busy
SleepEx 50 'No busy waiting, short wait time
DoEvents 'Need to receive events from IE application
Loop
NavigateReturnStatus = LastStatusCode
End Sub
This doesn't return a conventional HTTP status code, but instead returns a NavigateError status code. That means you can get more detailed information about errors, but no information about successful navigation. Of course, if it's 0, no error has occurred so the status is likely 200.
Speed is very much slower than a WinHTTP/MSXML approach, but I'm sharing this mainly for cases where someone's already navigating using Internet Explorer anyway.
Of course, the code can (and likely should) be modified to reuse the internet explorer application.
Sub URLWorkingorNot()
'Make Sure to Select Cells containing URL
Dim i As Long
AddReference
i= 1
Selection.Replace "#N/A", "NA": Selection.Offset(0, 1).EntireColumn.Insert
Dim IE As InternetExplorer
If ActiveWorkbook Is Nothing Then Exit Sub
For Each cell In Selection
If cell.Value <> "" Then
Set IE = New InternetExplorer
IE.Navigate2 cell.Value
IE.Left = 900
IE.Width = 900
IE.Visible = True
While IE.Busy: DoEvents: Wend
On Error Resume Next
If InStr(1, IE.document.body.innerText, "The webpage cannot be found", vbBinaryCompare) <> 0 Then cell.Offset(0, 1).Value = "Not Available"
'MsgBox IE.document.body.innerText
If err.Number <> 0 Then err.Clear: On Error GoTo 0
IE.Quit: Set IE = Nothing
End If
i = i + 1:
ProgressBar Selection.Count, i, "Working on " & i & " Cell": DoEvents
If ActiveWorkbook.Path <> "" And Left(i, 3) = "00" Then ActiveWorkbook.Save
Next cell
Unload UProgressBar
Application.StatusBar = ""
End Sub
Sub AddReference()
'cOPIED FROM iNTERNET
'Macro purpose: To add a reference to the project using the GUID for the
'reference library
Dim strGUID As String, theRef As Variant, i As Long
'Update the GUID you need below.
'strGUID = "{00020905-0000-0000-C000-000000000046}"
strGUID = "{EAB22AC0-30C1-11CF-A7EB-0000C05BAE0B}"
'iNTERNET cONTROLS - "{EAB22AC0-30C1-11CF-A7EB-0000C05BAE0B}" MAJOR 1 MINOR 1
'HTMLOBJECT "{3050F1C5-98B5-11CF-BB82-00AA00BDCE0B}" MAJOR 4 MINOR 0
'Set to continue in case of error
On Error Resume Next
'Remove any missing references
For i = ThisWorkbook.Vbproject.References.Count To 1 Step -1
Set theRef = ThisWorkbook.Vbproject.References.Item(i)
If theRef.isbroken = True Then
ThisWorkbook.Vbproject.References.Remove theRef
End If
Next i
'Clear any errors so that error trapping for GUID additions can be evaluated
err.Clear
'Add the reference
ThisWorkbook.Vbproject.References.AddFromGuid _
GUID:=strGUID, Major:=1, Minor:=0
ThisWorkbook.Vbproject.References.AddFromFile "C:\Windows\System32\UIAutomationCore.dll"
'If an error was encountered, inform the user
Select Case err.Number
Case Is = 32813
'Reference already in use. No action necessary
Case Is = vbNullString
'Reference added without issue
Case Else
'An unknown error was encountered, so alert the user
MsgBox "A problem was encountered trying to" & vbNewLine _
& "add or remove a reference in this file" & vbNewLine & "Please check the " _
& "references in your VBA project!", vbCritical + vbOKOnly, "Error!"
End Select
On Error GoTo 0
End Sub