Downloading data from a web page (list) to an Excel - html

I have to download data from here:
[http://www.bcra.gov.ar/PublicacionesEstadisticas/Evolucion_moneda.asp][1]
Then I have to save all the data in an Excel. The problem is that I have to choose several dates and several currencies. For example, I have to select 12/31/2018, Dolar, Euro and Pesos. Moreover, I have to choose one currency at a time, and I have many to download.
I've tried Import External Data with Excel, but it didn't work.
I've also tried with this VBA code
Sub descarga_monedas()
Fecha = "2018.06.05"
Moneda = 313
Path = "http://www.bcra.gob.ar/PublicacionesEstadisticas/Evolucion_moneda_3.asp?tipo=E&Fecha=" & Fecha & "&Moneda=" & Moneda & """"
Application.Workbooks.Open (Path)
End Sub
The page seems to block this kind of code.
Is any way to solve this?

You can do it the following way. I have grabbed all the dates but included only one date to be used in conjunction with all currencies. Add another outer loop over dates to add in the dates values i.e. use an outer loop over inputDates collection to get each date.
Option Explicit
Public Sub GetData()
Dim body As String, html As HTMLDocument, http As Object, i As Long
Dim codes As Object, inputCurrency As Object, inputDates As Object, dates As Object
Const BASE_URL As String = "http://www.bcra.gov.ar/PublicacionesEstadisticas/Evolucion_moneda_3.asp?tipo=E&"
Set codes = CreateObject("scripting.dictionary")
Set inputDates = New Collection
Set html = New HTMLDocument '<== VBE > Tools > References > Microsoft HTML Object library
Set http = CreateObject("MSXML2.XMLHTTP")
With http
.Open "GET", "http://www.bcra.gov.ar/PublicacionesEstadisticas/Evolucion_moneda.asp", False
.send
html.body.innerHTML = .responseText
Set inputCurrency = html.querySelectorAll("[name=Moneda] option[value]")
Set dates = html.querySelectorAll("[name=Fecha] option[value]")
For i = 0 To inputCurrency.Length - 1
codes(inputCurrency.item(i).innerText) = inputCurrency.item(i).Value
Next
For i = 0 To dates.Length - 1
inputDates.Add dates.item(i).Value
Next
Dim fecha As String, moneda As String, key As Variant, downloadURL As String
Dim clipboard As Object, ws As Worksheet
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
For Each key In codes.keys
DoEvents
fecha = inputDates.item(1) '<== use an outer loop over inputDates collection to get each date
moneda = key
downloadURL = BASE_URL & "Fecha=" & fecha & "&Moneda=" & moneda '2019.02.11 ,79
.Open "GET", downloadURL, False
.send
html.body.innerHTML = StrConv(http.responseBody, vbUnicode)
clipboard.SetText html.querySelector("table").outerHTML
clipboard.PutInClipboard
Set ws = ThisWorkbook.Worksheets.Add
ws.NAME = fecha & "_" & moneda
ws.Cells(1, 1).PasteSpecial
Next
End With
End Sub

Related

I'm getting stuck at vba runtime error 424

I'm getting
run-time error 424
in 68th row (line)
request.Open "GET", Url, False
and I don't know how to fix it.
My previous question I posted ;
How to scrape specific part of online english dictionary?
My final goal is to get result like this;
A B
beginning bɪˈɡɪnɪŋ
behalf bɪˈhæf
behave bɪˈheɪv
behaviour bɪˈheɪvjər
belong bɪˈlɔːŋ
below bɪˈloʊ
bird bɜːrd
biscuit ˈbɪskɪt
Here's code I wrote, and it's mostly based on someone else's code I found on internet.
' Microsoft ActiveX Data Objects x.x Library
' Microsoft XML, v3.0
' Microsoft VBScript Regular Expressions
Sub ParseHelp()
' Word reference from
Dim Url As String
Url = "https://www.oxfordlearnersdictionaries.com/definition/english/" & Cells(ActiveCell.Row, "B").Value
' Get dictionary's html
Dim Html As String
Html = GetHtml(Url)
' Check error
If InStr(Html, "<TITLE>Not Found</Title>") > 0 Then
MsgBox "404"
Exit Sub
End If
' Extract phonetic alphabet from HTML
Dim wrapPattern As String
wrapPattern = "<span class='name' (.*?)</span>"
Set wrapCollection = FindRegexpMatch(Html, wrapPattern)
' MsgBox StripHtml(CStr(wrapCollection(1)))
' Fill phonetic alphabet into cell
If Not wrapCollection Is Nothing Then
Dim wrap As String
On Error Resume Next
wrap = StripHtml(CStr(wrapCollection(1)))
If Err.Number <> 0 Then
wrap = ""
End If
Cells(ActiveCell.Row, "C").Value = wrap
Else
MsgBox "not found"
End If
End Sub
Public Function StripHtml(Html As String) As String
Dim RegEx As New RegExp
Dim sOut As String
Html = Replace(Html, "</li>", vbNewLine)
Html = Replace(Html, " ", " ")
With RegEx
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "<[^>]+>"
End With
sOut = RegEx.Replace(Html, "")
StripHtml = sOut
Set RegEx = Nothing
End Function
Public Function GetHtml(Url As String) As String
Dim xmlhttp As Object
Set xmlhttp = CreateObject("MSXML2.serverXMLHTTP")
Dim converter As New ADODB.stream
' Get
request.Open "GET", Url, False
request.send
' raw bytes
converter.Open
converter.Type = adTypeBinary
converter.Write request.responseBody
' read
converter.Position = 0
converter.Type = adTypeText
converter.Charset = "utf-8"
' close
GetHtml = converter.ReadText
converter.Close
End Function
Public Function FindRegexpMatch(txt As String, pat As String) As Collection
Set FindRegexpMatch = New Collection
Dim rx As New RegExp
Dim matcol As MatchCollection
Dim mat As Match
Dim ret As String
Dim delimiter As String
txt = Replace(txt, Chr(10), "")
txt = Replace(txt, Chr(13), "")
rx.Global = True
rx.IgnoreCase = True
rx.MultiLine = True
rx.Pattern = pat
Set matcol = rx.Execute(txt)
'MsgBox "Match:" & matcol.Count
On Error GoTo ErrorHandler
For Each mat In matcol
'FindRegexpMatch.Add mat.SubMatches(0)
FindRegexpMatch.Add mat.Value
Next mat
Set rx = Nothing
' Insert code that might generate an error here
Exit Function
ErrorHandler:
' Insert code to handle the error here
MsgBox "FindRegexpMatch. " & Err.GetException()
Resume Next
End Function
Any kind of help would be greatly appreciated.
The following is an example of how to read in values from column A and write out pronounciations to column B. It uses css selectors to match a child node then steps up to parentNode in order to ensure entire pronounciation is grabbed. There are a number of ways you could have matched on the parent node to get the second pronounciation. Note that I use a parent node and Replace as the pronounciation may span multiple childNodes.
If doing this for lots of lookups please be a good netizen and put some waits in the code so as to not bombard the site with requests.
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, urls()
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row 'you need at least two words in column A or change the redim.
urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(urls))
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(urls) To UBound(urls)
.Open "GET", "https://www.oxfordlearnersdictionaries.com/definition/english/" & urls(i), False
.send
html.body.innerHTML = .responseText
data = Replace$(Replace$(html.querySelector(".name ~ .wrap").ParentNode.innerText, "/", vbNullString), Chr$(10), Chr$(32))
results(i) = Right$(data, Len(data) - 4)
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub
Required references (VBE>Tools>References):
Microsoft HTML Object Library
Should you go down the API route then here is a small example. You can make 1000 free calls in a month with Prototype account. The next best, depending on how many calls you wish to make looks like the 10,001 calls (that one extra PAYG call halves the price). # calls will be affected by whether word is head word or needs lemmas lookup call first. The endpoint construction you need is GET /entries/{source_lang}/{word_id}?fields=pronunciations though that doesn't seem to filter massively. You will need a json parser to handle the json returned e.g. github.com/VBA-tools/VBA-JSON/blob/master/JsonConverter.bas. Download raw code from there and add to standard module called JsonConverter. You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime. Remove the top Attribute line from the copied code.
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, words()
'If not performing lemmas lookup then must be head word e.g. behave, behalf
Const appId As String = "yourAppId"
Const appKey As String = "yourAppKey"
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row
words = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(words))
Set html = New MSHTML.HTMLDocument
Dim json As Object
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(words) To UBound(words)
.Open "GET", "https://od-api.oxforddictionaries.com/api/v2/entries/en-us/" & LCase$(words(i)) & "?fields=pronunciations", False
.setRequestHeader "app_id", appId
.setRequestHeader "app_key", appKey
.setRequestHeader "ContentType", "application/json"
.send
Set json = JsonConverter.ParseJson(.responseText)
results(i) = IIf(json("results")(1)("type") = "headword", json("results")(1)("lexicalEntries")(1)("pronunciations")(2)("phoneticSpelling"), "lemmas lookup required")
Set json = Nothing
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub

get web page data through class name

I need to get the dates and temp from a weather website and record it on cells but I am getting a object variable or with block variable not set error.
I tried to data from web in excel but I think the website is protected or something because I keep getting "under maintenance" page when trying to load the page from excel. I got the codes below from a tutorial but I can't make it work.
Sub record()
Dim request As Object
Dim response As String
Dim html As New HTMLDocument
Dim websie As String
Dim temps As Variant
'provide link
'website = "https://finance.yahoo.com/quote/EURUSD=X?p=EURUSD=X"
website = "https://www.accuweather.com/en/us/chicago/60608/september-weather/348308"
'create the object that will make the webpage request
Set request = CreateObject("MSXML2.XMLHTTP")
'go to the link
request.Open "GET", website, False
'send request for webpage
request.send
'get web response data to variable
response = StrConv(request.responseBody, vbUnicode)
'put webpage to an html object
html.body.innerHTML = response
'get temperature from specified element
'temps = html.getElementsByClassName("Trsdu(0.3s) Fw(b) Fz(36px) Mb(-4px) D(ib)")(0).innerText
temps = html.getElementsByClassName("high")(0).innerText
Sheets("record").Range("A1") = temps
End Sub
Sample lines from the website:
<a class="monthly-daypanel is-past">
<div class="date">2</div>
<div class="icon-container"...</div>
<div class="temp">
<div class="high">83</div>
<div class="low">83</div>
</div>
</a>
I want to get the date, high and low.
You need an User-Agent header. I would also extract the json string from one of the script tags (I use regex for this) and use that as source. I add in a date comparison to work out if it is a forecast or actual value. I read the json string into json object using json library and loop the resultant collection storing items of interest in an array for faster writing out to sheet at end.
json library:
I use jsonconverter.bas. Download raw code from here and add to standard module called jsonConverter . You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime.
Option Explicit
Public Sub GetWeatherListings()
Dim s As String, re As Object, ws As Worksheet
Set re = CreateObject("vbscript.regexp")
Set ws = ThisWorkbook.Worksheets("Sheet1")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.accuweather.com/en/us/chicago/60608/september-weather/348308", False
' .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responsetext
End With
Dim results(), r As Long, jsonSource As String, json As Object, item As Object
jsonSource = GetString(re, s, "dailyForecast = (.*?\])")
If jsonSource = "No match" Then Exit Sub
Set json = JsonConverter.ParseJson(jsonSource)
ReDim results(1 To json.count, 1 To 4) 'date, datetime, day > dActual, night > dActual
Dim dateTime() As String, datePart As String, forecast As Boolean
For Each item In json
r = r + 1
dateTime = Split(item("dateTime"), "T")
datePart = dateTime(LBound(dateTime))
forecast = CDate(datePart) >= Date
results(r, 1) = datePart
results(r, 2) = item("dateTime")
results(r, 3) = IIf(forecast, item("day")("dTemp"), item("day")("dActual"))
results(r, 4) = IIf(forecast, item("night")("dTemp"), item("night")("dActual"))
Next
Dim headers()
headers = Array("Date", "DateTime", "Day temp", "Night temp")
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetString(ByVal re As Object, ByVal inputString As String, ByVal pattern As String) As String
Dim matches As Object
With re
.Global = True
.MultiLine = True
.IgnoreCase = True
.pattern = pattern
If .Test(inputString) Then
Set matches = .Execute(inputString)
GetString = matches(0).SubMatches(0)
Exit Function
End If
End With
GetString = "No match"
End Function
Sample of end of output:

Store JSON results into an array excel vba

I have tried using API for get some information for Yahoo Finance
And this is the UDF that I created
Sub Test()
'1 >> High & 2 >> Close
MsgBox YahooHigh("GOOG", "2019-07-18", 1)
MsgBox YahooHigh("GOOG", "2019-07-18", 2)
End Sub
Function YahooHigh(sTicker As String, sDate As String, idx As Integer)
Dim json As Object
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=" & sTicker & "&outputsize=full&apikey=myapikey"
.Send
Set json = JsonConverter.ParseJson(.responseText)
End With
If idx = 1 Then
YahooHigh = json("Time Series (Daily)")(sDate)("2. high")
ElseIf idx = 2 Then
YahooHigh = json("Time Series (Daily)")(sDate)("4. close")
Else
YahooHigh = Empty
End If
End Function
The UDF works fine but of course I will have to load the JSON result each time. As in my example, the UDF will run for twice the first for High value and the second for the Close value
Is there a way to store the json results into an array then instead of loading the json, the array is called. I thought of static but I am stuck at this
What I would like to do is to store all the dates in the json results for specific ticker (High value and Close value only) then to recall the desired value from the static array .. Any ideas?
Another variation:
I have tried using the HTML content and it works fine for me when using the link directly
Sub MyTest()
Dim html As Object, ele As Object
With CreateObject("MSXML2.ServerXMLHTTP")
'https://finance.yahoo.com/quote/GOOG/history?period1=1325566800&period2=1325566800&interval=1d&filter=history&frequency=1d
'.Open "GET", "https://finance.yahoo.com/quote/GOOG/history?period1=1325566800&period2=1325566800&interval=1d&filter=history&frequency=1d", False
Dim sTicker As String
sTicker = Sheets(1).Range("B1").Value 'GOOG
Dim period1 As Long, period2 As Long
period1 = ToUnix(Sheets(1).Range("B2").Value) '3 Jan 2012
period2 = ToUnix(Sheets(1).Range("B3").Value) '3 Jan 2012
.Open "GET", "https://finance.yahoo.com/quote/" & sTicker & "/history?period1=" & period1 & "&period2=" & period2 & "&interval=1d&filter=history&frequency=1d", False
.Send
If .Status <> 200 Then MsgBox "Problem" & vbNewLine & .Status & " - " & .StatusText: Exit Sub
Set html = CreateObject("htmlfile")
html.body.innerHTML = .responseText
'WriteTxtFile html.body.innerHTML
'Stop
Set ele = html.getElementsByTagName("table")(0).getElementsByTagName("tr")(1)
Dim tCell As Object
Dim cnt As Long
For Each tCell In ele.Children
cnt = cnt + 1
If cnt = 3 Then Debug.Print "High: " & tCell.innerText
If cnt = 5 Then Debug.Print "Close: " & tCell.innerText
Next tCell
End With
End Sub
Public Function ToUnix(dt) As Long
ToUnix = DateDiff("s", "1/1/1970", dt)
End Function
When using this line .Open "GET", "https://finance.yahoo.com/quote/GOOG/history?period1=1325566800&period2=1325566800&interval=1d&filter=history&frequency=1d", False it works fine and returns values from High and Close
But when trying to convert the dates from the worksheet to UNIX so as to use them in the link, it doesn't work
This is the problem for me now
Just have your function return the json object, then parse it in your sub.
The json object will contain all your data, and you can parse out what you want.
For example
In your function:
Function YahooHigh(sTicker As String) as object
Dim json As Object
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=" & sTicker & "&outputsize=full&apikey=myapikey"
.Send
Set YahooHigh = JsonConverter.ParseJson(.responseText)
End With
and in your Sub:
Sub Test()
Dim obj As Object
Set obj = YahooHigh("GOOG")
MsgBox obj("Time Series (Daily)")("2019-07-18")("2. high")
MsgBox obj("Time Series (Daily)")("2019-07-18")("4. close")
End Sub

Exploring the Instr VBA Function In Webscraping

I want to scrape this URL https://www.realtor.com/realestateandhomes-search/06510 using the VBA InStr function and extract all URLs with this substring "06510"
Here's is a sample code I've been trying to make work.
Option Explicit
Sub GetLinks()
'
'To use HTMLDocument you need to set a reference to Tools -> References -> Microsoft HTML Object Library
Dim HTML As New HTMLDocument
Dim http As Object
Dim links As Object
Dim link As HTMLHtmlElement
Dim counter As Long
Dim website As Range
Dim LastRange As Range
Dim row As Long
Dim continue As Boolean
Dim respHead As String
Dim lRow As Long
Application.ScreenUpdating = False
' The row where website addresses start
row = 30
continue = True
lRow = Cells(Rows.count, 1).End(xlUp).row + 1
' XMLHTTP gives errors where ServerXMLHTTP does not
' even when using the same URL's
'Set http = CreateObject("MSXML2.XMLHTTP")
Set http = CreateObject("MSXML2.ServerXMLHTTP.6.0")
Do While continue
' Could set this to first cell with URL then OFFSET columns to get next web site
Set website = Range("A" & row)
Set LastRange = Range("B" & lRow)
If Len(website.Value) < 1 Then
continue = False
Exit Sub
End If
If website Is Nothing Then
continue = False
End If
'Debug.Print website
With http
On Error Resume Next
.Open "GET", website.Value, False
.send
' If Err.Num is not 0 then an error occurred accessing the website
' This checks for badly formatted URL's. The website can still return an error
' which should be checked in .Status
'Debug.Print Err.Number
' Clear the row of any previous results
Range("B" & row & ":e" & row).Clear
' If the website sent a valid response to our request
If Err.Number = 0 Then
If .Status = 200 Then
HTML.body.innerHTML = http.responseText
Set links = HTML.getElementsByTagName("a")
For Each link In links
If InStr(link.outerHTML, "06510") Then
LastRange.Value = link.href
End If
Next
End If
Set website = Nothing
Else
'Debug.Print "Error loading page"
LastRange.Value = "Error with website address"
End If
On Error GoTo 0
End With
row = row + 1
Loop
Application.ScreenUpdating = True
End Sub
After inspecting the page, here's a sample of the kind of URL to extract - https://www.realtor.com/realestateandhomes-detail/239-Bradley-St_New-Haven_CT_06510_M36855-92189. Any help will be appreciated
Using QHarr's code in a simplified way...
Sub GetLinks()
Dim url As String, links_count As Integer
Dim j As Integer, row As Integer
Dim XMLHTTP As Object, html As Object
'Dim tr_coll As Object, tr As Object
'Dim elements As Object
Dim i As Long, allLinksOfInterest As Object
'Dim td_coll As Object, td As Object, td_col, objT
url = "https://www.realtor.com/realestateandhomes-search/06510"
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.responseText
Set allLinksOfInterest = html.querySelectorAll("[href*='06510']")
For i = 0 To allLinksOfInterest.Length - 1
Debug.Print allLinksOfInterest.Item(i).href
Next
End Sub
Please check if I'm missing anything. I'm still getting the error "Object doesn't support this property or method"
Don't use Instr on entire node outerHTML during a loop of all a tags. There are times when this is required but this shouldn't be one of them (hopefully).
You want to use attribute = value css selector with contains, *, operator. It is specifically for the purpose of matching on substrings in attribute values. This is more efficient.
Set allLinksOfInterest = html.querySelectorAll("[href*='06510']")
So,
Dim i As Long, allLinksOfInterest As Object
Set allLinksOfInterest = html.querySelectorAll("[href*='06510']")
For i = 0 To allLinksOfInterest.Length - 1
Debug.Print allLinksOfInterest.Item(i).href
Next
Attribute = value with contains operator:
[attr*=value]
Represents elements with an attribute name of attr whose
value contains at least one occurrence of value within the string.
VBA:
Produces 26 links currently.All are relative links so need domain added as shown in loop. Some are duplicates so consider adding to a dictionary as keys so as remove duplicates.
Option Explicit
Public Sub GetLinks()
Dim html As HTMLDocument
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.realtor.com/realestateandhomes-search/06510", False
.send
html.body.innerHTML = .responseText
End With
Dim i As Long, allLinksOfInterest As Object
Set allLinksOfInterest = html.querySelectorAll("[href*='06510']")
For i = 0 To allLinksOfInterest.Length - 1
Debug.Print Replace$(allLinksOfInterest.item(i).href,"about:","https://www.realtor.com")
Next
End Sub
If InStr(link.outerHTML, "06510") Then
In the code above, InStr function was used like boolean function. But it is not boolean, instead it returns integer. So, you should add comparison operator after function. May be like:
If InStr(link.outerHTML, "06510")>0 Then

VBA - Number of Google News Search Results

I have a cell that contains something I would like searched in google news. I want the code to return the number of results for that search. Currently I have this code which I found elsewhere on the site and does not use google news but even then I sometimes get a
runtime error -2147024891 (80070005)
after 70 or so searched and I can't run again.
Sub HawkishSearch()
Dim url As String, lastRow As Long
Dim XMLHTTP As Object, html As Object
Dim start_time As Date
Dim end_time As Date
lastRow = Range("B" & Rows.Count).End(xlUp).Row
Dim cookie As String
Dim result_cookie As String
start_time = Time
Debug.Print "start_time:" & start_time
For i = 2 To lastRow
url = "https://www.google.co.in/search?q=" & Cells(i, 2) & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
If html.getElementById("resultStats") Is Nothing Then
str_text = "0 Results"
Else
str_text = html.getElementById("resultStats").innerText
End If
Cells(i, 3) = str_text
DoEvents
Next
end_time = Time
Debug.Print "end_time:" & end_time
Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub
Best option (IMO) is to use the Google News API and register for an API key. You can then use a queryString including your search term and parse the JSON response to get the result count. I do that below and also populate a collection with the article titles and links. I use a JSON parser called JSONConverter.bas which you download and add to your project. You can then go to VBE > Tools > References > add a reference to Microsoft Scripting Runtime.
Sample JSON response from API:
The {} denotes a dictionary which you access by key, the [] denotes a collection which you access by index or by For Each loop over.
I use the key totalResults to retrieve the total results count from the initial dictionary returned by the API.
I then loop the collection of dictionaries (articles) and pull the story titles and URLs.
You can then inspect the results in the locals window or print out
Sample of results in locals window:
Option Explicit
Public Sub GetStories()
Dim articles As Collection, article As Object
Dim searchTerm As String, finalResults As Collection, json As Object, arr(0 To 1)
Set finalResults = New Collection
searchTerm = "Obama"
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://newsapi.org/v2/everything?q=" & searchTerm & "&apiKey=yourAPIkey", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
Set json = JsonConverter.ParseJson(.responseText)
End With
Debug.Print "total results = " & json("totalResults")
Set articles = json("articles")
For Each article In articles
arr(0) = article("title")
arr(1) = article("url")
finalResults.Add arr
Next
Stop '<== Delete me later
End Sub
Loop:
If deploying in a loop you can use a class clsHTTP to hold the XMLHTTP object. This is more efficient than creating and destroying. I supply this class with a method GetString to retrieve the JSON response from the API, and a GetInfo method to parse the JSON and retrieve the results count and the API results URLs and Titles.
Example of results structure in locals window:
Class clsHTTP:
Option Explicit
Private http As Object
Private Sub Class_Initialize()
Set http = CreateObject("MSXML2.XMLHTTP")
End Sub
Public Function GetString(ByVal url As String) As String
With http
.Open "GET", url, False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
GetString = .responseText
End With
End Function
Public Function GetInfo(ByVal json As Object) As Variant
Dim results(), counter As Long, finalResults(0 To 1), articles As Object, article As Object
finalResults(0) = json("totalResults")
Set articles = json("articles")
ReDim results(1 To articles.Count, 1 To 2)
For Each article In articles
counter = counter + 1
results(counter, 1) = article("title")
results(counter, 2) = article("url")
Next
finalResults(1) = results
GetInfo = finalResults
End Function
Standard module:
Option Explicit
Public Sub GetStories()
Dim http As clsHTTP, json As Object
Dim finalResults(), searchTerms(), searchTerm As Long, url As String
Set http = New clsHTTP
With ThisWorkbook.Worksheets("Sheet1")
searchTerms = Application.Transpose(.Range("A1:A2")) '<== Change to appropriate range containing search terms
End With
ReDim finalResults(1 To UBound(searchTerms))
For searchTerm = LBound(searchTerms, 1) To UBound(searchTerms, 1)
url = "https://newsapi.org/v2/everything?q=" & searchTerms(searchTerm) & "&apiKey=yourAPIkey"
Set json = JsonConverter.ParseJson(http.GetString(url))
finalResults(searchTerm) = http.GetInfo(json)
Set json = Nothing
Next
Stop '<==Delete me later
End Sub
'
Otherwise:
I would use the following where I grab story links by their class name. I get the count and write the links to a collection
Option Explicit
Public Sub GetStories()
Dim sResponse As String, html As HTMLDocument, articles As Collection
Const BASE_URL As String = "https://news.google.com/"
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://news.google.com/topics/CAAqIggKIhxDQkFTRHdvSkwyMHZNRGxqTjNjd0VnSmxiaWdBUAE?hl=en-US&gl=US&ceid=US:en", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
Set html = New HTMLDocument: Set articles = New Collection
Dim numberOfStories As Long, nodeList As Object, i As Long
With html
.body.innerHTML = sResponse
Set nodeList = .querySelectorAll(".VDXfz")
numberOfStories = nodeList.Length
Debug.Print "number of stories = " & numberOfStories
For i = 0 To nodeList.Length - 1
articles.Add Replace$(Replace$(nodeList.item(i).href, "./", BASE_URL), "about:", vbNullString)
Next
End With
Debug.Print articles.Count
End Sub
Standard Google search:
The following works an example standard google search but you will not always get the same HTML structure depending on your search term. You will need to provide some failing cases to help me determine if there is a consistent selector method that can be applied.
Option Explicit
Public Sub GetResultsCount()
Dim sResponse As String, html As HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.google.com/search?q=mitsubishi", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
Set html = New HTMLDocument
With html
.body.innerHTML = sResponse
Debug.Print .querySelector("#resultStats").innerText
End With
End Sub