Error when try to import URL Json to Excel - json

I'm trying to import the information in JSON format from the following Url by WinHttpRequest:
https://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_allodds.aspx&matchid=default
Sub test()
Dim xmlhttp As Object
Dim strUrl As String: strUrl = "https://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_allodds.aspx&matchid=default"
Dim objRequest As Object
Set objRequest = CreateObject("WinHttp.WinHttpRequest.5.1")
With objRequest
.Open "GET", strUrl, False
.send
End With
Debug.Print objRequest.responseText
End Sub
However, it just shows nothing similar to the Url but a lot of garbled messages.
I would like to know how to address this problem.
The code works fine if I use other Url.

XHR:
I believe the page has bot prevention measures in place whereby, if it suspects you are a bot a challenge is raised which requires javascript to run. If that runs successfully an XHR request is issued with info from the challenge in the headers and that, were you to be using a browser, would lead to your content being correctly updated to show expected values.
The first time I ran GET request I got the expected json response and after that I got the following:
<HTML>
<head>
<script>
Challenge=649275;
ChallengeId=473313563;
GenericErrorMessageCookies="Cookies must be enabled in order to view this page.";
</script>
<script>
function test(var1)
{
var var_str=""+Challenge;
var var_arr=var_str.split("");
var LastDig=var_arr.reverse()[0];
var minDig=var_arr.sort()[0];
var subvar1 = (2 * (var_arr[2]))+(var_arr[1]*1);
var subvar2 = (2 * var_arr[2])+var_arr[1];
var my_pow=Math.pow(((var_arr[0]*1)+2),var_arr[1]);
var x=(var1*3+subvar1)*1;
var y=Math.cos(Math.PI*subvar2);
var answer=x*y;
answer-=my_pow*1;
answer+=(minDig*1)-(LastDig*1);
answer=answer+subvar2;
return answer;
}
</script>
<script>
client = null;
if (window.XMLHttpRequest)
{
var client=new XMLHttpRequest();
}
else
{
if (window.ActiveXObject)
{
client = new ActiveXObject('MSXML2.XMLHTTP.3.0');
};
}
if (!((!!client)&&(!!Math.pow)&&(!!Math.cos)&&(!![].sort)&&(!![].reverse)))
{
document.write("Not all needed JavaScript methods are supported.<BR>");
}
else
{
client.onreadystatechange = function()
{
if(client.readyState == 4)
{
var MyCookie=client.getResponseHeader("X-AA-Cookie-Value");
if ((MyCookie == null) || (MyCookie==""))
{
document.write(client.responseText);
return;
}
var cookieName = MyCookie.split('=')[0];
if (document.cookie.indexOf(cookieName)==-1)
{
document.write(GenericErrorMessageCookies);
return;
}
window.location.reload(true);
}
};
y=test(Challenge);
client.open("POST",window.location,true);
client.setRequestHeader('X-AA-Challenge-ID', ChallengeId);
client.setRequestHeader('X-AA-Challenge-Result',y);
client.setRequestHeader('X-AA-Challenge',Challenge);
client.setRequestHeader('Content-Type' , 'text/plain');
client.send();
}
</script>
</head>
<body>
Whether you mimic what the javascript is doing and pass as a new XHR I am unsure (haven' looked closely).
You could also try browser automation e.g. IE via Microsoft Internet Controls or Chrome/FF etc via Selenium Basic, to see if letting javascript run on the page gets around this problem.
Handling challenge: (WIP)
I started looking at an attempt to handle this. Currently, I keep getting the json response so haven't fully tested the bottom part. I would expect some minute *do we care? margin for error if only because Math.PI gives 3.141592653589793, whereas Application.PI gives 3.14159265358979
Option Explicit
Public Sub GetInfo()
Dim json As Object, s As String, re As Object, ws As Worksheet
Dim pattern1 As String, pattern2 As String, challenge As Long, challengeId As Long
Const URL As String = "https://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_allodds.aspx&matchid=default"
pattern1 = "Challenge=(\d+);"
pattern2 = "ChallengeId=(\d+);"
Set re = CreateObject("vbscript.regexp")
Set ws = ThisWorkbook.Worksheets("Sheet1")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
On Error Resume Next
Set json = JsonConverter.ParseJson(s)
On Error GoTo 0
If Not json Is Nothing Then
Debug.Print "No challenge issued"
Debug.Print .responseText
Else
On Error GoTo errhand
challenge = GetId(re, s, pattern1)
If challenge = 999 Then Exit Sub 'should really use more unlikely value.
challengeId = GetId(re, s, pattern2)
.Open "POST", URL, False
.setRequestHeader "X-AA-Challenge-ID", challengeId
.setRequestHeader "X-AA-Challenge-Result", CLng(GetAnswer(challenge))
.setRequestHeader "X-AA-Challenge", challenge
.setRequestHeader "Content-Type", "text/plain"
.send ""
Debug.Print .Status, .responseText
If .Status = 200 Then
.Open "GET", URL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
Debug.Print s
End If
End If
End With
Exit Sub
errhand:
Debug.Print Err.Number, Err.Description
End Sub
Public Function GetId(ByVal re As Object, ByVal s As String, ByVal pattern As String) As Long
With re
.Global = True
.MultiLine = True
.IgnoreCase = False
.pattern = pattern
If .TEST(s) Then
GetId = .Execute(s)(0).SubMatches(0)
Else
GetId = 999 '<probably should use a more unlikely number here!
End If
End With
End Function
Public Function GetAnswer(ByVal challenge As Long) As String 'var1 'challenge
Dim var_str As String, var_arr() As Long, LastDig As Long, minDig As Long
Dim i As Long
var_str = Chr$(34) & challenge & Chr$(34)
ReDim var_arr(0 To Len(var_str) - 3)
For i = 2 To Len(var_str) - 1
var_arr(i - 2) = CLng(Mid$(var_str, i, 1))
Next i
LastDig = var_arr(UBound(var_arr))
minDig = Application.Min(var_arr)
Dim my_pow As Long, x As Long, y As Long, answer As Variant
Dim subvar1 As Long, subvar2 As String
subvar1 = 2 * Application.Small(var_arr, 3) + Application.Small(var_arr, 2)
subvar2 = CStr(2 * Application.Small(var_arr, 3)) & CStr(Application.Small(var_arr, 2))
my_pow = (minDig + 2) ^ Application.Small(var_arr, 2)
x = challenge * 3 + (subvar1 * 1)
y = Evaluate("=COS(PI()* " & CLng(subvar2) & ")")
answer = x * y
answer = answer - my_pow
answer = answer + minDig - LastDig
answer = CStr(answer) & subvar2
GetAnswer = answer
End Function
Browser based solution:
Standard IE automation with Microsoft Internet Controls lead to SaveAs/Open Dialog prompt.
Using selenium you can avoid this prompt and grab the data from the pre element. Using selenium allows you to benefit from an implicit wait which allows the page to complete any challenge issued. You can increase the wait using explicit wait conditions.
Option Explicit
'download selenium https://github.com/florentbr/SeleniumBasic/releases/tag/v2.0.9.0
'Ensure latest applicable driver e.g. ChromeDriver.exe in Selenium folder
'VBE > Tools > References > Add reference to selenium type library
Public Sub DownloadFile()
Dim d As WebDriver, jsonText As String
Set d = New ChromeDriver
Const URL = "https://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_allodds.aspx&matchid=default"
With d
.Start "Chrome"
.get URL
jsonText = .FindElementByCss("pre").Text
Debug.Print jsonText
Stop
.Quit
End With
End Sub
References:
Note I am using a json parser. After adding the .bas from that link you need to go VBE > Tools > References > Add a reference to Microsoft Scripting Runtime.
1 Some perspective from the RubberDuckVBA crew 1 and 2

Related

VBA Runtime Error which works with breakpoint

I am trying to extract data from a website and parse it using VBA. The code is working absolutely fine when stepping through the code however, when running it gives an 'Object Required' run-time error. Here is the code:
Function AuthToken()
Dim XMLHTTP
Dim result As String
Dim csrf As String
Dim token As Object
Dim objHTML As Object
Set objHTML = CreateObject("htmlfile")
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP.6.0")
With XMLHTTP
.Open "GET", "some website", False
.setRequestHeader "Cookie", cf
.SEND
result = .responseText
End With
With objHTML
.Open
.Write result
.Close
Set token = .getElementsByName("csrf-token")
End With
AuthToken = token(0).Content
AuthToken = Replace(Replace(Replace(AuthToken, "+", "%2B"), "/", "%2F"), "=", "%3D")
Set XMLHTTP = Nothing
End Function
The error is triggered by this line in the code:
AuthToken = token(0).Content
If I just click debug and resume, it works fine so I am really struggling to debug this. Can anyone help?
Please, instead of
result = .responseText
use
If .status = 200 Then
result = .responseText
End If
Your code needs to be sure that URL has been loaded completely...4
If you place the next debugging line:
Debug.Print token Is Nothing: Stop
immediately after
Set token = .getElementsByName("csrf-token")
Edited:
If not a matter of .status not "OK", you can try to wait a little before reaching the problematic code line. Please call the next Sub, just before that line:
copy the next API on top of the module keeping the code (in the declarations area):
Private Declare PtrSafe Function GetTickCount Lib “kernel32.dll” () As Long
Add the next waiting Sub:
Sub WaitALittle(Finish As Long)
Dim NowTick As Long,EndTick As Long
EndTick = GetTickCount + (Finish * 1000)
Do
NowTick = GetTickCount
DoEvents
Loop Until NowTick >= EndTick
End Sub
It can be simple called as WaitALittle 2. You may play a little with that 2 (time to wait)...

I'm getting stuck at vba runtime error 424

I'm getting
run-time error 424
in 68th row (line)
request.Open "GET", Url, False
and I don't know how to fix it.
My previous question I posted ;
How to scrape specific part of online english dictionary?
My final goal is to get result like this;
A B
beginning bɪˈɡɪnɪŋ
behalf bɪˈhæf
behave bɪˈheɪv
behaviour bɪˈheɪvjər
belong bɪˈlɔːŋ
below bɪˈloʊ
bird bɜːrd
biscuit ˈbɪskɪt
Here's code I wrote, and it's mostly based on someone else's code I found on internet.
' Microsoft ActiveX Data Objects x.x Library
' Microsoft XML, v3.0
' Microsoft VBScript Regular Expressions
Sub ParseHelp()
' Word reference from
Dim Url As String
Url = "https://www.oxfordlearnersdictionaries.com/definition/english/" & Cells(ActiveCell.Row, "B").Value
' Get dictionary's html
Dim Html As String
Html = GetHtml(Url)
' Check error
If InStr(Html, "<TITLE>Not Found</Title>") > 0 Then
MsgBox "404"
Exit Sub
End If
' Extract phonetic alphabet from HTML
Dim wrapPattern As String
wrapPattern = "<span class='name' (.*?)</span>"
Set wrapCollection = FindRegexpMatch(Html, wrapPattern)
' MsgBox StripHtml(CStr(wrapCollection(1)))
' Fill phonetic alphabet into cell
If Not wrapCollection Is Nothing Then
Dim wrap As String
On Error Resume Next
wrap = StripHtml(CStr(wrapCollection(1)))
If Err.Number <> 0 Then
wrap = ""
End If
Cells(ActiveCell.Row, "C").Value = wrap
Else
MsgBox "not found"
End If
End Sub
Public Function StripHtml(Html As String) As String
Dim RegEx As New RegExp
Dim sOut As String
Html = Replace(Html, "</li>", vbNewLine)
Html = Replace(Html, " ", " ")
With RegEx
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "<[^>]+>"
End With
sOut = RegEx.Replace(Html, "")
StripHtml = sOut
Set RegEx = Nothing
End Function
Public Function GetHtml(Url As String) As String
Dim xmlhttp As Object
Set xmlhttp = CreateObject("MSXML2.serverXMLHTTP")
Dim converter As New ADODB.stream
' Get
request.Open "GET", Url, False
request.send
' raw bytes
converter.Open
converter.Type = adTypeBinary
converter.Write request.responseBody
' read
converter.Position = 0
converter.Type = adTypeText
converter.Charset = "utf-8"
' close
GetHtml = converter.ReadText
converter.Close
End Function
Public Function FindRegexpMatch(txt As String, pat As String) As Collection
Set FindRegexpMatch = New Collection
Dim rx As New RegExp
Dim matcol As MatchCollection
Dim mat As Match
Dim ret As String
Dim delimiter As String
txt = Replace(txt, Chr(10), "")
txt = Replace(txt, Chr(13), "")
rx.Global = True
rx.IgnoreCase = True
rx.MultiLine = True
rx.Pattern = pat
Set matcol = rx.Execute(txt)
'MsgBox "Match:" & matcol.Count
On Error GoTo ErrorHandler
For Each mat In matcol
'FindRegexpMatch.Add mat.SubMatches(0)
FindRegexpMatch.Add mat.Value
Next mat
Set rx = Nothing
' Insert code that might generate an error here
Exit Function
ErrorHandler:
' Insert code to handle the error here
MsgBox "FindRegexpMatch. " & Err.GetException()
Resume Next
End Function
Any kind of help would be greatly appreciated.
The following is an example of how to read in values from column A and write out pronounciations to column B. It uses css selectors to match a child node then steps up to parentNode in order to ensure entire pronounciation is grabbed. There are a number of ways you could have matched on the parent node to get the second pronounciation. Note that I use a parent node and Replace as the pronounciation may span multiple childNodes.
If doing this for lots of lookups please be a good netizen and put some waits in the code so as to not bombard the site with requests.
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, urls()
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row 'you need at least two words in column A or change the redim.
urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(urls))
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(urls) To UBound(urls)
.Open "GET", "https://www.oxfordlearnersdictionaries.com/definition/english/" & urls(i), False
.send
html.body.innerHTML = .responseText
data = Replace$(Replace$(html.querySelector(".name ~ .wrap").ParentNode.innerText, "/", vbNullString), Chr$(10), Chr$(32))
results(i) = Right$(data, Len(data) - 4)
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub
Required references (VBE>Tools>References):
Microsoft HTML Object Library
Should you go down the API route then here is a small example. You can make 1000 free calls in a month with Prototype account. The next best, depending on how many calls you wish to make looks like the 10,001 calls (that one extra PAYG call halves the price). # calls will be affected by whether word is head word or needs lemmas lookup call first. The endpoint construction you need is GET /entries/{source_lang}/{word_id}?fields=pronunciations though that doesn't seem to filter massively. You will need a json parser to handle the json returned e.g. github.com/VBA-tools/VBA-JSON/blob/master/JsonConverter.bas. Download raw code from there and add to standard module called JsonConverter. You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime. Remove the top Attribute line from the copied code.
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, words()
'If not performing lemmas lookup then must be head word e.g. behave, behalf
Const appId As String = "yourAppId"
Const appKey As String = "yourAppKey"
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row
words = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(words))
Set html = New MSHTML.HTMLDocument
Dim json As Object
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(words) To UBound(words)
.Open "GET", "https://od-api.oxforddictionaries.com/api/v2/entries/en-us/" & LCase$(words(i)) & "?fields=pronunciations", False
.setRequestHeader "app_id", appId
.setRequestHeader "app_key", appKey
.setRequestHeader "ContentType", "application/json"
.send
Set json = JsonConverter.ParseJson(.responseText)
results(i) = IIf(json("results")(1)("type") = "headword", json("results")(1)("lexicalEntries")(1)("pronunciations")(2)("phoneticSpelling"), "lemmas lookup required")
Set json = Nothing
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub

Import Data using JSON

I have developed a code to scrape data from a websites but since I know very little about JSON I could be able to get the output as I want, the code was developed for this web: https://ngodarpan.gov.in/index.php/home/statewise_ngo/76/35/1 now I am replicating my code for other websites having json like this web :https://www.yelp.com/search?cflt=hvac&find_loc=San+Francisco%2C+CA ; but this code is not functioning properly. Here is my code(I want it to be generic for most webs)
Option Explicit
Public Sub FetchTabularInfo()
Dim Http As XMLHTTP60, Html As HTMLDocument, col As Variant, csrf As Variant, i&, page As Long
Dim headers(), ws As Worksheet, iCol As Collection
Set ws = ThisWorkbook.Worksheets("Sheet1")
headers = Array("SrNo", "Name", "Address", "Mobile", "Email")
Set Http = New XMLHTTP60
Set Html = New HTMLDocument
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For page = 1 To 8 'To cover all pages
With Http
.Open "GET", "https://www.yelp.com/search?cflt=hvac&find_loc=San+Francisco%2C+CA" & CStr(page), Falsev 'Last letter of URL is page number whose range will be given in outerloop
.send
Html.body.innerHTML = .responseText
End With
Set iCol = New Collection
With Html.querySelectorAll(".table tr a[onclick^='show_ngo_info']")
For i = 0 To .Length - 1
iCol.Add Split(Split(.Item(i).getAttribute("onclick"), "(""")(1), """)")(0)
Next i
End With
Dim r As Long, results()
ReDim results(1 To iCol.Count, 1 To UBound(headers) + 1)
r = 0
For Each col In iCol
r = r + 1
With Http
.Open "GET", "https://www.yelp.com/index.php/ajaxcontroller/get_csrf", False
.send
csrf = .responseText
End With
csrf = Split(Replace(Split(csrf, ":")(1), """", ""), "}")(0)
Dim json As Object
With Http
.Open "POST", "https://www.yelp.com/index.php/ajaxcontroller/show_ngo_info", False
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.send "id=" & col & "&csrf_test_name=" & csrf
Set json = JsonConverter.ParseJson(.responseText)
Dim orgName As String, address As String, srNo As Long, city As String
Dim state As String, tel As String, mobile As String, website As String, email As String
On Error Resume Next
orgName = json("registeration_info")(1)("nr_orgName")
address = json("registeration_info")(1)("nr_add")
srNo = r '<unsure where this is coming from.
mobile = json("infor")("0")("Mobile")
email = json("infor")("0")("Email")
On Error GoTo 0
Dim arr()
arr = Array(srNo, orgName, address, tel, email)
For i = LBound(headers) To UBound(headers)
results(r, i + 1) = arr(i)
Next
End With
Next col
Set iCol = Nothing: Set json = Nothing
ws.Cells(GetLastRow(ws) + 1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
Next
End Sub
Public Function GetLastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
GetLastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function
Please also let me know mistakes I am doing, so that i will take care of those in future.
Short answer:
No.
I would go so far as to say it is impossible you can write something generic for most webs . One could say the generic part is the parser itself. But you need to have some familiarity with the json structure of each endpoint to appropriately direct parsing. Json itself has defined structural syntax/components, but what you want from those will have different access paths and require different handling to do so. Then there are the arguments that may need to be supplied and differences in output format.
What is the best scenario?
If you have a set list of urls (ideally API endpoints) you have a better chance of writing something that might last for a while as you can familiarise yourself with the json returned. But how generic is this? It's really just branched code.
Re-usable stuff:
Might be the non parser stuff which can be generalised e.g. methods and classes you create that parse out the paths for the entire structure and look for key words and return you those paths? Helper functions you write that might recursively write out nested structures etc. Code that makes the request and handles fails etc.... I would definitely recommend looking into classes for re-usable code in web-scraping.
Class based examples:
I will add to this as and when
https://stackoverflow.com/a/52301153/6241235
https://codereview.stackexchange.com/questions/69009/vba-clickbot-featuring-ajax-waiting-and-element-searching

Extract information from a MURAL board, pull HTML code to find the attributes/location?

I have to pull information from a MURAL board (design thinking tool, which is pretty much an online whiteboard). I need to pull the following information for the stickies:
https://app.mural.co/t/hanno1/m/hanno1/1488557783266/465baa38d35e95edc969a5ca9e2a8bb8b6f10310
Sticky Note Text
Sticky Note Attributes (Color, Size, Shape)
Sticky Note Location
Image links (and locations if possible)
I have created code that is not working. Nothing is being pulled. It pretty much skips straight from opening to quitting the browser.
Also how do I pull the actual HTML code to find the attributes/location?
Option Explicit
Public Sub GetInfo()
Dim ie As InternetExplorer, arr(), col
Set ie = New InternetExplorer
Set col = New Collection
With ie
.Visible = True
.navigate "https://app.mural.co/t/nextgencomms9753/m/nextgencomms9753/1536712668215/cd70107230d7f406058157a3bb8e951cedc9afc0"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim listedItems As Object, item As Object, prices As Object, price As Object, j As Long
Set listedItems = .document.getElementsByClassName("widget-layer-inner")
For Each item In listedItems
Set prices = item.getElementsByClassName("Linkify")
ReDim arr(0 To prices.Length - 1) 'you could limit this after by redim to 0 to 0
j = 0
For Each price In prices
arr(j) = price.innerText
j = j + 1
Next
col.Add Array(item.getElementsByClassName("widgets-container") (0).innerText, arr)
Next
.Quit
Dim item2 As Variant, rowNum As Long
For Each item2 In col
rowNum = rowNum + 1
With ThisWorkbook.Worksheets("Sheet1")
.Cells(rowNum, 1) = Replace$(Trim$(item2(0)), Chr$(10), Chr$(32))
.Cells(rowNum, 2).Resize(1, UBound(item2(1)) + 1) = item2(1)
End With
Next
End With
End Sub
In general, I think using IE automation should be avoided where possible, especially if you can figure out a method to emulate this request via a web request.
A little background on this method
I'm submitting two web requests. One to get an authorization token, and another to get the the JSON from the page which populate the widgets on screen. I figured this out by studying the web requests sent back and forth between the client (me) and the server, and emulated those requests.The approach outlined below is pretty fast, about 2 seconds without URL decoding, and 10 seconds with decoding.
Things you'll need for this to work
Explicit Reference set to Microsoft XML v6.0
Explicit Reference set to Microsoft Scripting Runtime
The VBA-JSON project included into your project, get that here
Code
I split out token and json retrieval into two functions. What you get back from getJSON is a dictionary. This dictionary is somewhat nested, so you refer to items by key to traverse the dictionary down. E.g. MyDict(property1)(childPropertyOfproperty1)(childPropertyOf...) etc.
Here's the code.
Option Explicit
Public Sub SubmitRequest()
Const URL As String = "https://app.mural.co/t/hanno1/m/hanno1/1488557783266/465baa38d35e95edc969a5ca9e2a8bb8b6f10310"
Dim returnobject As Object
Dim widgets As Object
Dim widget As Variant
Dim WidgetArray As Variant
Dim id As String
Dim i As Long
Set returnobject = getJSON(URL, getToken(URL))
Set widgets = returnobject("widgets")
ReDim WidgetArray(0 To 7, 0 To 10000)
For Each widget In widgets
'Only add if a text item, change if you like
If returnobject("widgets")(widget)("type") = "murally.widget.TextWidget" Then
WidgetArray(0, i) = URLDecode(returnobject("widgets")(widget)("properties")("text"))
WidgetArray(1, i) = returnobject("widgets")(widget)("properties")("fontSize")
WidgetArray(2, i) = returnobject("widgets")(widget)("properties")("backgroundColor")
WidgetArray(3, i) = returnobject("widgets")(widget)("x")
WidgetArray(4, i) = returnobject("widgets")(widget)("y")
WidgetArray(5, i) = returnobject("widgets")(widget)("width")
WidgetArray(6, i) = returnobject("widgets")(widget)("height")
WidgetArray(7, i) = returnobject("widgets")(widget)("id")
i = i + 1
End If
Next
ReDim Preserve WidgetArray(0 To 7, i - 1)
With ThisWorkbook.Worksheets("Sheet1")
.Range("A1:H1") = Array("Text", "FontSize", "BackgroundColor", "X Position", "Y Position", "Width", "Height", "ID")
.Range(.Cells(2, 1), .Cells(i+ 1, 8)).Value = WorksheetFunction.Transpose(WidgetArray)
End With
End Sub
Public Function getJSON(URL As String, Token As String) As Object
Dim baseURL As String
Dim getRequest As MSXML2.XMLHTTP60
Dim URLParts As Variant
Dim jsonconvert As Object
Dim id As String
dim user as String
URLParts = Split(URL, "/", , vbBinaryCompare)
id = URLParts(UBound(URLParts) - 1)
user = URLParts(UBound(URLParts) - 2)
baseURL = Replace(Replace("https://app.mural.co/api/murals/{user}/{ID}", "{ID}", id), "{user}", user)
Set getRequest = New MSXML2.XMLHTTP60
With getRequest
.Open "GET", baseURL
.setRequestHeader "Authorization", "Bearer " & Token
.setRequestHeader "Referer", URL
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0"
.send
Set getJSON = JsonConverter.ParseJson(.responseText)
End With
End Function
Public Function getToken(URL As String) As String
Dim getRequest As MSXML2.XMLHTTP60
Dim URLParts As Variant
Dim position As Long
Dim jsonconvert As Object
Dim Token As Object
Dim State As String
Dim User As String
Dim json As String
Dim referer As String
Dim id As String
Dim posturl As String
json = "{""state"": ""{STATE}""}"
posturl = "https://app.mural.co/api/v0/visitor/{user}.{ID}"
referer = "https://app.mural.co/t/{user}/m/{user}/{ID}"
URLParts = Split(URL, "/", , vbBinaryCompare)
position = InStrRev(URL, "/")
URL = Left$(URL, position - 1)
State = URLParts(UBound(URLParts))
id = URLParts(UBound(URLParts) - 1)
User = URLParts(UBound(URLParts) - 2)
json = Replace(json, "{STATE}", State)
posturl = Replace(Replace(posturl, "{user}", User), "{ID}", id)
referer = Replace(Replace(referer, "{user}", User), "{ID}", id)
Set getRequest = New MSXML2.XMLHTTP60
With getRequest
.Open "POST", posturl
.setRequestHeader "origin", "https://app.mural.co"
.setRequestHeader "Referer", referer
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0"
.setRequestHeader "Content-Type", "application/json; charset=utf-8"
.send json
Set jsonconvert = JsonConverter.ParseJson(.responseText)
End With
getToken = jsonconvert("token")
End Function
' from https://stackoverflow.com/a/12804172/4839827
Public Function URLDecode(ByVal StringToDecode As String) As String
With CreateObject("htmlfile")
.Open
.Write StringToDecode
.Close
URLDecode = .body.outerText
End With
End Function
Here's the output returned. There are other properties available, however this code is meant to just give you an idea how to pull this back.

VBA - Number of Google News Search Results

I have a cell that contains something I would like searched in google news. I want the code to return the number of results for that search. Currently I have this code which I found elsewhere on the site and does not use google news but even then I sometimes get a
runtime error -2147024891 (80070005)
after 70 or so searched and I can't run again.
Sub HawkishSearch()
Dim url As String, lastRow As Long
Dim XMLHTTP As Object, html As Object
Dim start_time As Date
Dim end_time As Date
lastRow = Range("B" & Rows.Count).End(xlUp).Row
Dim cookie As String
Dim result_cookie As String
start_time = Time
Debug.Print "start_time:" & start_time
For i = 2 To lastRow
url = "https://www.google.co.in/search?q=" & Cells(i, 2) & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
If html.getElementById("resultStats") Is Nothing Then
str_text = "0 Results"
Else
str_text = html.getElementById("resultStats").innerText
End If
Cells(i, 3) = str_text
DoEvents
Next
end_time = Time
Debug.Print "end_time:" & end_time
Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub
Best option (IMO) is to use the Google News API and register for an API key. You can then use a queryString including your search term and parse the JSON response to get the result count. I do that below and also populate a collection with the article titles and links. I use a JSON parser called JSONConverter.bas which you download and add to your project. You can then go to VBE > Tools > References > add a reference to Microsoft Scripting Runtime.
Sample JSON response from API:
The {} denotes a dictionary which you access by key, the [] denotes a collection which you access by index or by For Each loop over.
I use the key totalResults to retrieve the total results count from the initial dictionary returned by the API.
I then loop the collection of dictionaries (articles) and pull the story titles and URLs.
You can then inspect the results in the locals window or print out
Sample of results in locals window:
Option Explicit
Public Sub GetStories()
Dim articles As Collection, article As Object
Dim searchTerm As String, finalResults As Collection, json As Object, arr(0 To 1)
Set finalResults = New Collection
searchTerm = "Obama"
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://newsapi.org/v2/everything?q=" & searchTerm & "&apiKey=yourAPIkey", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
Set json = JsonConverter.ParseJson(.responseText)
End With
Debug.Print "total results = " & json("totalResults")
Set articles = json("articles")
For Each article In articles
arr(0) = article("title")
arr(1) = article("url")
finalResults.Add arr
Next
Stop '<== Delete me later
End Sub
Loop:
If deploying in a loop you can use a class clsHTTP to hold the XMLHTTP object. This is more efficient than creating and destroying. I supply this class with a method GetString to retrieve the JSON response from the API, and a GetInfo method to parse the JSON and retrieve the results count and the API results URLs and Titles.
Example of results structure in locals window:
Class clsHTTP:
Option Explicit
Private http As Object
Private Sub Class_Initialize()
Set http = CreateObject("MSXML2.XMLHTTP")
End Sub
Public Function GetString(ByVal url As String) As String
With http
.Open "GET", url, False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
GetString = .responseText
End With
End Function
Public Function GetInfo(ByVal json As Object) As Variant
Dim results(), counter As Long, finalResults(0 To 1), articles As Object, article As Object
finalResults(0) = json("totalResults")
Set articles = json("articles")
ReDim results(1 To articles.Count, 1 To 2)
For Each article In articles
counter = counter + 1
results(counter, 1) = article("title")
results(counter, 2) = article("url")
Next
finalResults(1) = results
GetInfo = finalResults
End Function
Standard module:
Option Explicit
Public Sub GetStories()
Dim http As clsHTTP, json As Object
Dim finalResults(), searchTerms(), searchTerm As Long, url As String
Set http = New clsHTTP
With ThisWorkbook.Worksheets("Sheet1")
searchTerms = Application.Transpose(.Range("A1:A2")) '<== Change to appropriate range containing search terms
End With
ReDim finalResults(1 To UBound(searchTerms))
For searchTerm = LBound(searchTerms, 1) To UBound(searchTerms, 1)
url = "https://newsapi.org/v2/everything?q=" & searchTerms(searchTerm) & "&apiKey=yourAPIkey"
Set json = JsonConverter.ParseJson(http.GetString(url))
finalResults(searchTerm) = http.GetInfo(json)
Set json = Nothing
Next
Stop '<==Delete me later
End Sub
'
Otherwise:
I would use the following where I grab story links by their class name. I get the count and write the links to a collection
Option Explicit
Public Sub GetStories()
Dim sResponse As String, html As HTMLDocument, articles As Collection
Const BASE_URL As String = "https://news.google.com/"
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://news.google.com/topics/CAAqIggKIhxDQkFTRHdvSkwyMHZNRGxqTjNjd0VnSmxiaWdBUAE?hl=en-US&gl=US&ceid=US:en", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
Set html = New HTMLDocument: Set articles = New Collection
Dim numberOfStories As Long, nodeList As Object, i As Long
With html
.body.innerHTML = sResponse
Set nodeList = .querySelectorAll(".VDXfz")
numberOfStories = nodeList.Length
Debug.Print "number of stories = " & numberOfStories
For i = 0 To nodeList.Length - 1
articles.Add Replace$(Replace$(nodeList.item(i).href, "./", BASE_URL), "about:", vbNullString)
Next
End With
Debug.Print articles.Count
End Sub
Standard Google search:
The following works an example standard google search but you will not always get the same HTML structure depending on your search term. You will need to provide some failing cases to help me determine if there is a consistent selector method that can be applied.
Option Explicit
Public Sub GetResultsCount()
Dim sResponse As String, html As HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.google.com/search?q=mitsubishi", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
Set html = New HTMLDocument
With html
.body.innerHTML = sResponse
Debug.Print .querySelector("#resultStats").innerText
End With
End Sub