Loop answers when retriveing data in VB - json

Sub FIND_ISBN()
Dim jsonBooks As Object, auth, authInfo As Object, k
Dim jsonBook As Object, bookDetails As Object
Dim ws As Worksheet, isbn, rngIsbn As Range, cell As Range
Set ws = ThisWorkbook.Worksheets("Books")
Set rngIsbn = ws.Range("A1:A5")
For Each cell In rngIsbn
isbn = cell.Value
If Len(isbn) > 5 Then
Set jsonBooks = BookInfo(isbn)
'Note: the aPI only returns `{}` if there's no match to
' the ISBN, not (eg) status=404
If Not jsonBooks Is Nothing Then
If jsonBooks.Count = 0 Then
Debug.Print "No results"
Else
For Each k In jsonBooks
Debug.Print "-------" & k & "----------"
Set jsonBook = jsonBooks(k)
Set bookDetails = jsonBook("details")
Debug.Print "Title:", bookDetails("title")
Debug.Print "Pub. Date:", bookDetails("publish_date")
For Each auth In bookDetails("authors")
Debug.Print "Author:", auth("name")
Next auth
Next k
End If
End If
End If 'have something to look up
Next cell
End Sub
Function BookInfo(isbn) As Object
Dim url As String
url = "https://openlibrary.org/api/books?bibkeys=ISBN:" & isbn & "&jscmd=details&format=json"
Set BookInfo = responseObject(url)
End Function
Function responseObject(url As String) As Object
Dim json As Object, http As Object
With CreateObject("msxml2.xmlhttp")
.Open "GET", url, False
.Send
'Debug.Print .Status, .responseText
* ** If .Status = 200 Then
'Set responseObject = JsonConverter.ParseJson(.responseText)
ThisWorkbook.Worksheets("Books").Cells(1, 3) = .responseText
Else***
Debug.Print .responseText, .Status
End If
End With
End Function
I;m trying to finalise some code in VB.. i can see this code fetching the isbn details but it is only putting the retreived answers into cell 1,3 as per the code and i'm not sure how to make this move & loop to the next cell down....
I'm trying to get it to run thru ISBN numbers retreive the data (this bit works) and then get the line information in the correct cell... any ideas or pointers would be gratefully received..

Keep track of an index as you loop though the cells of rngIsbn, use that index to move the output location after each output.
I added a parameter to your two functions, OffsetValue, which is used with the function Offset to turn the static Cells(1, 3) into a variable location based on OffsetValue.
Sub FIND_ISBN()
Dim jsonBooks As Object, auth, authInfo As Object, k
Dim jsonBook As Object, bookDetails As Object
Dim ws As Worksheet, isbn, rngIsbn As Range, cell As Range
Dim OffsetValue As Long
Set ws = ThisWorkbook.Worksheets("Books")
Set rngIsbn = ws.Range("A1:A5")
For Each cell In rngIsbn
isbn = cell.Value
If Len(isbn) > 5 Then
Set jsonBooks = BookInfo(isbn, OffsetValue)
OffsetValue = OffsetValue + 1
'Note: the aPI only returns `{}` if there's no match to
' the ISBN, not (eg) status=404
If Not jsonBooks Is Nothing Then
If jsonBooks.Count = 0 Then
Debug.Print "No results"
Else
For Each k In jsonBooks
Debug.Print "-------" & k & "----------"
Set jsonBook = jsonBooks(k)
Set bookDetails = jsonBook("details")
Debug.Print "Title:", bookDetails("title")
Debug.Print "Pub. Date:", bookDetails("publish_date")
For Each auth In bookDetails("authors")
Debug.Print "Author:", auth("name")
Next auth
Next k
End If
End If
End If 'have something to look up
Next cell
End Sub
Function BookInfo(isbn, Optional OffsetValue As Long = 0) As Object
Dim url As String
url = "https://openlibrary.org/api/books?bibkeys=ISBN:" & isbn & "&jscmd=details&format=json"
Set BookInfo = responseObject(url, OffsetValue)
End Function
Function responseObject(url As String, Optional OffsetValue As Long = 0) As Object
Dim json As Object, http As Object
With CreateObject("msxml2.xmlhttp")
.Open "GET", url, False
.Send
'Debug.Print .Status, .responseText
If .Status = 200 Then
'Set responseObject = JsonConverter.ParseJson(.responseText)
ThisWorkbook.Worksheets("Books").Cells(1, 3).Offset(OffsetValue) = .responseText
Else
Debug.Print .responseText, .Status
End If
End With
End Function
Now, the output goes into Cells(1, 3).Offset(OffsetValue), and since OffsetValue is incrementing by 1 each time, the output is moving down 1 row each time a value is written into the worksheet.

Related

I'm getting stuck at vba runtime error 424

I'm getting
run-time error 424
in 68th row (line)
request.Open "GET", Url, False
and I don't know how to fix it.
My previous question I posted ;
How to scrape specific part of online english dictionary?
My final goal is to get result like this;
A B
beginning bɪˈɡɪnɪŋ
behalf bɪˈhæf
behave bɪˈheɪv
behaviour bɪˈheɪvjər
belong bɪˈlɔːŋ
below bɪˈloʊ
bird bɜːrd
biscuit ˈbɪskɪt
Here's code I wrote, and it's mostly based on someone else's code I found on internet.
' Microsoft ActiveX Data Objects x.x Library
' Microsoft XML, v3.0
' Microsoft VBScript Regular Expressions
Sub ParseHelp()
' Word reference from
Dim Url As String
Url = "https://www.oxfordlearnersdictionaries.com/definition/english/" & Cells(ActiveCell.Row, "B").Value
' Get dictionary's html
Dim Html As String
Html = GetHtml(Url)
' Check error
If InStr(Html, "<TITLE>Not Found</Title>") > 0 Then
MsgBox "404"
Exit Sub
End If
' Extract phonetic alphabet from HTML
Dim wrapPattern As String
wrapPattern = "<span class='name' (.*?)</span>"
Set wrapCollection = FindRegexpMatch(Html, wrapPattern)
' MsgBox StripHtml(CStr(wrapCollection(1)))
' Fill phonetic alphabet into cell
If Not wrapCollection Is Nothing Then
Dim wrap As String
On Error Resume Next
wrap = StripHtml(CStr(wrapCollection(1)))
If Err.Number <> 0 Then
wrap = ""
End If
Cells(ActiveCell.Row, "C").Value = wrap
Else
MsgBox "not found"
End If
End Sub
Public Function StripHtml(Html As String) As String
Dim RegEx As New RegExp
Dim sOut As String
Html = Replace(Html, "</li>", vbNewLine)
Html = Replace(Html, " ", " ")
With RegEx
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "<[^>]+>"
End With
sOut = RegEx.Replace(Html, "")
StripHtml = sOut
Set RegEx = Nothing
End Function
Public Function GetHtml(Url As String) As String
Dim xmlhttp As Object
Set xmlhttp = CreateObject("MSXML2.serverXMLHTTP")
Dim converter As New ADODB.stream
' Get
request.Open "GET", Url, False
request.send
' raw bytes
converter.Open
converter.Type = adTypeBinary
converter.Write request.responseBody
' read
converter.Position = 0
converter.Type = adTypeText
converter.Charset = "utf-8"
' close
GetHtml = converter.ReadText
converter.Close
End Function
Public Function FindRegexpMatch(txt As String, pat As String) As Collection
Set FindRegexpMatch = New Collection
Dim rx As New RegExp
Dim matcol As MatchCollection
Dim mat As Match
Dim ret As String
Dim delimiter As String
txt = Replace(txt, Chr(10), "")
txt = Replace(txt, Chr(13), "")
rx.Global = True
rx.IgnoreCase = True
rx.MultiLine = True
rx.Pattern = pat
Set matcol = rx.Execute(txt)
'MsgBox "Match:" & matcol.Count
On Error GoTo ErrorHandler
For Each mat In matcol
'FindRegexpMatch.Add mat.SubMatches(0)
FindRegexpMatch.Add mat.Value
Next mat
Set rx = Nothing
' Insert code that might generate an error here
Exit Function
ErrorHandler:
' Insert code to handle the error here
MsgBox "FindRegexpMatch. " & Err.GetException()
Resume Next
End Function
Any kind of help would be greatly appreciated.
The following is an example of how to read in values from column A and write out pronounciations to column B. It uses css selectors to match a child node then steps up to parentNode in order to ensure entire pronounciation is grabbed. There are a number of ways you could have matched on the parent node to get the second pronounciation. Note that I use a parent node and Replace as the pronounciation may span multiple childNodes.
If doing this for lots of lookups please be a good netizen and put some waits in the code so as to not bombard the site with requests.
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, urls()
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row 'you need at least two words in column A or change the redim.
urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(urls))
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(urls) To UBound(urls)
.Open "GET", "https://www.oxfordlearnersdictionaries.com/definition/english/" & urls(i), False
.send
html.body.innerHTML = .responseText
data = Replace$(Replace$(html.querySelector(".name ~ .wrap").ParentNode.innerText, "/", vbNullString), Chr$(10), Chr$(32))
results(i) = Right$(data, Len(data) - 4)
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub
Required references (VBE>Tools>References):
Microsoft HTML Object Library
Should you go down the API route then here is a small example. You can make 1000 free calls in a month with Prototype account. The next best, depending on how many calls you wish to make looks like the 10,001 calls (that one extra PAYG call halves the price). # calls will be affected by whether word is head word or needs lemmas lookup call first. The endpoint construction you need is GET /entries/{source_lang}/{word_id}?fields=pronunciations though that doesn't seem to filter massively. You will need a json parser to handle the json returned e.g. github.com/VBA-tools/VBA-JSON/blob/master/JsonConverter.bas. Download raw code from there and add to standard module called JsonConverter. You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime. Remove the top Attribute line from the copied code.
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, words()
'If not performing lemmas lookup then must be head word e.g. behave, behalf
Const appId As String = "yourAppId"
Const appKey As String = "yourAppKey"
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row
words = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(words))
Set html = New MSHTML.HTMLDocument
Dim json As Object
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(words) To UBound(words)
.Open "GET", "https://od-api.oxforddictionaries.com/api/v2/entries/en-us/" & LCase$(words(i)) & "?fields=pronunciations", False
.setRequestHeader "app_id", appId
.setRequestHeader "app_key", appKey
.setRequestHeader "ContentType", "application/json"
.send
Set json = JsonConverter.ParseJson(.responseText)
results(i) = IIf(json("results")(1)("type") = "headword", json("results")(1)("lexicalEntries")(1)("pronunciations")(2)("phoneticSpelling"), "lemmas lookup required")
Set json = Nothing
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub

Store JSON results into an array excel vba

I have tried using API for get some information for Yahoo Finance
And this is the UDF that I created
Sub Test()
'1 >> High & 2 >> Close
MsgBox YahooHigh("GOOG", "2019-07-18", 1)
MsgBox YahooHigh("GOOG", "2019-07-18", 2)
End Sub
Function YahooHigh(sTicker As String, sDate As String, idx As Integer)
Dim json As Object
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=" & sTicker & "&outputsize=full&apikey=myapikey"
.Send
Set json = JsonConverter.ParseJson(.responseText)
End With
If idx = 1 Then
YahooHigh = json("Time Series (Daily)")(sDate)("2. high")
ElseIf idx = 2 Then
YahooHigh = json("Time Series (Daily)")(sDate)("4. close")
Else
YahooHigh = Empty
End If
End Function
The UDF works fine but of course I will have to load the JSON result each time. As in my example, the UDF will run for twice the first for High value and the second for the Close value
Is there a way to store the json results into an array then instead of loading the json, the array is called. I thought of static but I am stuck at this
What I would like to do is to store all the dates in the json results for specific ticker (High value and Close value only) then to recall the desired value from the static array .. Any ideas?
Another variation:
I have tried using the HTML content and it works fine for me when using the link directly
Sub MyTest()
Dim html As Object, ele As Object
With CreateObject("MSXML2.ServerXMLHTTP")
'https://finance.yahoo.com/quote/GOOG/history?period1=1325566800&period2=1325566800&interval=1d&filter=history&frequency=1d
'.Open "GET", "https://finance.yahoo.com/quote/GOOG/history?period1=1325566800&period2=1325566800&interval=1d&filter=history&frequency=1d", False
Dim sTicker As String
sTicker = Sheets(1).Range("B1").Value 'GOOG
Dim period1 As Long, period2 As Long
period1 = ToUnix(Sheets(1).Range("B2").Value) '3 Jan 2012
period2 = ToUnix(Sheets(1).Range("B3").Value) '3 Jan 2012
.Open "GET", "https://finance.yahoo.com/quote/" & sTicker & "/history?period1=" & period1 & "&period2=" & period2 & "&interval=1d&filter=history&frequency=1d", False
.Send
If .Status <> 200 Then MsgBox "Problem" & vbNewLine & .Status & " - " & .StatusText: Exit Sub
Set html = CreateObject("htmlfile")
html.body.innerHTML = .responseText
'WriteTxtFile html.body.innerHTML
'Stop
Set ele = html.getElementsByTagName("table")(0).getElementsByTagName("tr")(1)
Dim tCell As Object
Dim cnt As Long
For Each tCell In ele.Children
cnt = cnt + 1
If cnt = 3 Then Debug.Print "High: " & tCell.innerText
If cnt = 5 Then Debug.Print "Close: " & tCell.innerText
Next tCell
End With
End Sub
Public Function ToUnix(dt) As Long
ToUnix = DateDiff("s", "1/1/1970", dt)
End Function
When using this line .Open "GET", "https://finance.yahoo.com/quote/GOOG/history?period1=1325566800&period2=1325566800&interval=1d&filter=history&frequency=1d", False it works fine and returns values from High and Close
But when trying to convert the dates from the worksheet to UNIX so as to use them in the link, it doesn't work
This is the problem for me now
Just have your function return the json object, then parse it in your sub.
The json object will contain all your data, and you can parse out what you want.
For example
In your function:
Function YahooHigh(sTicker As String) as object
Dim json As Object
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=" & sTicker & "&outputsize=full&apikey=myapikey"
.Send
Set YahooHigh = JsonConverter.ParseJson(.responseText)
End With
and in your Sub:
Sub Test()
Dim obj As Object
Set obj = YahooHigh("GOOG")
MsgBox obj("Time Series (Daily)")("2019-07-18")("2. high")
MsgBox obj("Time Series (Daily)")("2019-07-18")("4. close")
End Sub

Parsing JSON to Excel - LOOP

I have a code that gets historical stock prices by parsing JSON. I need to get the "Close" price on a specific date. I need the code to read the date from an Excel cell and paste the price corresponding to the date. Here is an example:
https://cloud.iexapis.com/stable/stock/AAPL/chart/1m?token=pk_98e61bb72fd84b7d8b5f19c579fd0d9d
Below is my code, but I need to modify it so it can loop to find the date required:
Sub getHistoricalData()
'Application.DisplayAlerts = False
Application.ScreenUpdating = False
Dim wb As Workbook
Dim ws As Worksheet
Dim rng As Range
Dim symbol As Variant
Dim n As Integer
Dim lastrow As Long
Dim myrequest As Variant
Dim i As Variant
Set wb = ActiveWorkbook
Set ws = Sheets("Sheet1")
ws.Activate
'Last row find
lastrow = ws.Cells(Rows.Count, "A").End(xlUp).Row
Set rng = ws.Range("A3:A" & lastrow)
'Clear Prior Prices
ws.Range("k3:k" & lastrow).ClearContents
n = 3
'Get Symbols list
For Each symbol In rng
Set myrequest = CreateObject("WinHttp.WinHttpRequest.5.1")
myrequest.Open "Get", "https://cloud.iexapis.com/stable/stock/" & symbol & "/chart/1m?token=pk_98e61bb72fd84b7d8b5f19c579fd0d9d" 'updated 06/15/2019
'Debug.Print myrequest.ResponseText
Dim Json As Object
Set Json = JsonConverter.ParseJson(myrequest.ResponseText)
'MsgBox (myrequest.ResponseText)
i = Json("Close")
ws.Range(Cells(n, 2), Cells(n, 2)) = i
n = n + 1
Next symbol
ws.Columns("k").AutoFit
'MsgBox ("Data is downloaded.")
ws.Range("k3:k" & lastrow).HorizontalAlignment = xlGeneral
ws.Range("k3:k" & lastrow).NumberFormat = "$#,##0.00"
Application.DisplayAlerts = True
Application.ScreenUpdating = False
End Sub
For Example, I need to extract the closing price on 06/06/2019 for each stock symbol.
Json parser would be an ideal choice. You can however also regex out from the response and handle cases of http errors i.e. where not a successful connection to desired page, as well as and date not found. I read the date from cell A1. The date is formatted unambiguously as yyyy-mm-dd. The tickers are read into an array which is looped - this is faster. Results are stored in an array and written out once to sheet - also faster.
Option Explicit
Public Sub GetClosePrices()
Dim lastRow As Long, url As String, ws As Worksheet, tickers(), dateString As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ws
dateString = Format$(.Range("A1").Value, "yyyy-mm-dd")
lastRow = .Cells(.Rows.Count, "A").End(xlUp).Row
If lastRow >= 3 Then
.Range("K3:K" & lastRow).ClearContents
tickers = Application.Transpose(.Range("A3:A" & lastRow).Value)
Else
Exit Sub
End If
End With
Dim s As String, re As Object, p As String, r As String, prices(), i As Long
ReDim prices(1 To UBound(tickers))
p = """DATE_HERE"",""open"":[0-9.]+,""close"":(.*?)," 'Format must be YYYY-MM-DD
p = Replace$(p, "DATE_HERE", dateString)
url = "https://cloud.iexapis.com/stable/stock/TICKER_HERE/chart/1m?token=pk_98e61bb72fd84b7d8b5f19c579fd0d9d"
Set re = CreateObject("VBScript.RegExp")
With CreateObject("MSXML2.XMLHTTP")
For i = LBound(tickers) To UBound(tickers)
.Open "GET", Replace$(url, "TICKER_HERE", tickers(i)), False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
If .Status = 200 Then
s = .responseText
r = GetValue(re, s, p)
Else
r = "Failed connection"
End If
prices(i) = r
s = vbNullString
Next
End With
ws.Cells(3, "K").Resize(UBound(prices), 1) = Application.Transpose(prices)
End Sub
Public Function GetValue(ByVal re As Object, ByVal inputString As String, ByVal pattern As String) As String
With re
.Global = True
.pattern = pattern
If .test(inputString) Then ' returns True if the regex pattern can be matched agaist the provided string
GetValue = .Execute(inputString)(0).submatches(0)
Else
GetValue = "Not found"
End If
End With
End Function
Regex explanation for an example date (try it):
The JSON response is an array of objects (exposed by the VBA-JSON library as a Collection of Dictionaries), so you need to loop over them and find the one of interest, based on the date:
Dim closePrice
Set Json = JsonConverter.ParseJson(myrequest.ResponseText)
For Each o in Json
if o("date") = "2019-06-06" Then
closePrice = o("close")
exit for
end if
Next o

Retrieving all Excel file links from a webpage

I'm trying to get all the downloadable Excel file links from the website, but having difficulty. Please help to guide me. Thanks.
Sub TYEX()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.jpx.co.jp/markets/public/short-selling/index.html"
internet.Navigate URL
Do Until internet.ReadyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.Document
Set div_result = internetdata.getElementById("readArea")
Set header_links = div_result.getElementsByTagName("td")
For Each h In header_links
Set link = h.ChildNodes.item(0)
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
MsgBox "done"
End Sub
You had the idea down correctly, but here's a different approach:
Sub TYEX()
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.navigate "https://www.jpx.co.jp/markets/public/short-selling/index.html"
.Visible = True
Do While .Busy Or .readyState < 4
DoEvents
Loop
Dim doc As Object, tbl As Object
Set doc = .document
Set tbl = doc.getElementsByClassName("component-normal-table")(0).Children(0)
Dim r As Long, xlsArr(), a As Object
With tbl.Rows
ReDim xlsArr(1 To .Length - 1)
For r = 1 To .Length - 1 ' 0 is the table header
xlsArr(r) = .Item(r).Children(1).innerHTML
Next r
End With
With CreateObject("VBScript.RegExp")
.Pattern = "<a href=""(\/markets.*?\.xls)"
For r = 1 To UBound(xlsArr)
xlsArr(r) = "https://www.jpx.co.jp" & .Execute(xlsArr(r))(0).SubMatches(0)
Debug.Print xlsArr(r)
Next
End With
End With
'Add to sheet
Dim ws As Worksheet, rng As Range
Set ws = ThisWorkbook.Worksheets(1)
With ws
Set rng = .Range(.Cells(NextRow(ws), 1), .Cells(NextRow(ws) + UBound( _
xlsArr) - 1, 1))
rng.Value = Application.Transpose(xlsArr)
End With
End Sub
Public Function NextRow(ByVal ws As Worksheet, Optional ByVal col As Variant = 1) As Long
With ws
NextRow = .Cells(.Rows.Count, col).End(xlUp).Row + 1
End With
End Function
Breaking Down the Code
This will loop your html table rows. We start at 1, because 0 is actually just the table header.
With tbl.Rows
ReDim xlsArr(1 To .Length - 1)
For r = 1 To .Length - 1 ' 0 is the table header
xlsArr(r) = .Item(r).Children(1).innerHTML
Next r
End With
This uses regular expressions to extract the url from the innerHTML property. You can see how this particular regex works here: Regex101
With CreateObject("VBScript.RegExp")
.Pattern = "<a href=""(\/markets.*?\.xls)"
For r = 1 To UBound(xlsArr)
xlsArr(r) = "https://www.jpx.co.jp" & .Execute(xlsArr(r))(0).SubMatches(0)
Debug.Print xlsArr(r)
Next
End With
You will size your range to be the same size as your array that contains the links, then write the array to the worksheet. This is usually much faster than writing cells one-by-one.
'Add to sheet
Dim ws As Worksheet, rng As Range
Set ws = ThisWorkbook.Worksheets(1)
With ws
Set rng = .Range(.Cells(NextRow(ws), 1), .Cells(NextRow(ws) + UBound( _
xlsArr) - 1, 1))
rng.Value = Application.Transpose(xlsArr)
End With
You can use an attribute = value CSS selector with $ operator to say the href value must end with .xls. Then use querySelectorAll to retrieve all matched results. Using CSS selectors is a very fast and generally robust method.
Dim list As Object
Set list = ie.document.querySelectorAll("[href$='.xls']")
It is much faster to use XMLHTTP as well, rather than opening IE. Note that you can then pass these links to a function to perform a binary download or to URLMon for downloading.
Option Explicit
Public Sub Links()
Dim sResponse As String, html As HTMLDocument, list As Object, i As Long
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.jpx.co.jp/markets/public/short-selling/index.html", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
Set html = New HTMLDocument
With html
.body.innerHTML = sResponse
Set list = html.querySelectorAll("[href$='.xls']")
End With
For i = 0 To list.Length - 1
Debug.Print Replace$(list.item(i), "about:", "https://www.jpx.co.jp")
Next
End Sub
Example download function (though you could re-use your existing XMLHTTP object - this is just to illustrate):
Public Function DownloadFile(ByVal downloadFolder As String, ByVal downloadURL As String) As String
Dim http As Object , tempArr As Variant
Set http = CreateObject("WinHttp.WinHttpRequest.5.1")
http.Open "GET", downloadURL, False
http.send
On Error GoTo errhand
With CreateObject("ADODB.Stream")
.Open
.Type = 1
.write http.responseBody
tempArr = Split(downloadURL, "/")
tempArr = tempArr(UBound(tempArr))
.SaveToFile downloadFolder & tempArr, 2 '< "/" on enter of downloadFolder. 2 for overwrite which is Ok if no file modifications.
.Close
End With
DownloadFile = downloadFolder & tempArr
Exit Function
errhand:
If Err.Number <> 0 Then
Debug.Print Err.Number, Err.Description
MsgBox "Download failed"
End If
DownloadFile = vbNullString
End Function
References (VBE > Tools > References):
Microsoft HTML Object Library

Fetching data from web page

I am trying to fetch the publication date corresponding to each patent number.
Here is the Excel sheet:
The database is espacenet.com
Here's the link for the first patent you see in the Excel sheet:
http://worldwide.espacenet.com/searchResults?compact=false&PN=US7055777B2&ST=advanced&locale=en_EP&DB=EPODOC
Under the "Publication Info" header, I need to get the date after matching the patent number with the one in the Excel sheet.
Here's the code:
Sub tryextraction()
Dim ie As New InternetExplorer
Dim sdd As String
Dim tdd() As String
Dim num0 As Integer
Dim num1 As Integer
Dim doc As HTMLDocument
Dim i As Integer
Dim j As Integer
ie.Visible = True
num1 = ActiveSheet.UsedRange.Rows.Count
For num0 = 2 To num1
ie.navigate "http://worldwide.espacenet.com/searchResults?compact=false&PN=" & Range("A" & num0) & "&ST=advanced&locale=en_EP&DB=EPODOC"
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
sdd = Trim(doc.getElementsByTagName("td")(5).innerText)
tdd() = Split(sdd, vbLf)
j = UBound(tdd)
For i = 0 To j
If InStr(tdd(i), "(") <> 0 Then
tdd(i) = Replace(tdd(i), " ", "")
tdd(i) = Replace(tdd(i), "(", "")
tdd(i) = Replace(tdd(i), ")", "")
If tdd(i) = Range("A" & num0).Value Then
Range("B" & num0).Value = tdd(i + 1)
End If
End If
Next i
Next num0
ie.Quit
End Sub
The code is not giving any error. The column "Publication Date" remains blank after the code finishes running.
The html tag which contains the publication info has been taken correctly.
There are some trailing white space characters after the ID you are searching for in the document so tdd(i) = Range("A" & num0).Value never evaluates to true. It's not just a space, so a simple Trim(tdd(i)) = Range("A" & num0).Value call does not help. Try instead InStr(tdd(i), Range("A" & num0).Value) If that is not good enough, you'll have to specifically remove CRLF from the end of the string before doing the compare.
There are often multiple publication dates under the publication info header.
Example:
The following script obtains all of these and the preceeding line (so you have the associated publication along with date).
It loops from row 2 of the Activesheet, to the last populated row, picking up the Publication Numbers from column A and writing out the results starting from column B. Depending on how many dates there are, the data will extend across multiple columns from B.
Regex:
A regex of ^(.*)\s\d{4}-\d{2}-\d{2} is used to retrieve the date pattern and the preceeding line i.e. The publication identifier and the date. Try it
Example output:
VBA:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, html As New HTMLDocument, url As String, pubInfo As Object
Dim loopRange As Range, iRow As Range, counter As Long
'example US7055777B2
Application.ScreenUpdating = False
With ActiveSheet
Set loopRange = Range("A2:A" & .Cells(.Rows.Count, "A").End(xlUp).Row)
End With
With IE
.Visible = True
counter = 2 '<== start from row 2
For Each iRow In loopRange
If Not IsEmpty(iRow) Then
url = "https://worldwide.espacenet.com/searchResults?compact=false&PN=" & iRow.Value & "&ST=advanced&locale=en_EP&DB=EPODOC"
.navigate url
While .Busy Or .readyState < 4: DoEvents: Wend
Set html = .document
Do
DoEvents
On Error Resume Next
Set pubInfo = html.querySelector(".publicationInfoColumn")
On Error GoTo 0
Loop While pubInfo Is Nothing
Dim tempArr()
tempArr = GetDateAndPatent(pubInfo.innerText, "^(.*)\s\d{4}-\d{2}-\d{2}") '"(?m)^(.*)\s\d{4}-\d{2}-\d{2}" '<==This is not supported
With ActiveSheet
.Cells(counter, 2).Resize(1, UBound(tempArr) + 1) = tempArr
End With
End If
counter = counter + 1
Next iRow
.Quit '<== Remember to quit application
End With
Application.ScreenUpdating = True
End Sub
Public Function GetDateAndPatent(ByVal inputString As String, ByVal sPattern As String) As Variant
Dim matches As Object, iMatch As Object, s As String, arrMatches(), i As Long
With CreateObject("vbscript.regexp")
.Global = True
.MultiLine = True
.IgnoreCase = True
.Pattern = sPattern
If .test(inputString) Then
Set matches = .Execute(inputString)
For Each iMatch In matches
ReDim Preserve arrMatches(i)
arrMatches(i) = iMatch.Value
i = i + 1
Next iMatch
End If
End With
GetDateAndPatent = arrMatches
End Function