Start browser with a specific user profile using chromedp - google-chrome

I am using Selenium with Python and I can do all my testing work with it,
but I am learning Golang and I want to try testing with it.
I came across
chromedp (the chromedp GitHub repo)
and I like it, but
I couldn't figure out how to start Google Chrome with a specific user profile.
Can anyone help, please?
i am using this example :
package main
import (
"context"
"fmt"
"io/ioutil"
"log"
"time"
cdp "github.com/knq/chromedp"
cdptypes "github.com/knq/chromedp/cdp"
)
// main starts a Chrome instance through chromedp, runs the googleSearch task
// list against it, shuts the browser down and waits for it to exit. Any
// failure along the way terminates the program via log.Fatal.
func main() {
	// Cancellable root context shared by every chromedp call below.
	ctxt, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Launch Chrome, routing chromedp's log output through the standard logger.
	browser, err := cdp.New(ctxt, cdp.WithLog(log.Printf))
	if err != nil {
		log.Fatal(err)
	}

	// site and res are filled in by the task list.
	var site, res string
	if err := browser.Run(ctxt, googleSearch("site:brank.as", "Easy Money Management", &site, &res)); err != nil {
		log.Fatal(err)
	}

	// Ask Chrome to shut down, then block until it has finished.
	if err := browser.Shutdown(ctxt); err != nil {
		log.Fatal(err)
	}
	if err := browser.Wait(); err != nil {
		log.Fatal(err)
	}

	log.Printf("saved screenshot of #testimonials from search result listing `%s` (%s)", res, site)
}
// googleSearch returns the chromedp task list that types q into Google,
// follows the result link whose text contains text, and writes a screenshot
// of the #testimonials element to testimonials.png.
//
// The text of the matched link is stored in *res and the location reached
// after clicking it is stored in *site.
func googleSearch(q, text string, site, res *string) cdp.Tasks {
	// shot receives the PNG bytes from the Screenshot task and is read by the
	// final ActionFunc, which runs afterwards in the same task list.
	var shot []byte

	// XPath selecting the anchor whose text contains the wanted phrase.
	resultLink := fmt.Sprintf(`//a[text()[contains(., '%s')]]`, text)

	saveShot := cdp.ActionFunc(func(context.Context, cdptypes.Handler) error {
		return ioutil.WriteFile("testimonials.png", shot, 0644)
	})

	return cdp.Tasks{
		cdp.Navigate(`https://www.google.com`),
		cdp.Sleep(2 * time.Second),
		cdp.WaitVisible(`#hplogo`, cdp.ByID),
		cdp.SendKeys(`#lst-ib`, q+"\n", cdp.ByID),
		cdp.WaitVisible(`#res`, cdp.ByID),
		cdp.Text(resultLink, res),
		cdp.Click(resultLink),
		cdp.Sleep(2 * time.Second),
		cdp.WaitVisible(`#footer`, cdp.ByQuery),
		cdp.WaitNotVisible(`div.v-middle > div.la-ball-clip-rotate`, cdp.ByQuery),
		cdp.Location(site),
		cdp.Screenshot(`#testimonials`, &shot, cdp.ByID),
		saveShot,
	}
}

You need to use the runner options for this
cdp, err := cdp.New(ctxt, cdp.WithRunnerOptions(
runner.UserDataDir("<your path>"),
))
You can look for all available options at below link
https://github.com/knq/chromedp/blob/dc08ecc7272dd745adc3494fb675c76174cbb2b3/runner/runner.go

When you have multiple 'profiles' in the user data directory you can also specify the name of the profile directory you want to use:
opts := []chromedp.ExecAllocatorOption{
chromedp.UserDataDir(`...\AppData\Local\Google\Chrome\User Data`),
chromedp.Flag("profile-directory", "Profile 1"), // <-- like this
...

Related

How to output results to CSV of a concurrent web scraper in Go?

I'm new to Go and am trying to take advantage of Go's concurrency to build a basic scraper that extracts the title, meta description, and meta keywords from URLs.
I am able to print the results to the terminal concurrently, but I can't figure out how to write the output to CSV. I've tried as many variations as I could think of with my limited knowledge of Go, and many end up breaking the concurrency - so I'm losing my mind a bit.
My code and URL input file are below - thanks in advance for any tips!
// file name: metascraper.go
package main
import (
// import standard libraries
"encoding/csv"
"fmt"
"io"
"log"
"os"
"time"
// import third party libraries
"github.com/PuerkitoBio/goquery"
)
// csvParsing reads URLs from data/sample.csv (one record per line, ';' as the
// field separator, URL in the first field) and starts one goroutine per
// record. Each goroutine downloads the page with goquery, extracts the
// <title> text and the content of <meta name="description">, and prints both
// to stdout, or prints "timeout 2" if no description arrives within two
// seconds.
//
// data/result.csv is created and wrapped in a csv.Writer, but nothing is
// written to it: the Write call inside the goroutine is commented out.
//
// NOTE(review): the function returns as soon as the read loop ends, without
// waiting for the goroutines it started. The deferred writer.Flush() and
// fileWrite.Close() therefore run while goroutines may still be working, so
// enabling the commented-out Write as-is would race with them.
func csvParsing() {
file, err := os.Open("data/sample.csv")
// checkError calls log.Fatal on a non-nil error, so the err != nil branch
// right below cannot be reached.
checkError("Cannot open file ", err)
if err != nil {
// err is printable
// elements passed are separated by space automatically
fmt.Println("Error:", err)
return
}
// automatically call Close() at the end of current method
defer file.Close()
//
reader := csv.NewReader(file)
// options are available at:
// http://golang.org/src/pkg/encoding/csv/reader.go?s=3213:3671#L94
reader.Comma = ';'
// lineCount is incremented once per record but never read in this function.
lineCount := 0
fileWrite, err := os.Create("data/result.csv")
checkError("Cannot create file", err)
// Deferred calls run in reverse order when csvParsing returns: first
// writer.Flush(), then fileWrite.Close(), then file.Close().
defer fileWrite.Close()
writer := csv.NewWriter(fileWrite)
defer writer.Flush()
for {
// read just one record
record, err := reader.Read()
// end-of-file is fitted into err
if err == io.EOF {
break
} else if err != nil {
fmt.Println("Error:", err)
return
}
// One goroutine per URL; the URL is passed as an argument so each goroutine
// works on its own copy.
go func(url string) {
// fmt.Println(msg)
doc, err := goquery.NewDocument(url)
if err != nil {
// checkError exits the whole program via log.Fatal if the fetch fails.
checkError("No URL", err)
}
// Buffered (capacity 1) so the producer below can deliver one value on each
// channel without waiting for the select to be ready.
metaDescription := make(chan string, 1)
pageTitle := make(chan string, 1)
go func() {
// time.Sleep(time.Second * 2)
// use CSS selector found with the browser inspector
// for each, use index and item
pageTitle <- doc.Find("title").Contents().Text()
doc.Find("meta").Each(func(index int, item *goquery.Selection) {
if item.AttrOr("name", "") == "description" {
metaDescription <- item.AttrOr("content", "")
}
})
}()
// Wait for the description; a page without a description meta tag never
// sends on metaDescription and ends up in the timeout case.
select {
case res := <-metaDescription:
// The title is sent before the meta tags are scanned, so it is already
// available here.
resTitle := <-pageTitle
fmt.Println(res)
fmt.Println(resTitle)
// Have been trying to output to CSV here but it's not working
// writer.Write([]string{url, resTitle, res})
// err := writer.WriteString(`res`)
// checkError("Cannot write to file", err)
// NOTE(review): writer is shared by every goroutine; concurrent Write calls
// would presumably need a mutex or a single writer goroutine — confirm
// against the encoding/csv documentation.
case <-time.After(time.Second * 2):
fmt.Println("timeout 2")
}
}(record[0])
fmt.Println()
lineCount++
}
}
// main kicks off the scraper and then blocks on standard input.
func main() {
	csvParsing()

	// csvParsing returns before its goroutines are done, so keep the process
	// alive until the user presses Enter; otherwise no output would be seen.
	var line string
	fmt.Scanln(&line)
}
// checkError terminates the program through log.Fatal, printing message
// followed by err, when err is non-nil. A nil err is a no-op.
func checkError(message string, err error) {
	if err == nil {
		return
	}
	log.Fatal(message, err)
}
The data/sample.csv input file with URLs:
http://jonathanmh.com
http://keshavmalani.com
http://google.com
http://bing.com
http://facebook.com
In the code you supplied, you had commented the following code:
// Have been trying to output to CSV here but it's not working
err = writer.Write([]string{url, resTitle, res})
checkError("Cannot write to file", err)
This code is correct, except you have one issue.
Earlier in the function, you have the following code:
fileWrite, err := os.Create("data/result.csv")
checkError("Cannot create file", err)
defer fileWrite.Close()
This code causes fileWrite to be closed as soon as your csvParsing() func returns.
Because csvParsing() returns right after starting the goroutines, the deferred fileWrite.Close() runs before your concurrent functions get a chance to write to the file.
Solution:
You'll need to delay fileWrite.Close() until your concurrent funcs have finished — for example by waiting for them with a sync.WaitGroup before csvParsing() returns, or something similar — so you do not close fileWrite before you have written to it.

How to get the HTTP response body using chromedp?

Using github.com/knq/chromedp, a go package to drive web browsers using Chrome Debugging Protocol, I can navigate to webpages, update forms and submit forms, but I need to retrieve a HTTP response body and haven't figured out how to yet. I'd like to be able to retrieve the HTTP response body for a JSON response (not HTML).
From looking in the code, it seems the HTTP response body is in the CachedResponse.Body property:
https://github.com/knq/chromedp/blob/b9e4c14157325be092c1c1137edbd584648d8c72/cdp/cachestorage/types.go#L30
And that it should be accessible using:
func (p *RequestCachedResponseParams) Do(ctxt context.Context, h cdp.Handler) (response *CachedResponse, err error)
https://github.com/knq/chromedp/blob/b9e4c14157325be092c1c1137edbd584648d8c72/cdp/cachestorage/cachestorage.go#L168
The examples use cdp.Tasks such as the following from the simple example.
// googleSearch assembles the task list from chromedp's "simple" example:
// search Google for q, open the result whose link text contains text, and
// save a screenshot of #testimonials to testimonials.png. The link text is
// written to *res and the final page location to *site.
func googleSearch(q, text string, site, res *string) cdp.Tasks {
	// Filled by the Screenshot task, consumed by the file-writing action that
	// follows it in the list.
	var png []byte

	linkSel := fmt.Sprintf(`//a[text()[contains(., '%s')]]`, text)

	writePNG := cdp.ActionFunc(func(context.Context, cdptypes.Handler) error {
		return ioutil.WriteFile("testimonials.png", png, 0644)
	})

	return cdp.Tasks{
		cdp.Navigate(`https://www.google.com`),
		cdp.Sleep(2 * time.Second),
		cdp.WaitVisible(`#hplogo`, cdp.ByID),
		cdp.SendKeys(`#lst-ib`, q+"\n", cdp.ByID),
		cdp.WaitVisible(`#res`, cdp.ByID),
		cdp.Text(linkSel, res),
		cdp.Click(linkSel),
		cdp.Sleep(2 * time.Second),
		cdp.WaitVisible(`#footer`, cdp.ByQuery),
		cdp.WaitNotVisible(`div.v-middle > div.la-ball-clip-rotate`, cdp.ByQuery),
		cdp.Location(site),
		cdp.Screenshot(`#testimonials`, &png, cdp.ByID),
		writePNG,
	}
}
https://github.com/knq/chromedp/blob/b9e4c14157325be092c1c1137edbd584648d8c72/examples/simple/main.go
It seems like the CachedResponse.Body can be accessed by calling RequestCachedResponseParams.Do() and referencing RequestCachedResponseParams.CacheID, but the following is still needed:
how to call RequestCachedResponseParams.Do() in cdp.Tasks - seems possible using cdp.ActionFunc()
how to get access to RequestCachedResponseParams.CacheID
If you want to get the response to a request, this is how I managed to do it.
This sample calls http://www.google.com and listens for EventResponseReceived to keep the Response, which contains the headers, for example.
package main
import (
"context"
"io/ioutil"
"log"
"os"
"time"
"github.com/chromedp/cdproto/network"
"github.com/chromedp/chromedp"
)
// main launches a visible (non-headless) Chrome with a throw-away user data
// directory, registers a network event listener on the tab, and navigates to
// http://www.google.com. Every failure panics, which still lets the deferred
// clean-ups (context cancellation, temp dir removal) run.
func main() {
	// Fresh profile directory so the run does not touch a real Chrome profile.
	dir, err := ioutil.TempDir("", "chromedp-example")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.DisableGPU,
		chromedp.NoDefaultBrowserCheck,
		chromedp.Flag("headless", false),
		chromedp.Flag("ignore-certificate-errors", true),
		chromedp.Flag("window-size", "50,400"),
		chromedp.UserDataDir(dir),
	)

	allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
	defer cancel()

	// also set up a custom logger
	taskCtx, cancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
	defer cancel()

	// create a timeout
	taskCtx, cancel = context.WithTimeout(taskCtx, 10*time.Second)
	defer cancel()

	// ensure that the browser process is started
	if err := chromedp.Run(taskCtx); err != nil {
		panic(err)
	}

	// The listener must be registered before navigating so the responses of
	// the page load are observed.
	listenForNetworkEvent(taskCtx)

	// Report a failed navigation or an expired 10s timeout instead of exiting
	// silently as if everything had worked.
	if err := chromedp.Run(taskCtx,
		network.Enable(),
		chromedp.Navigate(`http://www.google.com`),
		chromedp.WaitVisible(`body`, chromedp.BySearch),
	); err != nil {
		panic(err)
	}
}
func listenForNetworkEvent(ctx context.Context) {
chromedp.ListenTarget(ctx, func(ev interface{}) {
switch ev := ev.(type) {
case *network.EventResponseReceived:
resp := ev.Response
if len(resp.Headers) != 0 {
log.Printf("received headers: %s", resp.Headers)
}
}
// other needed network Event
})
}

I need help on this Golang web application using mysql as database

I am new to Golang and had been following some tutorials and I want to put into practice what I have learned to create a website
This is the main.go file
package main
import (
"html/template"
"net/http"
"log"
"database/sql"
_"github.com/go-sql-driver/mysql"
)
//Fetch all templates
var templates, templatesErr = template.ParseGlob("templates/*")
// main registers the HTTP handlers and serves them on port 9000.
func main() {
	addr := ":9000"
	log.Println("Listening to port", addr)
	http.HandleFunc("/", root)
	http.HandleFunc("/facilities", allFacilities)
	// ListenAndServe only returns on failure (e.g. the port is already in
	// use); log that error instead of exiting silently.
	log.Fatal(http.ListenAndServe(addr, nil))
}
// root renders index.html with the page title and an empty body.
// A rendering failure is logged and reported to the client as a 500.
func root(w http.ResponseWriter, r *http.Request) {
	rootData := map[string]string{
		"page_title": "iSpace Open Data",
		"body":       "",
	}
	if err := templates.ExecuteTemplate(w, "index.html", rootData); err != nil {
		log.Println("rendering index.html:", err)
		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
	}
}
// facility is one row of the health_facilities table as shown in the
// facilities.html template.
type facility struct {
	FacilityName string
	Type         string
}

// allFacilities loads every facility from MySQL and renders facilities.html
// with the resulting slice. Any database or template failure is logged and
// answered with a 500; the server itself keeps running.
func allFacilities(w http.ResponseWriter, r *http.Request) {
	// fail reports an internal error for this request only. log.Fatal would
	// terminate the whole web server because of one failed request.
	fail := func(what string, err error) {
		log.Println(what+":", err)
		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
	}

	// go-sql-driver DSN format: user:password@tcp(host:port)/dbname
	db, err := sql.Open("mysql", "root:08swanzy@tcp(127.0.0.1:3306)/iod")
	if err != nil {
		fail("opening database", err)
		return
	}
	defer db.Close()

	rows, err := db.Query("Select FacilityName, Type from health_facilities ")
	if err != nil {
		fail("querying health_facilities", err)
		return
	}
	defer rows.Close()

	var facilities []facility
	for rows.Next() {
		var fac facility
		if err := rows.Scan(&fac.FacilityName, &fac.Type); err != nil {
			fail("scanning facility row", err)
			return
		}
		facilities = append(facilities, fac)
	}
	// rows.Next also returns false when iteration stopped because of an
	// error; without this check a truncated list would be rendered.
	if err := rows.Err(); err != nil {
		fail("reading facility rows", err)
		return
	}

	if err := templates.ExecuteTemplate(w, "facilities.html", facilities); err != nil {
		fail("rendering facilities.html", err)
	}
}
This uses the HTML files in the templates folder for the view. But I keep getting a runtime error about a nil pointer dereference. I need help, please.
Tried your code and got the same error. It happened on this line:
templates.ExecuteTemplate(w, "index.html", rootData)
The problem is that your templates are not loaded correctly. I moved template parsing to the main function and it works. Here the relevant code snippet:
//Fetch all templates
var (
templates *template.Template
)
// main parses the templates before any handler can run, then registers the
// handlers and serves them on port 9000.
func main() {
	// Parse here rather than in a package-level initializer so a parse
	// failure stops the program right away instead of leaving templates nil.
	var err error
	templates, err = template.ParseGlob("templates/*")
	if err != nil {
		panic(err)
	}

	addr := ":9000"
	log.Println("Listening to port", addr)
	http.HandleFunc("/", root)
	http.HandleFunc("/facilities", allFacilities)
	// ListenAndServe only returns on failure (e.g. the port is already in
	// use); log that error instead of exiting silently.
	log.Fatal(http.ListenAndServe(addr, nil))
}

How can I implement my own interface for OpenID that uses a MySQL Database instead of In memory storage

So I'm trying to use the OpenID package for Golang, located here: https://github.com/yohcop/openid-go
In the _example it says that it uses in memory storage for storing the nonce/discoverycache information and that it will not free the memory and that I should implement my own version of them using some sort of database.
My database of choice is MySQL, I have tried to implement what I thought was correct (but is not, does not give me any compile errors, but crashes on runtime)
My DiscoveryCache.go is as such:
package openid
import (
"database/sql"
"log"
//"time"
_ "github.com/go-sql-driver/mysql"
"github.com/yohcop/openid-go"
)
// SimpleDiscoveredInfo holds the three values produced by OpenID discovery
// and exposes them through read-only accessors.
type SimpleDiscoveredInfo struct {
	opEndpoint string
	opLocalID  string
	claimedID  string
}

// OpEndpoint returns the stored OP endpoint.
func (info *SimpleDiscoveredInfo) OpEndpoint() string {
	return info.opEndpoint
}

// OpLocalID returns the stored OP-local identifier.
func (info *SimpleDiscoveredInfo) OpLocalID() string {
	return info.opLocalID
}

// ClaimedID returns the stored claimed identifier.
func (info *SimpleDiscoveredInfo) ClaimedID() string {
	return info.claimedID
}

// SimpleDiscoveryCache carries no state of its own; its Put and Get methods
// are defined separately.
type SimpleDiscoveryCache struct{}
// Put is meant to store info under id. As written it stores nothing: it only
// logs info, and the database code is an unfinished, commented-out draft.
//
// NOTE(review): the draft below issues a SELECT, whereas storing an entry
// would presumably need an INSERT or UPDATE keyed by id — confirm the
// intended behaviour before enabling it.
func (s SimpleDiscoveryCache) Put(id string, info openid.DiscoveredInfo) {
/*
db, err := sql.Query("mysql", "db:connectinfo")
errCheck(err)
rows, err := db.Query("SELECT opendpoint, oplocalid, claimedid FROM discovery_cache")
errCheck(err)
was unsure what to do here because I'm not sure how to
return the info properly
*/
log.Println(info)
}
// Get looks up the discovery_cache row whose id column equals id and returns
// it as a *SimpleDiscoveredInfo. Any error is turned into a panic by errCheck.
//
// NOTE(review): database/sql exposes sql.Open for obtaining a handle; there is
// no package-level sql.Query, so the first statement presumably was meant to
// be sql.Open — confirm, since as written this is unlikely to compile.
func (s SimpleDiscoveryCache) Get(id string) openid.DiscoveredInfo {
db, err := sql.Query("mysql", "db:connectinfo")
errCheck(err)
var sdi = new(SimpleDiscoveredInfo)
// NOTE(review): the query selects three columns, but Scan is given a single
// destination (the address of the sdi pointer). Scan normally expects one
// destination per selected column — verify against the database/sql docs.
err = db.QueryRow("SELECT opendpoint, oplocalid, claimedid FROM discovery_cache WHERE id=?", id).Scan(&sdi)
errCheck(err)
return sdi
}
And my Noncestore.go
package openid
import (
"database/sql"
"errors"
"flag"
"fmt"
"time"
_ "github.com/go-sql-driver/mysql"
)
// maxNonceAge is the oldest timestamp a nonce may carry and still be
// accepted. It can be overridden with the -openid-max-nonce-age flag.
var maxNonceAge = flag.Duration("openid-max-nonce-age",
	60*time.Second,
	"Maximum accepted age for openid nonces. The bigger, the more "+
		"memory is needed to store used nonces.")

// SimpleNonceStore records used OpenID nonces in the MySQL table noncestore.
type SimpleNonceStore struct{}

// Accept reports whether nonce may be used. It returns nil and records the
// nonce when it is well-formed, not older than maxNonceAge and not already
// stored; otherwise it returns an error describing the rejection.
//
// A nonce is an RFC 3339 UTC timestamp (its first 20 bytes) followed by an
// arbitrary suffix. The nonce comes from a remote party, so a malformed one
// produces an error rather than a panic, and all validation happens before
// the database is touched.
//
// NOTE(review): endpoint is not part of the lookup, so the same nonce issued
// by two different endpoints is treated as a replay — confirm whether the
// table should be keyed by endpoint as well.
func (s *SimpleNonceStore) Accept(endpoint, nonce string) error {
	if len(nonce) < 20 || len(nonce) > 256 {
		return errors.New("Invalid nonce")
	}
	ts, err := time.Parse(time.RFC3339, nonce[0:20])
	if err != nil {
		return err
	}
	if diff := time.Since(ts); diff > *maxNonceAge {
		// Seconds() is a float64, so %d would print a formatting error.
		return fmt.Errorf("Nonce too old: %.0fs", diff.Seconds())
	}
	suffix := nonce[20:]

	db, err := sql.Open("mysql", "dbconnectinfo")
	if err != nil {
		return err
	}
	defer db.Close()

	rows, err := db.Query("SELECT * FROM noncestore")
	if err != nil {
		// rows is nil on error; deferring rows.Close() before this check
		// would panic.
		return err
	}
	defer rows.Close()

	// Reject the nonce if an identical (timestamp, suffix) pair is stored.
	for rows.Next() {
		var storedNonce, storedTime string
		if err := rows.Scan(&storedNonce, &storedTime); err != nil {
			return err
		}
		dbTime, err := time.Parse(time.RFC3339, storedTime)
		if err != nil {
			return err
		}
		if dbTime.Equal(ts) && storedNonce == suffix {
			return errors.New("Nonce is already used")
		}
	}
	if err := rows.Err(); err != nil {
		return err
	}

	// Remember the nonce so a replay is detected next time. Exec is used
	// because INSERT returns no rows; the timestamp is stored as RFC 3339
	// text, the format the loop above parses.
	_, err = db.Exec("INSERT INTO noncestore SET nonce=?, time=?", suffix, ts.Format(time.RFC3339))
	return err
}
// errCheck panics with "We had an error!" followed by the error text when err
// is non-nil, and does nothing otherwise.
func errCheck(err error) {
	if err == nil {
		return
	}
	panic("We had an error!" + err.Error())
}
Then I try to use them in my main file as:
import _"github.com/mysqlOpenID"
var nonceStore = &openid.SimpleNonceStore{}
var discoveryCache = &openid.SimpleDiscoveryCache{}
I get no compile errors but it crashes
I'm sure you'll look at my code and go what the hell (I'm fairly new and only have a week or so experience with Golang so please feel free to correct anything)
Obviously I have done something wrong, I basically looked at the NonceStore.go and DiscoveryCache.go on the github for OpenId, replicated it, but replaced the map with database insert and select functions
IF anybody can point me in the right direction on how to implement this properly that would be much appreciated, thanks! If you need anymore information please ask.
Ok. First off, I don't believe you that the code compiles.
Let's look at some mistakes, shall we?
db, err := sql.Open("mysql", "dbconnectinfo")
This line opens a database connection. It should only be used once, preferably inside an init() function. For example,
// db is the shared database handle, opened once in init and reused by every
// query in the package.
var db *sql.DB

// init opens the MySQL handle and verifies that the server is reachable,
// panicking if either step fails.
func init() {
	var err error
	// Plain assignment (=, not :=) so the package-level db above is set rather
	// than a new local variable that shadows it.
	// go-sql-driver DSN format: user[:password]@tcp(host:port)/[dbname]
	db, err = sql.Open("mysql", "root@tcp(127.0.0.1:3306)/")
	if err != nil {
		panic(err)
	}
	// sql.Open may just validate its arguments without creating a connection;
	// Ping establishes one so connection problems show up at start-up.
	if err = db.Ping(); err != nil {
		panic(err)
	}
}
Bang. Now you have a handle to your MySQL database (call db.Ping() if you want to verify that the connection actually works).
Now what?
Well this (from Get) is gross:
db, err := sql.Query("mysql", "db:connectinfo")
errCheck(err)
var sdi = new(SimpleDiscoveredInfo)
err = db.QueryRow("SELECT opendpoint, oplocalid, claimedid FROM discovery_cache WHERE id=?", id).Scan(&sdi)
errCheck(err)
Instead, it should be this:
// No need for a pointer...
var sdi SimpleDiscoveredInfo
// Because we take the address of 'sdi' right here (inside Scan)
// And that's a useless (and potentially problematic) layer of indirection.
// Notice how I dropped the other "db, err := sql.Query" part? We don't
// need it because we've already declared "db" as you saw in the first
// part of my answer.
err := db.QueryRow("SELECT ...").Scan(&sdi)
if err != nil {
panic(err)
}
// Return the address of sdi, which means we're returning a pointer
// to wherever sdi lives on the heap.
return &sdi
Up next is this:
/*
db, err := sql.Query("mysql", "db:connectinfo")
errCheck(err)
rows, err := db.Query("SELECT opendpoint, oplocalid, claimedid FROM discovery_cache")
errCheck(err)
was unsure what to do here because I'm not sure how to
return the info properly
*/
If you've been paying attention, we can drop the first sql.Query line.
Great, now we just have:
rows, err := db.Query("SELECT ...")
So, why don't you do what you did inside the Accept method and parse the rows using for rows.Next()... ?

How to make sure the PC is having internet and then launch exec?

I have this application, which launches when the system boots Windows 8.1. But often the PC joins the network later, and as a result Google Chrome shows a failed page.
package main
import "os"
import "os/exec"
import "runtime"
import "encoding/json"
// Configuration mirrors the keys read from package.json. The fields are
// exported because encoding/json only fills exported struct fields.
//
// NOTE(review): the []string types are kept from the original; confirm they
// match the actual shape of package.json.
type Configuration struct {
	Main   []string `json:"main"`
	Name   []string `json:"name"`
	Window []string `json:"window"`
}

// readConfiguration decodes the JSON file at path into a Configuration and
// closes the file before returning.
func readConfiguration(path string) (Configuration, error) {
	var configuration Configuration
	file, err := os.Open(path)
	if err != nil {
		return configuration, err
	}
	defer file.Close()
	err = json.NewDecoder(file).Decode(&configuration)
	return configuration, err
}

// main reads the application configuration and starts Chrome in app mode.
// It only supports Windows; on any other OS it prints "Incompatible".
func main() {
	if runtime.GOOS != "windows" {
		println("Incompatible")
		return
	}

	// The same install path is used for every architecture.
	chrome := "C:/Program Files (x86)/Google/Chrome/Application/chrome.exe"

	// Read config. A broken or missing config is reported but does not stop
	// Chrome from being launched.
	configuration, err := readConfiguration("C:/Program Files (x86)/abc/package.json")
	if err != nil {
		// The println builtin prints an interface as raw addresses, so pass
		// the message text.
		println("error: ", err.Error())
	}
	for _, entry := range configuration.Main {
		println(entry)
	}

	// BUG: Chrome is started without first checking that the local network
	// or internet is available, so it may show a dead page.
	cmdopen := exec.Command(chrome, "--app=http://icanhazip.com")
	if err := cmdopen.Start(); err != nil {
		println("Failed: ", err.Error())
	}
}
you could do an http.Get()
// hazInternet reports whether an HTTP GET of Google's robots.txt succeeds.
// A failed request is logged and reported as false.
func hazInternet() bool {
	resp, err := http.Get("http://www.google.com/robots.txt")
	if err == nil {
		// Only reachability matters; the body is closed unread.
		resp.Body.Close()
		return true
	}
	log.Println(err)
	return false
}
Go PlayGround