How to block css and image request in chromedp like puppeteer RequestInterception? - google-chrome

I am trying to block CSS and images request in chromedp but I can't find proper details or code about that. if anyone know please msg me some reference code.
in puppeteer, I am using below code to block image and CSS.
await page.on('request', (req) => {
if (
req.resourceType() == 'stylesheet' ||
req.resourceType() == 'font' ||
req.resourceType() == 'image' ||
req.resourceType() == 'media'
) {
req.abort();
} else {
req.continue();
}
});
As per below github method, I tried but not work.
https://github.com/chromedp/chromedp/issues/260
package main
import (
"context"
"io/ioutil"
"os"
"time"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/fetch"
"github.com/chromedp/cdproto/network"
"github.com/chromedp/chromedp"
)
func main() {
dir, err := ioutil.TempDir("", "chromedp-example")
if err != nil {
panic(err)
}
defer os.RemoveAll(dir)
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.NoDefaultBrowserCheck,
chromedp.Flag("headless", false),
chromedp.Flag("ignore-certificate-errors", true),
chromedp.UserDataDir(dir),
)
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
// also set up a custom logger
taskCtx, cancel := chromedp.NewContext(allocCtx)
defer cancel()
// create a timeout
taskCtx, cancel = context.WithTimeout(taskCtx, 100*time.Second)
defer cancel()
// ensure that the browser process is started
if err := chromedp.Run(taskCtx); err != nil {
panic(err)
}
// listen network event
chromedp.ListenTarget(taskCtx, DisableImageLoad(taskCtx))
chromedp.Run(taskCtx,
network.Enable(),
fetch.Enable(),
chromedp.Navigate(`http://www.yahoo.com`),
chromedp.WaitVisible(`body`, chromedp.BySearch),
)
}
func DisableImageLoad(ctx context.Context) func(event interface{}) {
return func(event interface{}) {
switch ev := event.(type) {
case *fetch.EventRequestPaused:
go func() {
c := chromedp.FromContext(ctx)
ctx := cdp.WithExecutor(ctx, c.Target)
if ev.ResourceType == network.ResourceTypeImage {
fetch.FailRequest(ev.RequestID, network.ErrorReasonConnectionAborted).Do(ctx)
} else {
fetch.ContinueRequest(ev.RequestID).Do(ctx)
}
}()
}
}
}

Related

Loading CSV data on server, converting data to JSON and getting result using Json Queries using Golang

I am trying to build a TCP server that loads dataset from a CSV file and provide an interface to query the dataset. TCP server will expose port 4040. CSV file contains the following columns related to corona virus cases:
Cumulative Test Positive
Cumulative Tests Performed
Date
Discharged
Expired
Admitted
Region
Users should be able to connect to the server using NetCat nc localhost 4040 command on Linux/Unix based systems.
Once connected to TCP, the user should be able to communicate with the application by sending queries in JSON format.
{
"query": {
"region": "Sindh"
}
}
{
"query": {
"date": "2020-03-20"
}
}
My server.go
package main
import (
"fmt"
"net"
"os"
"flag"
"log"
"encoding/csv"
"encoding/json"
"bufio"
"io"
"strings"
)
type CovidPatient struct {
Positive string `json:"Covid_Positive"`
Performed string `json:"Coivd_Performed"`
Date string `json:"Covid_Date"`
Discharged string `json:"Covid_Discharged"`
Expired string `json:"Covid_Expired"`
Region string `json:"Covid_Region"`
Admitted string `json:"Covid_Admitted"`
}
type DataRequest struct {
Get string `json:"get"`
}
type DataError struct {
Error string `json:"Covid_error"`
}
func Load(path string) []CovidPatient {
table := make([]CovidPatient, 0)
var patient CovidPatient
file, err := os.Open(path)
if err != nil {
panic(err.Error())
}
defer file.Close()
reader := csv.NewReader(file)
csvData, err := reader.ReadAll()
if err != nil {
fmt.Println(err)
os.Exit(1)
}
for _, row := range csvData{
patient.Positive = row[0]
patient.Performed = row[1]
patient.Date = row[2]
patient.Discharged = row[3]
patient.Expired = row[4]
patient.Region = row[5]
patient.Admitted = row[6]
table = append(table, patient)
}
return table
}
func Find(table []CovidPatient, filter string) []CovidPatient {
if filter == "" || filter == "*" {
return table
}
result := make([]CovidPatient, 0)
filter = strings.ToUpper(filter)
for _, cp := range table {
if cp.Date == filter ||
cp.Region == filter ||
strings.Contains(strings.ToUpper(cp.Positive), filter) ||
strings.Contains(strings.ToUpper(cp.Performed), filter) ||
strings.Contains(strings.ToUpper(cp.Date), filter) ||
strings.Contains(strings.ToUpper(cp.Discharged), filter) ||
strings.Contains(strings.ToUpper(cp.Expired), filter) ||
strings.Contains(strings.ToUpper(cp.Region), filter) ||
strings.Contains(strings.ToUpper(cp.Admitted), filter){
result = append(result, cp)
}
}
return result
}
var (
patientsDetail = Load("./covid_final_data.csv")
)
func main(){
var addr string
var network string
flag.StringVar(&addr, "e", ":4040", "service endpoint [ip addr or socket path]")
flag.StringVar(&network, "n", "tcp", "network protocol [tcp,unix]")
flag.Parse()
switch network {
case "tcp", "tcp4", "tcp6", "unix":
default:
fmt.Println("unsupported network protocol")
os.Exit(1)
}
ln, err := net.Listen(network, addr)
if err != nil {
log.Println(err)
os.Exit(1)
}
defer ln.Close()
log.Println("Covid19 Condition in Pakistan")
log.Printf("Service started: (%s) %s\n", network, addr)
for {
conn, err := ln.Accept()
if err != nil {
log.Println(err)
conn.Close()
continue
}
log.Println("Connected to ", conn.RemoteAddr())
go handleConnection(conn)
}
}
func handleConnection(conn net.Conn) {
defer func() {
if err := conn.Close(); err != nil {
log.Println("error closing connection:", err)
}
}()
reader := bufio.NewReaderSize(conn, 4)
for {
buf, err := reader.ReadSlice('}')
if err != nil {
if err != io.EOF {
log.Println("connection read error:", err)
return
}
}
reader.Reset(conn)
var req DataRequest
if err := json.Unmarshal(buf, &req); err != nil {
log.Println("failed to unmarshal request:", err)
cerr, jerr := json.Marshal(DataError{Error: err.Error()})
if jerr != nil {
log.Println("failed to marshal DataError:", jerr)
continue
}
if _, werr := conn.Write(cerr); werr != nil {
log.Println("failed to write to DataError:", werr)
return
}
continue
}
result := Find(patientsDetail, req.Get)
rsp, err := json.Marshal(&result)
if err != nil {
log.Println("failed to marshal data:", err)
if _, err := fmt.Fprintf(conn, `{"data_error":"internal error"}`); err != nil {
log.Printf("failed to write to client: %v", err)
return
}
continue
}
if _, err := conn.Write(rsp); err != nil {
log.Println("failed to write response:", err)
return
}
}
}
This correctly loads the csv and convert it into JSON. But, when I try to run query using NetCat command it return empty JSON element. Kindly guide me where is error.
Guess you want this:
╭─root#DESKTOP-OCDRD7Q ~
╰─# nc localhost 4040
{"get": "Sindh"}
[{"Covid_Positive":"1","Coivd_Performed":"1","Covid_Date":"1","Covid_Discharged":"1","Covid_Expired":"1","Covid_Region":"Sindh","Covid_Admitted":"1"}]
What you should do is just to modify your json request.
package main
import (
"bufio"
"encoding/csv"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"net"
"os"
)
type CovidPatient struct {
Positive string `json:"Covid_Positive"`
Performed string `json:"Coivd_Performed"`
Date string `json:"Covid_Date"`
Discharged string `json:"Covid_Discharged"`
Expired string `json:"Covid_Expired"`
Region string `json:"Covid_Region"`
Admitted string `json:"Covid_Admitted"`
}
type DataRequest struct {
Get CovidPatient `json:"get"`
}
type DataError struct {
Error string `json:"Covid_error"`
}
func Load(path string) []CovidPatient {
table := make([]CovidPatient, 0)
var patient CovidPatient
file, err := os.Open(path)
if err != nil {
panic(err.Error())
}
defer file.Close()
reader := csv.NewReader(file)
csvData, err := reader.ReadAll()
if err != nil {
fmt.Println(err)
os.Exit(1)
}
for _, row := range csvData {
patient.Positive = row[0]
patient.Performed = row[1]
patient.Date = row[2]
patient.Discharged = row[3]
patient.Expired = row[4]
patient.Region = row[5]
patient.Admitted = row[6]
table = append(table, patient)
}
return table
}
func Find(table []CovidPatient, filter CovidPatient) []CovidPatient {
result := make([]CovidPatient, 0)
log.Println(filter, table)
for _, cp := range table {
if filter.Positive == "" {
} else if filter.Positive != cp.Positive {
continue
}
if filter.Performed == "" {
} else if filter.Performed != cp.Performed {
continue
}
if filter.Date == "" {
} else if filter.Date != cp.Date {
continue
}
if filter.Discharged == "" {
} else if filter.Discharged != cp.Discharged {
continue
}
if filter.Expired == "" {
} else if filter.Expired != cp.Expired {
continue
}
if filter.Region == "" {
} else if filter.Region != cp.Region {
continue
}
if filter.Admitted == "" {
} else if filter.Admitted != cp.Admitted {
continue
}
result = append(result, cp)
}
return result
}
var (
patientsDetail = Load("./covid_final_data.csv")
)
func main() {
log.SetFlags(log.Lshortfile | log.Ltime)
var addr string
var network string
flag.StringVar(&addr, "e", ":4040", "service endpoint [ip addr or socket path]")
flag.StringVar(&network, "n", "tcp", "network protocol [tcp,unix]")
flag.Parse()
switch network {
case "tcp", "tcp4", "tcp6", "unix":
default:
fmt.Println("unsupported network protocol")
os.Exit(1)
}
ln, err := net.Listen(network, addr)
if err != nil {
log.Println(err)
os.Exit(1)
}
defer ln.Close()
log.Println("Covid19 Condition in Pakistan")
log.Printf("Service started: (%s) %s\n", network, addr)
for {
conn, err := ln.Accept()
if err != nil {
log.Println(err)
conn.Close()
continue
}
log.Println("Connected to ", conn.RemoteAddr())
go handleConnection(conn)
}
}
func handleConnection(conn net.Conn) {
defer func() {
if err := conn.Close(); err != nil {
log.Println("error closing connection:", err)
}
}()
reader := bufio.NewReaderSize(conn, 100)
for {
buf, err := reader.ReadBytes('|')
if err != nil {
if err != io.EOF {
log.Println("connection read error:", err)
return
}
}
reader.Reset(conn)
var req DataRequest
if err := json.Unmarshal(buf[:len(buf)-1], &req); err != nil {
log.Println("failed to unmarshal request:", string(buf), err)
cerr, jerr := json.Marshal(DataError{Error: err.Error()})
if jerr != nil {
log.Println("failed to marshal DataError:", jerr)
continue
}
if _, werr := conn.Write(cerr); werr != nil {
log.Println("failed to write to DataError:", werr)
return
}
continue
}
result := Find(patientsDetail, req.Get)
rsp, err := json.Marshal(&result)
if err != nil {
log.Println("failed to marshal data:", err)
if _, err := fmt.Fprintf(conn, `{"data_error":"internal error"}`); err != nil {
log.Printf("failed to write to client: %v", err)
return
}
continue
}
if _, err := conn.Write(rsp); err != nil {
log.Println("failed to write response:", err)
return
}
}
}
The query is:
╭─root#DESKTOP-OCDRD7Q ~
╰─# nc localhost 4040 127 ↵
{
"get": {
"Covid_Region": "Sindh",
"Covid_Date": "2020-03-20"
}
}|
[{"Covid_Positive":"1","Coivd_Performed":"1","Covid_Date":"2020-03-20","Covid_Discharged":"1","Covid_Expired":"1","Covid_Region":"Sindh","Covid_Admitted":"1"}]
Inside function handleConnection, the first thing is "read until you find the first }", imagine the user is sending the request:
{ "get": { "Covid_Region": "Sindh", "Covid_Date": "2020-03-20" } }
then that step read:
{ "get": { "Covid_Region": "Sindh", "Covid_Date": "2020-03-20" }
Notice the trailing } is missing, then the json.Unmarshal is trying to unmarshal the query without the last } (which is an invalid json).
This problem can take advantage of JSON streaming decoding, in other words, use json.NewDecoder(r io.Reader) instead of json.Unmarshal. Let me copy and modify the first part of that function:
func handleConnection(conn net.Conn) {
defer func() {
if err := conn.Close(); err != nil {
log.Println("error closing connection:", err)
}
}()
jsonDecoder := json.NewDecoder(conn) // A json decoder read a stream to find a
// valid JSON and stop just the byte
// after the JSON ends. Process can be
// repeated.
for {
var req DataRequest
err := jsonDecoder.Decode(&req)
if err == io.EOF {
log.Println("finish")
return
}
if err != nil {
log.Println("unmarshal:", err)
return
}
result := Find(patientsDetail, req.Get) // Here query the system
// ...
Probably now it works, but you can also take advantage of json streaming to send the response back with a jsonEncoder := json.NewEncoder(conn) before de for loop and sending the request like this:
err := jsonEncoder.Encode(&result)
if err != nil {
log.Println("failed to marshal data:", err)
// ...
continue
}

create and write two CSV simultaneously golang

I am trying to create two CSV files and write both simultaneously. This code doesn't work that way, but it's just to exemplify what I want to do. Since (* csv.Writer) is a pointer I don't know how I can differentiate it from another file. Is it possible to do that?
package test
import (
"encoding/csv"
"os"
"sync"
"time"
)
var csvSuccess *os.File
var csvError *os.File
var csvErr error
var csvWriterSuccess *csv.Writer
var csvWriterError *csv.Writer
var mutex *sync.Mutex
func init() {
csvSuccess, csvErr = os.Create("sucess-result.csv")
csvError, csvErr = os.Create("error-result.csv")
if csvErr != nil {
panic("Error open file")
}
csvWriterSuccess = csv.NewWriter(csvSuccess)
csvWriterError = csv.NewWriter(csvSuccess)
mutex = &sync.Mutex{}
}
func WriteRecordSuccess(record []string) {
mutex.Lock()
if err := csvWriterSuccess.Write(record); err != nil {
if err != nil {
panic("Error writing success record")
}
csvWriterSuccess.Flush()
}
mutex.Unlock()
}
func WriteRecordError(record []string) {
mutex.Lock()
if err := csvWriterError.Write(record); err != nil {
if err != nil {
panic("Error writing error record")
}
csvWriterError.Flush()
}
mutex.Unlock()
}
func ClosecsvFile() {
csvWriterSuccess.Flush()
csvWriterError.Flush()
csvSuccess.Sync()
csvError.Sync()
csvSuccess.Close()
csvError.Close()
}
In your init function you have both writers writing to the success file.
csvWriterSuccess = csv.NewWriter(csvSuccess)
csvWriterError = csv.NewWriter(csvSuccess)
Write to the success and error files separately.
csvWriterSuccess = csv.NewWriter(csvSuccess)
csvWriterError = csv.NewWriter(csvError)

How to get the HTTP response body using chromedp?

Using github.com/knq/chromedp, a go package to drive web browsers using Chrome Debugging Protocol, I can navigate to webpages, update forms and submit forms, but I need to retrieve a HTTP response body and haven't figured out how to yet. I'd like to be able to retrieve the HTTP response body for a JSON response (not HTML).
From looking in the code, it seems the HTTP response body is in the CachedResponse.Body property:
https://github.com/knq/chromedp/blob/b9e4c14157325be092c1c1137edbd584648d8c72/cdp/cachestorage/types.go#L30
And that it should be accessible using:
func (p *RequestCachedResponseParams) Do(ctxt context.Context, h cdp.Handler) (response *CachedResponse, err error)
https://github.com/knq/chromedp/blob/b9e4c14157325be092c1c1137edbd584648d8c72/cdp/cachestorage/cachestorage.go#L168
The examples use cdp.Tasks such as the following from the simple example.
func googleSearch(q, text string, site, res *string) cdp.Tasks {
var buf []byte
sel := fmt.Sprintf(`//a[text()[contains(., '%s')]]`, text)
return cdp.Tasks{
cdp.Navigate(`https://www.google.com`),
cdp.Sleep(2 * time.Second),
cdp.WaitVisible(`#hplogo`, cdp.ByID),
cdp.SendKeys(`#lst-ib`, q+"\n", cdp.ByID),
cdp.WaitVisible(`#res`, cdp.ByID),
cdp.Text(sel, res),
cdp.Click(sel),
cdp.Sleep(2 * time.Second),
cdp.WaitVisible(`#footer`, cdp.ByQuery),
cdp.WaitNotVisible(`div.v-middle > div.la-ball-clip-rotate`, cdp.ByQuery),
cdp.Location(site),
cdp.Screenshot(`#testimonials`, &buf, cdp.ByID),
cdp.ActionFunc(func(context.Context, cdptypes.Handler) error {
return ioutil.WriteFile("testimonials.png", buf, 0644)
}),
}
}
https://github.com/knq/chromedp/blob/b9e4c14157325be092c1c1137edbd584648d8c72/examples/simple/main.go
It seems like the CachedResponse.Body can be accessed by calling RequestCachedResponseParams.Do() by referencing RequestCachedResponseParams.CacheID, but the following is still needed::
how to call RequestCachedResponseParams.Do() in cdp.Tasks - seems possible using cdp.ActionFunc()
how to get access to RequestCachedResponseParams.CacheID
If you want to get request response, that's how I managed to do it.
This sample call http://www.google.com and listen EventResponseReceived to keep Response that contains Headers for example.
package main
import (
"context"
"io/ioutil"
"log"
"os"
"time"
"github.com/chromedp/cdproto/network"
"github.com/chromedp/chromedp"
)
func main() {
dir, err := ioutil.TempDir("", "chromedp-example")
if err != nil {
panic(err)
}
defer os.RemoveAll(dir)
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.NoDefaultBrowserCheck,
chromedp.Flag("headless", false),
chromedp.Flag("ignore-certificate-errors", true),
chromedp.Flag("window-size", "50,400"),
chromedp.UserDataDir(dir),
)
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
// also set up a custom logger
taskCtx, cancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
defer cancel()
// create a timeout
taskCtx, cancel = context.WithTimeout(taskCtx, 10*time.Second)
defer cancel()
// ensure that the browser process is started
if err := chromedp.Run(taskCtx); err != nil {
panic(err)
}
// listen network event
listenForNetworkEvent(taskCtx)
chromedp.Run(taskCtx,
network.Enable(),
chromedp.Navigate(`http://www.google.com`),
chromedp.WaitVisible(`body`, chromedp.BySearch),
)
}
func listenForNetworkEvent(ctx context.Context) {
chromedp.ListenTarget(ctx, func(ev interface{}) {
switch ev := ev.(type) {
case *network.EventResponseReceived:
resp := ev.Response
if len(resp.Headers) != 0 {
log.Printf("received headers: %s", resp.Headers)
}
}
// other needed network Event
})
}

Why does this program not print anything?

I'm trying to use Go to parse html. I would like to print the html to the terminal and I don't understand why this doesn't print anything:
package main
import (
"fmt"
"log"
"net/http"
"golang.org/x/net/html"
)
func main() {
r, err := http.Get("https://google.com")
if err != nil {
log.Panicln(err)
}
defer func() {
err := r.Body.Close()
if err != nil {
fmt.Println(err)
}
}()
node, err := html.Parse(r.Body)
if err != nil {
log.Panicln(err)
}
fmt.Println(node.Data)
}
I know there are different ways to print the html, but I don't understand why this in particular never prints anything no matter what website I use. Is this intended behavior?
Docs:
https://godoc.org/golang.org/x/net/html#Node
https://github.com/golang/net/blob/master/html/node.go#L38
Because it's a tree of the HTML. Upper level is empty.
For example if you need parse all url from html:
package main
import (
"fmt"
"log"
"net/http"
"golang.org/x/net/html"
)
func main() {
r, err := http.Get("https://google.com")
if err != nil {
log.Panicln(err)
}
defer func() {
err := r.Body.Close()
if err != nil {
fmt.Println(err)
}
}()
node, err := html.Parse(r.Body)
if err != nil {
log.Panicln(err)
}
fmt.Println(node.Data)
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "a" {
for _, a := range n.Attr {
if a.Key == "href" {
fmt.Println(a.Val)
break
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c)
}
}
f(node)
}
It is because html.Parse returns a tree of connected nodes. And the root node is of type "document" which has no data inside of it.
Simplistic example of how to walk the tree:
package main
import (
"fmt"
"golang.org/x/net/html"
"strings"
)
func nodeTypeAsString(nodeType html.NodeType) string{
switch(nodeType){
case html.ErrorNode : return "ErrorNode"
case html.TextNode : return "TextNode"
case html.DocumentNode : return "DocumentNode"
case html.ElementNode : return "ElementNode"
case html.CommentNode : return "CommentNode"
case html.DoctypeNode: return "DoctypeNode"
}
return "UNKNOWN"
}
func main() {
s := "<html><body><p>Some content</p></body></html>"
node, err := html.Parse(strings.NewReader(s))
if err != nil {
panic(err.Error())
}
// Root node
fmt.Printf("NodeType=%s Data=%s\n",nodeTypeAsString(node.Type),node.Data)
// Step deeper
node = node.FirstChild
fmt.Printf("NodeType=%s Data=%s\n",nodeTypeAsString(node.Type),node.Data)
// Step deeper
node = node.FirstChild
fmt.Printf("NodeType=%s Data=%s\n",nodeTypeAsString(node.Type),node.Data)
// Step over to sibling
node = node.NextSibling
fmt.Printf("NodeType=%s Data=%s\n",nodeTypeAsString(node.Type),node.Data)
// Step deeper
node = node.FirstChild
fmt.Printf("NodeType=%s Data=%s\n",nodeTypeAsString(node.Type),node.Data)
// Step deeper
node = node.FirstChild
fmt.Printf("NodeType=%s Data=%s\n",nodeTypeAsString(node.Type),node.Data)
}
OUTPUT:
NodeType=DocumentNode Data=
NodeType=ElementNode Data=html
NodeType=ElementNode Data=head
NodeType=ElementNode Data=body
NodeType=ElementNode Data=p
NodeType=TextNode Data=Some content

How to make sure the PC is having internet and then launch exec?

I have this application running, when system boots Windows 8.1 then it launch. But often the PC get into the network later as a result Google Chrome shows a failed page.
package main
import "os"
import "os/exec"
import "runtime"
import "encoding/json"
type Configuration struct {
main []string
name []string
window []string
}
func main() {
myos := runtime.GOOS;
myarch := runtime.GOARCH;
var chrome = "";
var cmdopen *exec.Cmd;
if myos == "windows" {
if myarch == "386" {
chrome = "C:/Program Files (x86)/Google/Chrome/Application/chrome.exe";
} else {
chrome = "C:/Program Files (x86)/Google/Chrome/Application/chrome.exe";
}
// Read config
file, _ := os.Open("C:/Program Files (x86)/abc/package.json");
decoder := json.NewDecoder(file);
configuration := Configuration{};
err := decoder.Decode(&configuration);
if err != nil {
println("error: ", err);
}
println(configuration.main);
// BUG!!!!!!!!!!!!!!!!!!! But make sure local network or internet is available do not just execute the chrome like idiot, which is showing dead page
cmdopen = exec.Command(chrome, "--app=http://icanhazip.com");
err1 := cmdopen.Start();
if err1 != nil {
println("Failed: ", err1);
}
} else {
println("Incompatible");
}
}
you could do an http.Get()
func hazInternet() bool {
res, err := http.Get("http://www.google.com/robots.txt")
if err != nil {
log.Println(err)
return false
}
res.Body.Close()
return true
}
Go PlayGround