I am trying to develop a web application that parses JSON files stored in AWS S3 and then saves the data to my local database. I do not want to save the JSON files to my local machine; I want to read them directly from S3. Right now I am struggling to read and parse those JSON files. My workflow is: 1. Read each JSON file from Amazon S3 without saving it locally. 2. Save the parsed data to my DB.
// awsDataDownload lists the files in the configured S3 bucket and, for each
// JSON file dated 2021-09-30, hands it off to fctDataDownload for parsing.
//
// NOTE(review): credentials are hard-coded here; move them into the
// environment configuration (envcfg is already initialized below) before
// shipping.
func (s *Server) awsDataDownload(res http.ResponseWriter, req *http.Request) {
	AWSRegion := "eu-west-2"
	AccessKeyID := "XXXXXXXXXXXX"
	SecretAccessKey := "XXXXXXXXXXXXXXXXXX"
	AWSBucketName := "sample-prod"

	_, err := envcfg.New()
	if err != nil {
		s.logger.WithError(err).Infof("Failed to initalize AWS configuration")
		http.Error(res, "Failed to initalize AWS configuration", http.StatusInternalServerError)
		return
	}

	userS3, err := s3.NewS3(s3.Config{
		Region:          AWSRegion,
		AccessKeyID:     AccessKeyID,
		SecretAccessKey: SecretAccessKey,
		Buckets: map[string]string{
			"userS3": AWSBucketName,
		},
	})
	if err != nil {
		s.logger.WithError(err).Infof("Failed to connect with AWS")
		http.Error(res, "Failed to connect with AWS", http.StatusInternalServerError)
		return
	}

	files, err := userS3.GetAllFiles(req.Context(), "userS3", "")
	if err != nil {
		s.logger.WithError(err).Infof("cannot load files from AWS")
		http.Error(res, "Failed to download with AWS", http.StatusInternalServerError)
		return
	}

	// Compile the patterns once, outside the loop — the original called
	// regexp.MustCompile on every iteration.
	var (
		jsonRe = regexp.MustCompile(`.+?(.json)`)
		dateRe = regexp.MustCompile(`\d{4}-\d{2}-\d{2}`)
	)

	for _, f := range files {
		awsUploadFile := AWSUploadedFile{
			FileName: f.FileName,
			URL:      f.URL,
			Created:  time.Time{},
			Size:     f.Size,
		}

		// FindStringSubmatch returns nil when there is no match; the
		// original indexed [0] unconditionally, which panics on non-JSON
		// or undated URLs. Skip such files instead.
		jsonMatch := jsonRe.FindStringSubmatch(awsUploadFile.URL)
		dateMatch := dateRe.FindStringSubmatch(awsUploadFile.URL)
		if jsonMatch == nil || dateMatch == nil {
			continue
		}

		if dateMatch[0] == "2021-09-30" {
			s.fctDataDownload(res, req, jsonMatch[0], awsUploadFile.URL, awsUploadFile.FileName)
		}
	}
}
From this I now get every file's name and URL. Next, I want to read the JSON content of each of those files. This is my code:
// fctDataDownload dispatches one downloaded S3 JSON file to the individual
// importers. Only the organisation-profile importer is active; the people
// and funding importers are kept below, disabled, as future hooks.
func (s *Server) fctDataDownload(res http.ResponseWriter, req *http.Request, awsURLmatchJson, URL, FileName string) {
	s.fctOrgProfile(res, req, awsURLmatchJson, URL, FileName)
	// Disabled importers — enable when their feeds are ready:
	// s.fctOrgPeople(res, req)
	// s.fctOrgFundings(res, req)
}
func (s *Server) fctOrgProfile(res http.ResponseWriter, req *http.Request, awsURLmatchJson, URL, FileName string) {
s.logger.Infof("awsURLmatchJson %+v", awsURLmatchJson)
//orgProfile := "/home/asad/go/src/codemen.org/json-dump-data/companies/companies.json"
companiesNameMatch := regexp.MustCompile(`.+?(.people)`).FindStringSubmatch(awsURLmatchJson)
s.logger.Infof("%+v", companiesNameMatch)
if len(companiesNameMatch) < 1 {
return
}
s.logger.Errorf(" companiesNameMatch %+v", companiesNameMatch)
s.logger.Errorf(" FileName %+v", FileName)
//orgProfile := companiesNameMatch
jsonFile, err := os.Open(FileName)
if err != nil {
fmt.Printf("failed to open json file: %s, error: %v", FileName, err)
s.logger.Errorf("%+v", companiesNameMatch)
return
}
//Rest code
But I'm getting this error
ERRO[0005] FileName part-00000-c6c78231-f8ca-498c-b949-472b53074a57-c000.json
failed to open json file: part-00000-c6c78231-f8ca-498c-b949-472b53074a57-c000.json, error: open part-00000-c6c78231-f8ca-498c-b949-472b53074a57-c000.json: no such file or directoryERRO[0005] [https://fct-apix-data-dump-prod.s3.eu-west-2.amazonaws.com/date%3D2021-09-30/people /people]
The JSON file is stored in S3, so you can't read it with os.Open:
jsonFile, err := os.Open(FileName)
replace it with s3.GetObject
Related
High level: my program takes a long URL and makes it shorter (like tinyurl). But I have a problem passing the long-URL and short-URL variables from the function into a MySQL database. I have tried looking this up on the internet with no luck; the information I found is not close to what I have. I am only posting parts of my code — the full code is in the playground.
var db *sql.DB // shared connection pool, used by all HTTP handlers

// main opens the MySQL connection, registers the handlers, and starts the
// HTTP server on the ip/port given via -i and -p flags.
func main() {
	// Assign to the package-level db with '=' — the original wrote
	// 'var db, err = sql.Open(...)', which declared a NEW local db that
	// shadowed the global, leaving every handler with a nil *sql.DB.
	var err error
	db, err = sql.Open("mysql", dsn())
	if err != nil {
		fmt.Println(err.Error())
	} else {
		fmt.Printf("Connection established to MYSQL server\n%s\n", dsn())
	}
	defer db.Close()

	linkList = map[string]string{}

	http.HandleFunc("/link", addLink)
	http.HandleFunc("/hpe/", getLink)
	http.HandleFunc("/", Home)

	// Flags to pass string ip or port to WEB app
	ip := flag.String("i", "0.0.0.0", "")
	port := flag.String("p", "8080", "")
	flag.Parse()

	fmt.Printf("Web application listening on %s \n", net.JoinHostPort(*ip, *port))
	log.Fatal(http.ListenAndServe(net.JoinHostPort(*ip, *port), nil))
}
Here is the function that creates the short URL. Every time this function is called it produces a link:
// addLink handles /link?ik=<long-url>: validates the URL, generates a
// 5-character short key, stores the mapping in memory and in MySQL, and
// writes the short link back to the client.
func addLink(w http.ResponseWriter, r *http.Request) {
	log.Println("Add Link")

	key, ok := r.URL.Query()["ik"]
	if !ok {
		w.WriteHeader(http.StatusBadRequest)
		fmt.Fprintf(w, "Failed to add link")
		return
	}

	if !validLink(key[0]) {
		w.WriteHeader(http.StatusBadRequest)
		fmt.Fprintf(w, "Could not create shortlink need absolute path link.")
		return
	}
	log.Println(key)

	if _, exists := linkList[key[0]]; exists {
		w.WriteHeader(http.StatusConflict)
		fmt.Fprintf(w, "Already have this link")
		return
	}

	genString := randomString(5)
	linkList[genString] = key[0]

	// Persist the mapping BEFORE replying. In the original this code sat
	// after an unconditional return (and behind a stray "s:" label), so it
	// never executed — which is why nothing reached the database. Also:
	// do not db.Close() inside a handler; the pool is shared.
	// Columns are (LongUrl, ShortUrl), so pass the long URL first — the
	// original inserted (genString, linkString), i.e. both short values.
	if _, err := db.Exec("insert into Url (LongUrl, ShortUrl) value(?,?);", key[0], genString); err != nil {
		fmt.Print(err.Error())
	}

	w.Header().Set("Content-Type", "text/html")
	w.WriteHeader(http.StatusAccepted)
	// The original Sprintf passed genString twice for one %s verb,
	// producing "%!(EXTRA string=...)" output.
	linkString := fmt.Sprintf("hpe/%s", genString)
	fmt.Fprintf(w, "Added shortlink\n")
	// Fprint, not Fprintf: linkString is data, not a format string.
	fmt.Fprint(w, linkString)
}
// getLink handles /hpe/<key>: looks the key up in linkList and redirects the
// client to the original long URL.
func getLink(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	log.Println("Get Link:", path)

	// Guard the index: the original assumed len(pathArgs) > 2 and would
	// panic on a path like "/hpe" with no trailing segment.
	pathArgs := strings.Split(path, "/")
	if len(pathArgs) < 3 || len(pathArgs[2]) < 1 {
		// Don't call WriteHeader before Redirect: Redirect writes the
		// status itself, and the original's StatusNotFound write made the
		// redirect a superfluous second WriteHeader.
		http.Redirect(w, r, "0.0.0.0:8080", http.StatusTemporaryRedirect)
		return
	}

	target, ok := linkList[pathArgs[2]]
	if !ok {
		// Unknown key: the original redirected to "" (an empty Location).
		http.NotFound(w, r)
		return
	}

	log.Printf("Redirected to: %s", target)
	http.Redirect(w, r, target, http.StatusTemporaryRedirect)
}
My expectation is that when addLink is called, the long and short URLs it generates are inserted into the database, as written in the code.
I wrote some code that hits a public API and saves the JSON output to a file. But the data is being stored line by line in the file instead of as a single JSON document.
For eg.
Current Output:
{"ip":"1.1.1.1", "Country":"US"}
{"ip":"8.8.8.8", "Country":"IN"}
Desired Output:
[
{"ip":"1.1.1.1", "Country":"US"},
{"ip":"8.8.8.8", "Country":"IN"}
]
I know this should be pretty simple and I am missing something.
My Current Code is:
To read IP from file and hit the API one by one on each IP.
func readIPfromFile(filename string, outFile string, timeout int) {
data := jsonIn{}
//open input file
jsonFile, err := os.Open(filename) //open input file
...
...
jsonData := bufio.NewScanner(jsonFile)
for jsonData.Scan() {
// marshal json data & check for logs
if err := json.Unmarshal(jsonData.Bytes(), &data); err != nil {
log.Fatal(err)
}
//save to file
url := fmt.Sprintf("http://ipinfo.io/%s", data.Host)
GetGeoIP(url, outFile, timeout)
}
}
To make HTTP Request with custom request header and call write to file function.
// GetGeoIP fetches geo information from the given ipinfo.io URL, decodes the
// JSON payload into a jsonOut, and appends it to outFile via write2file.
//
// timeout is the whole-request timeout in seconds.
func GetGeoIP(url string, outFile string, timeout int) {
	geoClient := http.Client{
		Timeout: time.Second * time.Duration(timeout),
	}

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("accept", "application/json")

	res, getErr := geoClient.Do(req)
	if getErr != nil {
		log.Fatal(getErr)
	}
	// When Do returns a nil error the Body is always non-nil (net/http
	// contract), so the original's nil check was unnecessary — but closing
	// the body is mandatory so the connection can be reused.
	defer res.Body.Close()

	// Fail loudly on non-2xx answers instead of unmarshalling an error page.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		log.Fatalf("GetGeoIP: unexpected status %s for %s", res.Status, url)
	}

	body, readErr := ioutil.ReadAll(res.Body)
	if readErr != nil {
		log.Fatal(readErr)
	}

	jsonout := jsonOut{}
	if jsonErr := json.Unmarshal(body, &jsonout); jsonErr != nil {
		log.Fatal(jsonErr)
	}

	// The original discarded this error with `file, _ := ...`.
	file, err := json.Marshal(jsonout)
	if err != nil {
		log.Fatal(err)
	}
	write2file(outFile, file)
}
To Write data to file:
func write2file(outFile string, file []byte) {
f, err := os.OpenFile(outFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
if err != nil {
log.Fatal(err)
}
defer f.Close()
if _, err = f.WriteString(string(file)); err != nil {
log.Fatal(err)
}
if _, err = f.WriteString("\n"); err != nil {
log.Fatal(err)
}
I know I can change f.WriteString("\n") to f.WriteString(",") to add a comma, but adding the surrounding [] brackets to the file is still challenging for me.
First, please do not invent a new way of json marshaling, just use golang built-in encoding/json or other library on github.
Second, if you want to create a json string that represents an array of object, you need to create the array of objects in golang and marshal it into string (or more precisely, into array of bytes)
I created a simple example below, but please adapt it yourself if possible.
https://go.dev/play/p/RR_ok-fUTb_4
I'm trying to read the content of a bucket on Google Cloud Storage using GO.
I'm able to do that, but is is very slow.
The content of the bucket is like this:
bucket name
-> folders with alphanumeric characters
----> 5 files into each of the folder
--------> each file has a json array inside
what I want to do is to inspect the content of the jsons files for all the folders in the bucket and look for a specific value. The following code work, but it is very slow:
package backend
import (
"encoding/json"
"fmt"
"golang.org/x/net/context"
"golang.org/x/oauth2"
"golang.org/x/oauth2/google"
"google.golang.org/appengine"
"google.golang.org/appengine/file"
"google.golang.org/appengine/urlfetch"
"google.golang.org/cloud"
"google.golang.org/cloud/storage"
"io"
"io/ioutil"
"net/http"
)
var bucket = "bucket_Name"
// jsonStruct is the shape of the user documents read from the bucket.
// NOTE(review): readFile decodes a whole object into a single jsonStruct,
// while the surrounding description says each file contains a JSON array —
// confirm which format the bucket actually holds.
type jsonStruct struct {
	Gender string `json:"gender"`
	Age    string `json:"age"`
	ID     string `json:"id"`
	Done   int    `json:"done"`
}
// saveData bundles everything one request's bucket-walking helpers need:
// both the App Engine and cloud contexts plus the HTTP request/response.
type saveData struct {
	c       context.Context     // App Engine request context
	r       *http.Request       // incoming HTTP request (original comment said "response")
	w       http.ResponseWriter // http writer
	ctx     context.Context     // cloud storage context derived from c in listBuckets
	cleanUp []string            // cleanUp is a list of filenames that need cleaning up at the end of the saving.
	failed  bool                // failed indicates that one or more of the saving steps failed.
}
// init registers the HTTP routes: the bucket-listing endpoint plus a
// catch-all static file handler (App Engine runs init at instance start-up).
func init() {
	http.HandleFunc("/", handleStatic)
	http.HandleFunc("/listBuckets", listBuckets)
}
func handleStatic(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Cache-Control", "no-cache")
http.ServeFile(w, r, "static/"+r.URL.Path)
}
// listBuckets answers /listBuckets: it resolves the target bucket (falling
// back to the app's default GCS bucket when the package-level name is empty),
// builds an OAuth2-authenticated cloud context, and streams the bucket
// listing back to the client.
func listBuckets(w http.ResponseWriter, r *http.Request) {
	c := appengine.NewContext(r)

	// Fall back to the application's default bucket when none is configured.
	if bucket == "" {
		var err error
		bucket, err = file.DefaultBucketName(c)
		if err != nil {
			// log.Errorf(c, "failed to get default GCS bucket name: %v", err)
			return
		}
	}

	// HTTP client whose transport injects App Engine OAuth2 tokens scoped
	// for full storage control.
	authed := &http.Client{
		Transport: &oauth2.Transport{
			Source: google.AppEngineTokenSource(c, storage.ScopeFullControl),
			Base:   &urlfetch.Transport{Context: c},
		},
	}

	// Bundle the per-request state the saving helpers need.
	d := &saveData{
		c:   c,
		r:   r,
		w:   w,
		ctx: cloud.NewContext(appengine.AppID(c), authed),
	}
	d.listBucket(bucket)
}
// errorf marks the current save as failed; the actual log call is currently
// disabled (commented out), so only the flag is recorded.
func (d *saveData) errorf(format string, args ...interface{}) {
	d.failed = true
	// log.Errorf(d.c, format, args...)
}
// listBucket walks every object in the bucket, page by page (following the
// objs.Next cursor until it is nil), and runs readFile on each object name.
func (d *saveData) listBucket(bucket string) {
	io.WriteString(d.w, "\nListbucket result:\n")

	query := &storage.Query{}
	for query != nil {
		objs, err := storage.ListObjects(d.ctx, bucket, query)
		if err != nil {
			d.errorf("listBucket: unable to list bucket %q: %v", bucket, err)
			return
		}
		for _, obj := range objs.Results {
			d.readFile(obj.Name)
		}
		// Advance to the next page; nil ends the loop.
		query = objs.Next
	}
}
// readFile downloads a single object from the bucket, decodes it as a
// jsonStruct, and prints its Done field to the response writer.
func (d *saveData) readFile(fileName string) {
	rc, err := storage.NewReader(d.ctx, bucket, fileName)
	if err != nil {
		d.errorf("readFile: unable to open file from bucket %q, file %q: %v", bucket, fileName, err)
		return
	}
	defer rc.Close()

	contents, err := ioutil.ReadAll(rc)
	if err != nil {
		d.errorf("readFile: unable to read data from bucket %q, file %q: %v", bucket, fileName, err)
		return
	}

	var userDetails jsonStruct
	if err := json.Unmarshal(contents, &userDetails); err != nil {
		d.errorf("readFile: %v", err)
		return
	}
	fmt.Fprintf(d.w, "done is: %v\n", userDetails.Done)
}
Basically, right now I read the folder names from the bucket and then read each folder's content using those names. Would it be possible to cache the whole bucket's content in a Go variable and then work on that variable, instead of reading from the bucket for each folder?
I really need this to be faster because I need to present the result back in real time.
Thanks a lot
See below a simple Go sample code to list the bucket content on Google Cloud Storage:
package main
import (
"context"
"fmt"
"log"
"os"
"cloud.google.com/go/storage"
"google.golang.org/api/iterator"
)
// main lists every object in the hard-coded GCS bucket and prints each
// object's owner.
func main() {
	// NOTE(review): hard-coding a developer-specific credentials path makes
	// this machine-dependent — prefer setting GOOGLE_APPLICATION_CREDENTIALS
	// in the environment before launching.
	os.Setenv("GOOGLE_APPLICATION_CREDENTIALS",
		"C:\\Users\\Shubham Snehi\\Downloads\\awacs-dev-160bf0e57dc1.json")

	ctx := context.Background()
	client, err := storage.NewClient(ctx)
	if err != nil {
		log.Fatalf("Failed to create client: %v", err)
	}
	defer client.Close()

	// Sets the name for the new bucket.
	bucketName := "balatestawacs"
	// Creates a Bucket instance.
	bucket := client.Bucket(bucketName)

	// Iterate the full object listing. Use log.Fatalf for iteration errors,
	// consistent with the client-creation path — the original panicked here.
	it := bucket.Objects(ctx, nil)
	for {
		attrs, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			log.Fatalf("listing objects in %q: %v", bucketName, err)
		}
		fmt.Println(attrs.Owner)
	}
}
Which way Go does handle read access to directories and files?
I want to read a folder, search for csv files and save the content per csv file in order to send the content via http request.
Here's my code:
func GetFileContent(fileName string) ([][]string, error) {
file, err := os.Open(fileName)
if err != nil {
log.Println("There was an error:", err)
return nil, err
}
defer file.Close()
reader := csv.NewReader(file)
reader.FieldsPerRecord = -1
csvFileContent, err := reader.ReadAll()
if err != nil {
log.Println("There was an error:", err)
return nil, err
}
return csvFileContent, nil
}
func GetFiles(importPath string, fileExtension string) ([]string, error) {
var result []string
directory, err := os.Open(importPath)
if err != nil {
return result, err
}
defer directory.Close()
files, err := directory.Readdir(-1)
if err != nil {
return result, err
}
for _, file := range files {
if file.Mode().IsRegular() {
if filepath.Ext(file.Name()) == "." + fileExtension {
log.Println("Import Files: ", file.Name())
result = append(result, file.Name())
}
}
}
if len(result) == 0 {
log.Println("No import files of type " + fileExtension + " found.")
log.Println("Import skipped.")
}
return result, nil
}
Error: open : no such file or directory
The error appears whenever I use a folder other than the application's root/working directory (for example a parent folder). It works as long as I use ./ as the source folder.
FileInfo.Name returns the base name of the file. You need to join the directory name with the base name to get a file path.
Change this line in GetFiles
result = append(result, file.Name())
to
result = append(result, filepath.Join(importPath, file.Name()))
In this case, you might consider using the higher-level filepath.Glob() instead:
files, err := filepath.Glob(filepath.Join(importPath, "*.csv"))
Trying to accomplish the following output with my data:
Convert to JSON string and write to file: output.json (this part is working)
Gzip Compress the JSON string and write that to a json.gz file: output.json.gz (NOT WORKING)
The code runs fine and writes to both files. But the gzipped file gives this error when I try to unzip it: Data error in 'output.json'. File is broken
Here's the code:
package main
import (
"bytes"
"compress/gzip"
"encoding/json"
"fmt"
"io/ioutil"
)
type Generic struct {
Name string
Cool bool
Rank int
}
func main() {
generic := Generic{"Golang", true, 100}
fileJson, _ := json.Marshal(generic)
err := ioutil.WriteFile("output.json", fileJson, 0644)
if err != nil {
fmt.Printf("WriteFileJson ERROR: %+v", err)
}
var fileGZ bytes.Buffer
zipper := gzip.NewWriter(&fileGZ)
defer zipper.Close()
_, err = zipper.Write([]byte(string(fileJson)))
if err != nil {
fmt.Printf("zipper.Write ERROR: %+v", err)
}
err = ioutil.WriteFile("output.json.gz", []byte(fileGZ.String()), 0644)
if err != nil {
fmt.Printf("WriteFileGZ ERROR: %+v", err)
}
}
What did I miss?
You need to call zipper.Close() immediately after finishing writing
http://play.golang.org/p/xNeMg3aXxO
_, err = zipper.Write(fileJson)
if err != nil {
log.Fatalf("zipper.Write ERROR: %+v", err)
}
err := zipper.Close() // call it explicitly and check error
Calling defer zipper.Close() would trigger the call at the end of the main function. Until you call .Close() the data is being written to an intermediate buffer and not flushed to the actual file.