I am working on a website scraper. I can send only 1 JSON data to JSON file regularly. I want to write one after another JSON data, so I need to keep hundreds of data in a single JSON file. like this
[
{
"id": 1321931,
"name": "Mike"
},
{
"id": 32139219,
"name": "Melissa"
},
{
"id": 8421921,
"name": "Jordan"
},
{
"id": 4291901,
"name": "David"
}
]
but output like this. When I send new data, just the first JSON data update itself.
[
{
"id": 1,
"name": "Mike"
}
]
here is the code:
package main
import (
"encoding/json"
"fmt"
"html/template"
"io/ioutil"
"log"
"math/rand"
"net/http"
"os"
"strings"
"github.com/gocolly/colly"
)
type Info struct {
ID int `json:"id"`
Name string `json:"name"`
}
var tpl *template.Template
var name string
var stonf Info
var allInfos []Info
var id int
var co = colly.NewCollector()
func main() {
fmt.Println("Started...")
allInfos = make([]Info, 1)
id = rand.Intn((99999 - 10000) + 10000)
// Reading Data From Json
data, err := ioutil.ReadFile("stocky.json")
if err != nil {
fmt.Println("ERROR 1 JSON", err)
}
// Unmarshal JSON data
var d []Info
err = json.Unmarshal([]byte(data), &d)
if err != nil {
fmt.Println(err)
}
tpl, _ = tpl.ParseGlob("templates/*.html")
http.HandleFunc("/mete", hellloHandleFunc)
staticHandler := http.FileServer(http.Dir("./css/"))
http.Handle("/css/", http.StripPrefix("/css", staticHandler))
http.ListenAndServe("localhost:8080", nil)
}
func hellloHandleFunc(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
log.Fatal(err)
}
allInfos[0].ID = id // JSON-PRO
// GET Price - Fiyat GETİR
co.OnHTML("div#dp", func(p *colly.HTMLElement) {
name = p.ChildText("h1#title")
})
requestLink := strings.TrimSpace(r.FormValue("input-link"))
co.Visit(requestLink)
// FIRST DATA JSON
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
enc.Encode(allInfos)
stonf = Info{
Name: name,
}
fmt.Println("Index Running")
tpl.ExecuteTemplate(w, "form-copy.html", stonf)
}
func writeJson(data []Info) {
dataFile, err := json.MarshalIndent(data, "", " ")
if err != nil {
log.Println("Could not create JSON", err)
}
ioutil.WriteFile("stocky.json", dataFile, 0666)
}
Here is a solution which appends new Info to the list and store in file.
The solution will perform properly only for relatively small list. For large lists, the overhead of writing the entire file each time may be too high. In such case i propose to change the format to ndjson. It will allow to write only the current Info struct instead of the whole list.
I've also added synchronization mechanism to avoid race conditions in case you send multiple HTTP requests at the same time.
I assumed that the identifier must be generated separately for each request, and it is not a problem if collision occur.
package main
import (
"encoding/json"
"fmt"
"html/template"
"io/ioutil"
"log"
"math/rand"
"net/http"
"os"
"strings"
"sync"
"github.com/gocolly/colly"
)
type (
Info struct {
ID int `json:"id"`
Name string `json:"name"`
}
Infos struct {
List []Info
sync.Mutex
}
)
var (
infos *Infos
tpl *template.Template
co = colly.NewCollector()
)
func main() {
fmt.Println("Started...")
var err error
infos, err = readInfos()
if err != nil {
log.Fatal(err)
}
tpl, _ = tpl.ParseGlob("templates/*.html")
http.HandleFunc("/mete", hellloHandleFunc)
staticHandler := http.FileServer(http.Dir("./css/"))
http.Handle("/css/", http.StripPrefix("/css", staticHandler))
if err := http.ListenAndServe("localhost:8080", nil); err != nil {
log.Fatal(err)
}
}
func hellloHandleFunc(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
log.Fatal(err)
}
stonf := Info{
ID: rand.Intn((99999 - 10000) + 10000),
}
// GET Price - Fiyat GETİR
co.OnHTML("div#dp", func(p *colly.HTMLElement) {
stonf.Name = p.ChildText("h1#title")
})
requestLink := strings.TrimSpace(r.FormValue("input-link"))
if err := co.Visit(requestLink); err != nil {
log.Fatal(err)
}
if err := infos.AppendAndWrite(stonf); err != nil {
log.Fatal(err)
}
// FIRST DATA JSON
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
enc.Encode(stonf)
fmt.Println("Index Running")
tpl.ExecuteTemplate(w, "form-copy.html", stonf)
}
func readInfos() (*Infos, error) {
// Reading Data From Json
data, err := ioutil.ReadFile("stocky.json")
if err != nil {
return nil, err
}
var r []Info
// Unmarshal JSON data
err = json.Unmarshal([]byte(data), &r)
if err != nil {
return nil, err
}
return &Infos{List: r}, nil
}
func (i *Infos) AppendAndWrite(info Info) error {
i.Lock()
defer i.Unlock()
i.List = append(i.List, info)
if err := i.storeLocked(); err != nil {
return fmt.Errorf("storing info list failed: %w", err)
}
return nil
}
func (i *Infos) storeLocked() error {
dataFile, err := json.MarshalIndent(i.List, "", " ")
if err != nil {
return fmt.Errorf("could not marshal infos JSON: %w", err)
}
err = ioutil.WriteFile("stocky.json", dataFile, 0666)
if err != nil {
return fmt.Errorf("could not write 'stocky.json' file: %w", err)
}
return nil
}
There is a standard called JSON lines (https://jsonlines.org/) consisting on only one JSON per line instead of wrapping all in a JSON array.
JSON library from Go stdlib works pretty well with JSON lines on both cases, reading and writing.
Write multiple JSON (one per line):
e := json.NewEncoder(yourWriterFile)
e.Encode(object1)
e.Encode(object2)
//...
Read multiple JSON (one per line or concatenated):
d := json.NewDecoder(yourReaderFile)
d.Decode(&object1)
d.Decode(&object2)
//...
More info: https://pkg.go.dev/encoding/json
Related
I'm new to golang and json, we are using gorilla mux library and I'd like to do a post request in postman. In config struct entries needs to be a map like that and in post server I need to have an array of *Config in postServer struct. I have 3 go files.
Service.go file is this:
package main
import (
"errors"
"github.com/gorilla/mux"
"mime"
"net/http"
)
type Config struct {
Id string `json:"id"`
entries map[string]string `json:"entries"`
}
type postServer struct {
data map[string][]*Config
}
func (ts *postServer) createPostHandler(w http.ResponseWriter, req *http.Request) {
contentType := req.Header.Get("Content-Type")
mediatype, _, err := mime.ParseMediaType(contentType)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
if mediatype != "application/json" {
err := errors.New("Expect application/json Content-Type")
http.Error(w, err.Error(), http.StatusUnsupportedMediaType)
return
}
rt, err := decodeBody(req.Body)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
id := createId()
ts.data[id] = rt
renderJSON(w, rt)
}
func (ts *postServer) getAllHandler(w http.ResponseWriter, req *http.Request) {
allTasks := []*Config{}
for _, v := range ts.data {
allTasks = append(allTasks, v...)
}
renderJSON(w, allTasks)
}
func (ts *postServer) getPostHandler(w http.ResponseWriter, req *http.Request) {
id := mux.Vars(req)["id"]
task, ok := ts.data[id]
if !ok {
err := errors.New("key not found")
http.Error(w, err.Error(), http.StatusNotFound)
return
}
renderJSON(w, task)
}
func (ts *postServer) delPostHandler(w http.ResponseWriter, req *http.Request) {
id := mux.Vars(req)["id"]
if v, ok := ts.data[id]; ok {
delete(ts.data, id)
renderJSON(w, v)
} else {
err := errors.New("key not found")
http.Error(w, err.Error(), http.StatusNotFound)
}
}
I wanted to test createPostHandler.
Then I have helper.go file where I decoded json into go and rendered into json:
package main
import (
"encoding/json"
"github.com/google/uuid"
"io"
"net/http"
)
func decodeBody(r io.Reader) ([]*Config, error) {
dec := json.NewDecoder(r)
dec.DisallowUnknownFields()
var rt []*Config
if err := dec.Decode(&rt); err != nil {
return nil, err
}
return rt, nil
}
func renderJSON(w http.ResponseWriter, v interface{}) {
js, err := json.Marshal(v)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
w.Write(js)
}
func createId() string {
return uuid.New().String()
}
and the last one go file is main.go where I have this:
package main
import (
"context"
"github.com/gorilla/mux"
"log"
"net/http"
"os"
"os/signal"
"syscall"
"time"
)
func main() {
quit := make(chan os.Signal)
signal.Notify(quit, os.Interrupt, syscall.SIGTERM)
router := mux.NewRouter()
router.StrictSlash(true)
server := postServer{
data: map[string][]*Config{},
}
router.HandleFunc("/config/", server.createPostHandler).Methods("POST")
router.HandleFunc("/configs/", server.getAllHandler).Methods("GET")
router.HandleFunc("/config/{id}/", server.getPostHandler).Methods("GET")
router.HandleFunc("/config/{id}/", server.delPostHandler).Methods("DELETE")
// start server
srv := &http.Server{Addr: "0.0.0.0:8000", Handler: router}
go func() {
log.Println("server starting")
if err := srv.ListenAndServe(); err != nil {
if err != http.ErrServerClosed {
log.Fatal(err)
}
}
}()
<-quit
log.Println("service shutting down ...")
// gracefully stop server
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if err := srv.Shutdown(ctx); err != nil {
log.Fatal(err)
}
log.Println("server stopped")
}
And JSON whad I did send is this:
{
"entries":["hello", "world"]
}
And error what I'm getting in postman is this:
json: cannot unmarshal object into Go value of type []*main.Config
I don't know what is a problem, maybe I'm sending wrong json or I just did something wrong in decodeBody, I needed to add [] in decodeBody in var rt []*Config because it wouldn't work otherwise.
Can someone help me to fix this please?
This is an example of how you can define a struct Config that you can parse your sample JSON into.
EDIT: field entries changed to map.
You can play with it on Playground.
package main
import (
"encoding/json"
"fmt"
)
type Config struct {
Id string `json:"id"`
Entries map[string]string `json:"entries"`
}
func main() {
str := `[{"id":"42", "entries":{"hello": "world"}}]`
var tmp []Config
err := json.Unmarshal([]byte(str), &tmp)
if err != nil {
fmt.Printf("error: %v", err)
}
var rt []*Config
for _, c := range tmp {
rt = append(rt, &c)
}
for _, c := range rt {
for k, v := range c.Entries {
fmt.Printf("id=%s key=%s value=%s\n", c.Id, k, v)
}
}
}
How can i create and array of maps[string]interface to add multiple json files (not all json in one, but in different). I created a code, that add all json files in one. But in the future i need to compare field in map[string]interface. I think that need to create a loop. Here my program code:
var master map[string]interface{}
func main() {
fileIndex := 3 // three json files. All named test1.json, test2.json and test3.json
for i := 1; i <= fileIndex; i++ {
fileName := fmt.Sprintf("%s%d%s", "test", i, ".json")
// Open jsonFile
jsonFile, err := os.Open(fileName)
if err != nil {
log.Println("Error:", err)
}
defer jsonFile.Close()
byteValue, _ := ioutil.ReadAll(jsonFile)
json.Unmarshal(byteValue, &master)
fmt.Println(master)
}
}
And here my 3 json:
First:
{
"name":"Kate",
"date":"2013-04-23T19:24:59.511Z",
"data":"is nice"
}
Second:
{
"name":"Gleison",
"date":"2012-04-23T19:25:00.511Z",
"data":"is a good person"
}
Third:
{
"name":"Rodrigo",
"date":"2013-04-23T20:24:59.511Z",
"data":"is kind"
}
It is necessary to divide them into different map[string]interface. Without creating struct.
For an array, I think you are looking []map[string]interface{}. You can simply create this variable and append it if I understood your question correctly
Here is a modified example
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"os"
)
func main() {
var master map[string]interface{}
var allMaster []map[string]interface{}
fileIndex := 3 // three json files. All named test1.json, test2.json and test3.json
for i := 1; i <= fileIndex; i++ {
fileName := fmt.Sprintf("%s%d%s", "test", i, ".json")
// Open jsonFile
jsonFile, err := os.Open(fileName)
if err != nil {
log.Println("Error:", err)
}
defer jsonFile.Close()
byteValue, _ := ioutil.ReadAll(jsonFile)
err = json.Unmarshal(byteValue, &master)
if err != nil {
log.Fatal(err)
}
allMaster = append(allMaster, master)
fmt.Println(allMaster)
}
}
I have a JSON file in S3 that takes the format of the following struct:
type StockInfo []struct {
Ticker string `json:"ticker"`
BoughtPrice string `json:"boughtPrice"`
NumberOfShares string `json:"numberOfShares"`
}
and I want to read it into a struct value from S3. In python the code would look something like this:
import boto3
import json
s3 = boto3.client('s3', 'us-east-1')
obj = s3.get_object(Bucket=os.environ["BucketName"], Key=os.environ["Key"])
fileContents = obj['Body'].read().decode('utf-8')
json_content = json.loads(fileContents)
However I'm kinda stuck on how to make this happen in Go. I've gotten this far:
package main
import (
"archive/tar"
"bytes"
"fmt"
"log"
"os"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/joho/godotenv"
)
type StockInfo []struct {
Ticker string `json:"ticker"`
BoughtPrice string `json:"boughtPrice"`
NumberOfShares string `json:"numberOfShares"`
}
func init() {
// loads values from .env into the system
if err := godotenv.Load(); err != nil {
log.Print("No .env file found")
}
return
}
func main() {
// Store the PATH environment variable in a variable
sess, err := session.NewSession(&aws.Config{
Region: aws.String("us-east-1")},
)
if err != nil {
panic(err)
}
s3Client := s3.New(sess)
bucket := "ian-test-bucket-go-python"
key := "StockInfo.json"
requestInput := &s3.GetObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
}
result, err := s3Client.GetObject(requestInput)
if err != nil {
fmt.Println(err)
}
fmt.Println(result)
which returns to me the body/object buffer, but im not sure how to read that into a string so I can marshal it into my struct. I found this code in a similar question:
requestInput := &s3.GetObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
}
buf := new(aws.WriteAtBuffer)
numBytes, _ := *s3manager.Downloader.Download(buf, requestInput)
tr := tar.NewReader(bytes.NewReader(buf.Bytes()))
but I get the following errors:
not enough arguments in call to method expression s3manager.Downloader.Download
have (*aws.WriteAtBuffer, *s3.GetObjectInput)
want (s3manager.Downloader, io.WriterAt, *s3.GetObjectInput, ...func(*s3manager.Downloader))
multiple-value s3manager.Downloader.Download() in single-value context
Can anyone point me in the right direction? kinda frustrating how hard it seems to do this compared to python.
I was able to do it with the following code:
requestInput := &s3.GetObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
}
result, err := s3Client.GetObject(requestInput)
if err != nil {
fmt.Println(err)
}
defer result.Body.Close()
body1, err := ioutil.ReadAll(result.Body)
if err != nil {
fmt.Println(err)
}
bodyString1 := fmt.Sprintf("%s", body1)
var s3data StockInfo
decoder := json.NewDecoder(strings.NewReader(bodyString1))
err = decoder.Decode(&s3data)
if err != nil {
fmt.Println("twas an error")
}
fmt.Println(s3data)
Alternative solution using json.Unmarshal
besed on aws-sdk-go-v2
...
params := &s3.GetObjectInput{
Bucket: aws.String(s3Record.S3.Bucket.Name),
Key: aws.String(s3Record.S3.Object.Key),
}
result, _ := client.GetObject(context.TODO(), params)
if err != nil {
panic(err)
}
defer result.Body.Close()
// capture all bytes from upload
b, err := ioutil.ReadAll(result.Body)
if err != nil {
panic(err)
}
var temp StockInfo
if err = json.Unmarshal(b, &temp); err != nil {
panic(err)
}
ftm.Println("res: ",b)
I have used json.Unmarshal and extracted json content. I then managed to get one layer deeper into the []interface{} by using the following code:
response, err := http.Get("http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=2B2A0C37AC20B5DC2234E579A2ABB11C&steamids=76561198132612090")
content, err := ioutil.ReadAll(response.Body)
defer response.Body.Close()
if err != nil {
panic(0)
}
var decoded map[string]interface{}
if err := json.Unmarshal(content, &decoded); err != nil {
panic(0)
}
players := decoded["response"].(map[string]interface{})["players"]
if err != nil {
panic(0)
}
Variable players' type is []interface {} and content is [map[personaname:Acidic]].
How do I access this map? I've tried players["personaname"] but that doesn't seem to work. Any ideas?
Defining a struct type with the expected schema will make your life easier when you want to get the data from it:
package main
import "fmt"
//import "net/http"
//import "io/ioutil"
import "encoding/json"
// you don't need to define everything, only what you need
type Player struct {
Steamid string
Communityvisibilitystate int
Personaname string
Lastlogoff int64 // time.Unix(Lastlogoff, 0)
Profileurl string
Avatar string
Avatarmedium string
Avatarfull string
Personastate int
Realname string
Primaryclanid string
Timecreated int64 // time.Unix(Timecreated, 0)
Personastateflags int
//Loccountrycode string // e.g. if you don't need this
}
func main() {
/*response, err := http.Get("http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=2B2A0C37AC20B5DC2234E579A2ABB11C&steamids=76561198132612090")
if err != nil {
panic(err)
}
content, err := ioutil.ReadAll(response.Body)
defer response.Body.Close()
if err != nil {
panic(0)
}*/
content := []byte(`{
"response": {
"players": [
{
"steamid": "76561198132612090",
"communityvisibilitystate": 3,
"profilestate": 1,
"personaname": "Acidic",
"lastlogoff": 1459489924,
"profileurl": "http://steamcommunity.com/id/ari9/",
"avatar": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/bc/bc50a4065c31c606e51dfad329341b2d1f1ac4d3.jpg",
"avatarmedium": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/bc/bc50a4065c31c606e51dfad329341b2d1f1ac4d3_medium.jpg",
"avatarfull": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/bc/bc50a4065c31c606e51dfad329341b2d1f1ac4d3_full.jpg",
"personastate": 3,
"realname": "Ari Seyhun",
"primaryclanid": "103582791440552060",
"timecreated": 1397199406,
"personastateflags": 0,
"loccountrycode": "TR"
}
]
}
}`)
var decoded struct {
Response struct {
Players []Player
}
}
if err := json.Unmarshal(content, &decoded); err != nil {
panic(err)
}
fmt.Printf("%#v\n", decoded.Response.Players)
}
http://play.golang.org/p/gVPRwLFunF
You can also create a new named type from time.Time for Timecreated and Lastlogoff with its own UnmarshalJSON function, and immediately convert it to time.Time using time.Unix()
Players is a JSON array. Thus you have to convert it to a slice of interface.
Then you can access any element of the slice and casting it to a map[string]interface{} type.
Here's the working example
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
)
func main() {
response, err := http.Get("http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=2B2A0C37AC20B5DC2234E579A2ABB11C&steamids=76561198132612090")
content, err := ioutil.ReadAll(response.Body)
defer response.Body.Close()
if err != nil {
panic(0)
}
var decoded map[string]interface{}
if err := json.Unmarshal(content, &decoded); err != nil {
panic(0)
}
players := decoded["response"].(map[string]interface{})["players"]
if err != nil {
panic(0)
}
sliceOfPlayers := players.([]interface{})
fmt.Println((sliceOfPlayers[0].(map[string]interface{}))["personaname"])
}
map[key:2073933158088]
I need to grab the key out of this data structure as a string, but I can't seem to figure out how!
Help with this overly simple question very much appreciated.
The value above is encapsulated in the variable named data.
I have tried: data.key, data[key], data["key"], data[0] and none of these seem to be appropriate calls.
To define data I sent up a JSON packet to a queue on IronMQ. I then pulled the message from the queue and manipulated it like this:
payloadIndex := 0
for index, arg := range(os.Args) {
if arg == "-payload" {
payloadIndex = index + 1
}
}
if payloadIndex >= len(os.Args) {
panic("No payload value.")
}
payload := os.Args[payloadIndex]
var data interface{}
raw, err := ioutil.ReadFile(payload)
if err != nil {
panic(err.Error())
}
err = json.Unmarshal(raw, &data)
Design your data type to match json structure. This is how can you achieve this:
package main
import (
"fmt"
"encoding/json"
)
type Data struct {
Key string `json:"key"`
}
func main() {
data := new(Data)
text := `{ "key": "2073933158088" }`
raw := []byte(text)
err := json.Unmarshal(raw, data)
if err != nil {
panic(err.Error())
}
fmt.Println(data.Key)
}
Since the number in the json is unquoted, it's not a string, Go will panic if you try to just handle it as a string (playground: http://play.golang.org/p/i-NUwchJc1).
Here's a working alternative:
package main
import (
"fmt"
"encoding/json"
"strconv"
)
type Data struct {
Key string `json:"key"`
}
func (d *Data) UnmarshalJSON(content []byte) error {
var m map[string]interface{}
err := json.Unmarshal(content, &m)
if err != nil {
return err
}
d.Key = strconv.FormatFloat(m["key"].(float64), 'f', -1, 64)
return nil
}
func main() {
data := new(Data)
text := `{"key":2073933158088}`
raw := []byte(text)
err := json.Unmarshal(raw, data)
if err != nil {
panic(err.Error())
}
fmt.Println(data.Key)
}
You can see the result in the playground: http://play.golang.org/p/5hU3hdV3kK