Check if strings are JSON format - json

How to check if a given string is in form of multiple json string separated by spaces/newline?
For example,
given: "test" 123 {"Name": "mike"} (3 json concatenated with space)
return: true, since each of item ("test" 123 and {"Name": "mike"}) is a valid json.
In Go, I can write an O(N^2) function like:
// check given string is json or multiple json concatenated with space/newline
// validateJSON reports whether str is one JSON value or several JSON values
// separated by whitespace. It returns nil when every value parses, and an
// error quoting the unparseable remainder otherwise.
//
// A candidate fragment is grown one whitespace-delimited word at a time and
// re-parsed after every extension, so the worst case is still quadratic in
// the length of the input.
func validateJSON(str string) error {
	// only one json string
	if isJSON(str) {
		return nil
	}
	// multiple json string concatenate with spaces
	//
	// Every index below is a rune index into arr. Comparing a rune index
	// against len(str), which counts bytes, runs past the end of arr as soon
	// as the input contains a multi-byte character.
	arr := []rune(strings.TrimSpace(str))
	start, end := 0, 0
	for start < len(arr) {
		// Extend the candidate to the end of the current word.
		for end < len(arr) && !unicode.IsSpace(arr[end]) {
			end++
		}
		substr := string(arr[start:end])
		if isJSON(substr) {
			// Candidate is complete; the next value starts after the gap.
			for end < len(arr) && unicode.IsSpace(arr[end]) {
				end++
			}
			start = end
			continue
		}
		if end == len(arr) {
			return errors.New("error when parsing input: " + substr)
		}
		// Not valid yet: the whitespace may sit inside a value (for example
		// between a key and its value), so absorb it and keep growing.
		for end < len(arr) && unicode.IsSpace(arr[end]) {
			end++
		}
	}
	return nil
}

// isJSON reports whether str is exactly one syntactically valid JSON value.
func isJSON(str string) bool {
	return json.Valid([]byte(str))
}
But this won't work for large input.

There are two options. The simplest, from a coding standpoint, is going to be just to decode the JSON string normally. You can make this most efficient by decoding to an empty struct:
package main
import "encoding/json"
// main decodes two sample documents into a throw-away empty struct and
// panics on the first one that json.Unmarshal rejects.
func main() {
	samples := []string{
		`{"a":"b", "c": 123}`,
		`{"a":"b", "c": 123}xxx`, // This one fails
	}
	var x struct{}
	for _, doc := range samples {
		err := json.Unmarshal([]byte(doc), &x)
		if err != nil {
			panic(err)
		}
	}
}
(playground link)
This method has a few potential drawbacks:
It only works with a single JSON object. That is, a list of objects (as requested in the question) will fail, without additional logic.
As pointed out by @icza in the comments, it only works with JSON objects, so bare arrays, numbers, or strings will fail. To accommodate these types, interface{} must be used, which introduces the potential for some serious performance penalties.
The throw-away x value must still be allocated, and at least one reflection call is likely under the hood, which may introduce a noticeable performance penalty for some workloads.
Given these limitations, my recommendation is to use the second option: loop through the entire JSON input, ignoring the actual contents. This is made simple with the standard library json.Decoder:
package main
import (
"bytes"
"encoding/json"
"io"
)
// main walks two sample inputs token by token without building any values.
// Reaching io.EOF means the input was valid; any other error panics.
func main() {
	scan := func(input []byte) {
		dec := json.NewDecoder(bytes.NewReader(input))
		for {
			if _, err := dec.Token(); err == io.EOF {
				return // End of input, valid JSON
			} else if err != nil {
				panic(err) // Invalid input
			}
		}
	}
	scan([]byte(`{"a":"b", "c": 123}`))
	scan([]byte(`{"a":"b", "c": 123}xxx`)) // This input fails
}
(playground link)

As Volker mentioned in the comments, use a *json.Decoder to decode all json documents in your input successively:
package main
import (
"encoding/json"
"io"
"log"
"strings"
)
// main decodes every JSON document in the input one after another. It
// returns once the decoder reports io.EOF and aborts via log.Fatal on any
// other error.
func main() {
	const input = `"test" 123 {"Name": "mike"}`
	dec := json.NewDecoder(strings.NewReader(input))
	for {
		var x json.RawMessage
		err := dec.Decode(&x)
		if err == io.EOF {
			return // success
		}
		if err != nil {
			log.Fatal(err)
		}
		// not done yet
	}
}
Try it on the playground: https://play.golang.org/p/1OKOii9mRHn

Try fastjson.Scanner:
s := `"test" 123 {"Name": "mike"}`
var sc fastjson.Scanner
sc.Init(s)
// Iterate over a stream of json objects
for sc.Next() {
}
// A nil error after the loop means the whole stream parsed; a non-nil
// error means Next stopped on invalid input.
if sc.Error() == nil {
	fmt.Println("ok")
} else {
	fmt.Println("false")
}

Related

Unmarshaling from JSON key containing a single quote

I feel quite puzzled by this.
I need to load some data (coming from a French database) that is serialized in JSON and in which some keys have a single quote.
Here is a simplified version:
package main
import (
"encoding/json"
"fmt"
)
// Product is the decoding target for one product record.
type Product struct {
// Name is mapped to the "nom" key.
Name string `json:"nom"`
// Cost is tagged with the "prix d'achat" key, which contains a single quote.
Cost int64 `json:"prix d'achat"`
}
// main decodes one sample record into a Product and prints the resulting
// cost together with the error returned by json.Unmarshal.
func main() {
	raw := []byte(`{"nom":"savon", "prix d'achat": 170}`)
	p := Product{}
	err := json.Unmarshal(raw, &p)
	fmt.Printf("product cost: %d\nerror: %s\n", p.Cost, err)
}
// product cost: 0
// error: %!s(<nil>)
Unmarshaling leads to no errors however the "prix d'achat" (p.Cost) is not correctly parsed.
When I unmarshal into a map[string]any, the "prix d'achat" key is parsed as I would expect:
package main
import (
"encoding/json"
"fmt"
)
// main decodes the same sample record into a generic map and prints the
// value stored under the "prix d'achat" key along with the decode error.
func main() {
	raw := []byte(`{"nom":"savon", "prix d'achat": 170}`)
	blob := make(map[string]any)
	err := json.Unmarshal(raw, &blob)
	fmt.Printf("blob: %f\nerror: %s\n", blob["prix d'achat"], err)
}
// blob: 170.000000
// error: %!s(<nil>)
I checked the json.Marshal documentation on struct tags and I cannot find any issue with the data I'm trying to process.
Am I missing something obvious here?
How can I parse a JSON key containing a single quote using struct tags?
Thanks a lot for any insight!
I didn't find anything in the documentation, but the JSON encoder considers single quote to be a reserved character in tag names.
// isValidTag reports whether s is acceptable as a tag name: it must be
// non-empty and consist only of letters, digits and the punctuation listed
// below. Backslash and the quote characters are not in that list, which is
// why a name containing a single quote is rejected.
func isValidTag(s string) bool {
	if s == "" {
		return false
	}
	for _, c := range s {
		switch {
		case strings.ContainsRune("!#$%&()*+-./:;<=>?@[]^_{|}~ ", c):
			// Backslash and quote chars are reserved, but
			// otherwise any punctuation chars are allowed
			// in a tag name.
		case !unicode.IsLetter(c) && !unicode.IsDigit(c):
			return false
		}
	}
	return true
}
I think opening an issue is justified here. In the meantime, you're going to have to implement json.Unmarshaler and/or json.Marshaler. Here is a start:
// UnmarshalJSON decodes b into p with the standard decoder first and then
// lets unmarshalFieldsWithSingleQuotes handle the fields that step leaves
// unset.
func (p *Product) UnmarshalJSON(b []byte) error {
	// A locally defined type has no methods, so decoding into it does not
	// call back into this method (prevents recursion).
	type plain Product
	var decoded plain
	err := json.Unmarshal(b, &decoded)
	if err != nil {
		return err
	}
	*p = Product(decoded)
	return unmarshalFieldsWithSingleQuotes(p, b)
}
// unmarshalFieldsWithSingleQuotes fills the fields of dest whose `json` tag
// name contains a single quote. dest must be a pointer to a struct and b the
// JSON object it was decoded from.
//
// b is decoded a second time, into a map of raw values, but only if at least
// one such field exists. Each matching key is then unmarshaled into its
// field. Keys absent from b leave the field untouched.
func unmarshalFieldsWithSingleQuotes(dest interface{}, b []byte) error {
	var m map[string]json.RawMessage
	t := reflect.TypeOf(dest).Elem()
	v := reflect.ValueOf(dest).Elem()
	for i := 0; i < t.NumField(); i++ {
		// A tag value has the form "name[,option...]". Only the name is a
		// JSON key, so options such as ",omitempty" must be cut off before
		// the lookup.
		name, _, _ := strings.Cut(t.Field(i).Tag.Get("json"), ",")
		if !strings.Contains(name, "'") {
			continue
		}
		if m == nil {
			// Decode lazily: skipped entirely when no tag has a quote.
			if err := json.Unmarshal(b, &m); err != nil {
				return err
			}
		}
		if j, ok := m[name]; ok {
			if err := json.Unmarshal(j, v.Field(i).Addr().Interface()); err != nil {
				return err
			}
		}
	}
	return nil
}
Try it on the playground: https://go.dev/play/p/aupACXorjOO

Golang Interface to struct conversion giving error

I have a JSON document as a string in the following format:
{"add": [{"var": ["100"]}, "200"]}
Here the key 'add' is not a constant value for all the jsons. In some cases, it can be 'minus', 'multiply' etc.
The value of that key is an array. In this case [{"var": ["100"]}, "200"]. This means that the 100 should be added to existing value 200.
I am trying to parse this expression. Since the main key(in this case 'add') is not a constant, I cannot convert into a struct. So, I converted it to a json object by following way:
// mathExpVar mirrors the {"var": [...]} object found inside the array.
type mathExpVar struct {
valueVar []string `json:"var"`
}
// The top-level key ("add" here) varies per input, so the document is decoded
// into a map keyed by operator, with untyped array elements as values.
var mathExpJson map[string][]interface{}
var input = "{\"add\": [{\"var\": [\"100\"]}, \"200\"]}"
err := json.Unmarshal([]byte(input), &mathExpJson)
for operator, values := range mathExpJson{
// The assertion succeeds only if the dynamic type stored in values[0] is
// exactly mathExpVar.
vals, ok := values[0].(mathExpVar) // here values[0] will be {"var": ["100"]}
if !ok{
return nil
}
}
Here 'ok' is always returning false. I am not sure why. There is no additional error message for me to check why this is failing. Could someone help me in resolving this?
Link to go playground for the same: https://go.dev/play/p/POfQmEoPbjD
A Working example: https://go.dev/play/p/02YzI5cv8vV
The whole reason the original response from Burak Serdar was not valid in my opinion is that it does not take into account the fact that you would need to handle the rest of the params as well. If you look closely enough, then you see that the expression is not an array of strings; it's of varying types. This implementation handles the custom unmarshalling and stores all the extra parameters in the Extra field.
Also code:
package main
import (
"encoding/json"
"log"
"reflect"
)
const jsonPayload = "{\"add\": [{\"var\": [\"100\"]}, \"200\"]}"
// main decodes jsonPayload into a MathExpressions value, then logs the whole
// map followed by each operation with its expression.
func main() {
	parsed := MathExpressions{}
	if err := json.Unmarshal([]byte(jsonPayload), &parsed); err != nil {
		log.Println("Failed to unmarshal json, error:", err)
		return
	}
	log.Println(parsed)
	for op, exp := range parsed {
		log.Print("Op:", op, "Exp:", exp)
	}
	log.Println("Finished..")
}
// ExpressionDefinition is the sub definition for a specific expression in
// the object: the operand array stored under one operator key.
type ExpressionDefinition struct {
	// Vars collects the strings found in the "var" arrays of object operands.
	Vars []string `json:"var"`
	// Extra collects the operands that are plain strings.
	Extra []string
}

// UnmarshalJSON decodes a JSON array of operands into e. Object operands
// must carry a "var" array of strings, which is appended to e.Vars. String
// operands are appended to e.Extra. Operands of any other type are skipped.
//
// A malformed object operand yields a *json.UnmarshalTypeError instead of a
// panic, so callers of json.Unmarshal get an ordinary error for bad input.
func (e *ExpressionDefinition) UnmarshalJSON(data []byte) error {
	tokens := make([]interface{}, 0)
	if err := json.Unmarshal(data, &tokens); err != nil {
		return err
	}
	for _, token := range tokens {
		log.Println("Processing token:", token, "type:", reflect.TypeOf(token))
		switch tok := token.(type) {
		case map[string]interface{}:
			// Checked assertions: the key may be missing or hold a
			// non-array value.
			vars, ok := tok["var"].([]interface{})
			if !ok {
				return &json.UnmarshalTypeError{
					Value: `object without a "var" array`,
					Type:  reflect.TypeOf(e.Vars),
				}
			}
			for _, v := range vars {
				s, ok := v.(string)
				if !ok {
					return &json.UnmarshalTypeError{
						Value: `non-string element of "var"`,
						Type:  reflect.TypeOf(s),
					}
				}
				e.Vars = append(e.Vars, s)
			}
		case string:
			e.Extra = append(e.Extra, tok)
		}
	}
	log.Println(tokens)
	return nil
}
// MathExpressions is the main expressions object which contains all the
// sub-expressions: it maps each top-level key to its ExpressionDefinition.
type MathExpressions map[string]ExpressionDefinition
Here the entire structure of the parsed json value will be stored in nested map[string]interface{}(json object) or []interface{}(json array) types.
In the line:
vals, ok := values[0].(mathExpVar)
values[0] would be of type map[string]interface{}, which cannot be asserted to mathExpVar, which is a struct, an entirely different datatype.
You need to type assert to map[string]interface{} first, then do this in each nested level as you go forward:
package main
import (
"encoding/json"
"fmt"
)
func main() {
type mathExpVar struct {
valueVar []string `json:"var"`
}
var mathExpJson map[string][]interface{}
var input = "{\"add\": [{\"var\": [\"100\"]}, \"200\"]}"
err := json.Unmarshal([]byte(input), &mathExpJson)
if err != nil {
fmt.Println("Error in unmarshalling")
}
for _, values := range mathExpJson {
var vals mathExpVar
valMap, ok := values[0].(map[string]interface{})
if ok {
varSlice, ok := valMap["var"].([]interface{})
if ok {
for _, v := range varSlice {
nv, ok := v.(string)
if ok {
vals.valueVar = append(vals.valueVar, nv)
} else {
fmt.Printf("%T\n", v)
}
}
} else {
fmt.Printf("%T\n", valMap["var"])
}
} else {
fmt.Printf("%T\n", values[0])
}
fmt.Printf("%+v\n", vals)
}
}
See: https://go.dev/play/p/Ot_9IZr4pwM
For more on interfaces and go reflection, check out: https://go.dev/blog/laws-of-reflection

What input will cause golang's json.Marshal to return an error?

From the docs:
JSON cannot represent cyclic data structures and Marshal does not handle them. Passing cyclic structures to Marshal will result in an infinite recursion.
I've experienced this situation, which results in a runtime panic.
What I'm wondering is if anyone can provide a working program that demonstrates a non-panic situation where json.Marshal returns a non-nil error. The best answers would clearly include the inputs used.
Just to complement Jonathan's answer, the json.Marshal function can return two types of errors: UnsupportedTypeError or UnsupportedValueError
The first one can be caused, as Jonathan said by trying to Marshal an invalid type:
_, err := json.Marshal(make(chan int))
_, ok := err.(*json.UnsupportedTypeError) // ok == true
On the other hand you can also have the Marshal function return an error by passing an invalid value:
_, err := json.Marshal(math.Inf(1))
_, ok := err.(*json.UnsupportedValueError) // ok == true
Update: now using a channel instead of a map[int]int to elicit the error
Go-specific structures,e.g. func or chan refuse to serialize:
package main
import (
"encoding/json"
"fmt"
)
// main tries to marshal a channel and prints the error that json.Marshal
// returns for it.
func main() {
	_, err := json.Marshal(make(chan int))
	fmt.Println(err)
}
If you read the source code, you can find the function that selects an encoder for each type; when no encoder exists for a type, Marshal returns an error: https://github.com/golang/go/blob/master/src/encoding/json/encode.go
// newTypeEncoder picks the encoderFunc for t by switching on t.Kind(). Kinds
// without a case of their own fall through to unsupportedTypeEncoder.
// This is an excerpt: the code that precedes the switch is elided (see the
// "ignored" marker), and allowAddr is not used in the part shown.
func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc {
// ignored
switch t.Kind() {
case reflect.Bool:
return boolEncoder
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return intEncoder
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
return uintEncoder
case reflect.Float32:
return float32Encoder
case reflect.Float64:
return float64Encoder
case reflect.String:
return stringEncoder
case reflect.Interface:
return interfaceEncoder
case reflect.Struct:
return newStructEncoder(t)
case reflect.Map:
return newMapEncoder(t)
case reflect.Slice:
return newSliceEncoder(t)
case reflect.Array:
return newArrayEncoder(t)
case reflect.Ptr:
return newPtrEncoder(t)
default:
// Every kind not listed above ends up here.
return unsupportedTypeEncoder
}
}
We can find all kinds enum
at https://github.com/golang/go/blob/master/src/reflect/type.go
So it's not hard to see that kinds not in above function are unable to marshal:
UnsafePointer,Complex64,Complex128,Chan,Func
Examples:
json.Marshal(unsafe.Pointer(nil)) // UnsafePointer
json.Marshal(complex64(1)) // Complex64
json.Marshal(complex128(1)) // Complex128
json.Marshal(make(chan struct{})) // Chan
json.Marshal(func() {}) // Func
A while ago I was solving a problem of serializing/deserializing cyclic references in golang, and all the links go to this question. However, it's slightly misleading as the question is broader.
If you got into the same situation like me, and can't find a solution on how to deal with cyclic references, you can now use tahwil - a new library that I published on github. To my knowledge it's now the only library that facilitates serialization/deserialization of cyclic data structures in a generic way.
Readme gives the information on how to use the library, so I will only duplicate the examples here.
Encoding:
package main
import (
"encoding/json"
"fmt"
"github.com/go-extras/tahwil"
)
// Person is one node of a family graph. Parent and Children are pointers to
// other Person values, so a parent and its children can refer to each other.
type Person struct {
Name string
Parent *Person
Children []*Person
}
// main builds a parent with two children that point back at it, converts
// the graph with tahwil.ToValue, marshals the result to JSON and prints it.
// Any error panics.
func main() {
	ford := &Person{Name: "Ford"}
	trillian := &Person{Name: "Trillian"}
	parent := &Person{
		Name:     "Arthur",
		Children: []*Person{ford, trillian},
	}
	// Close the cycle: every child refers back to its parent.
	for _, child := range parent.Children {
		child.Parent = parent
	}

	v, err := tahwil.ToValue(parent)
	if err != nil {
		panic(err)
	}
	encoded, err := json.Marshal(v)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(encoded))
}
Decoding:
package main
import (
"encoding/json"
"fmt"
"github.com/go-extras/tahwil"
)
// Person is one node of a family graph. Parent and Children are pointers to
// other Person values, so a parent and its children can refer to each other.
// The tags give each field a lower-case JSON key.
type Person struct {
Name string `json:"name"`
Parent *Person `json:"parent"`
Children []*Person `json:"children"`
}
// prepareData builds the cyclic sample graph (a parent and two children that
// point back at it), converts it with tahwil.ToValue and returns the JSON
// encoding of the result. Any error panics.
func prepareData() []byte {
	ford := &Person{Name: "Ford"}
	trillian := &Person{Name: "Trillian"}
	parent := &Person{
		Name:     "Arthur",
		Children: []*Person{ford, trillian},
	}
	// Close the cycle: every child refers back to its parent.
	for _, child := range parent.Children {
		child.Parent = parent
	}

	v, err := tahwil.ToValue(parent)
	if err != nil {
		panic(err)
	}
	encoded, err := json.Marshal(v)
	if err != nil {
		panic(err)
	}
	return encoded
}
func main() {
data := &tahwil.Value{}
res := prepareData()
err := json.Unmarshal(res, data)
if err != nil {
panic(err)
}
person := &Person{}
err = tahwil.FromValue(data, person)
if err != nil {
panic(err)
}
fmt.Printf(`Name: %s
Children:
- %s
-- parent name: %s
- %s
-- parent name: %s
`, person.Name,
person.Children[0].Name,
person.Children[0].Parent.Name,
person.Children[1].Name,
person.Children[1].Parent.Name)
}
The main idea is to transform the original data to tahwil.Value{}, which essentially adds refid's to all of your fields. Whenever tahwil encounters a cyclic reference, it replaces the actual object with a reference. And after that the graph is technically not cyclic anymore and thus can be marshalled to json.
Restoring the data means a reverse operation, i.e. any reference will be replaced by a pointer to an object.
P.S. Why tahwil? I tried to find some uncommon word for the name, and found an Arabic word (تحويل) that means conversion.

Reading JSON into GO Strings

So I have a JSON File in the format...
[
{
"Key":"Value",
"Key2":"Value2",
"Key3":"Value3"
},
{
"Foo":"Bar",
"Blah":2
}
]
I want to just read in the hash parts of it and pass them to an HTTP request like in goRequest, because goRequest is fine with just the JSON being in a String.
package main
request := gorequest.New()
resp, body, errs := request.Post("http://example.com").
Set("Notes","gorequst is coming!").
Send(`{"Foo":"Bar","Blah":2}`).
End()
I don't care what the JSON is and I don't need to unmarshal it to any go Structs or anything of the sort, it's fine just remaining as a string and being totally untouched, just passed along to the request.
I've seen a lot online about it, but it always seems to wanna un-marshal the JSON to Go Structs and the sort, which is fine if you want to care about what actually is in the JSON, but in my case this seems like unnecessary overhead.
How would I accomplish something like this? It seems pretty simple, but none of the existing JSON libraries for Go seem to be able to accomplish this.
Thanks.
You are probably looking for json.RawMessage.
For example:
package main
import (
"encoding/json"
"fmt"
"log"
)
// main splits a JSON array into its elements without interpreting them and
// prints each element's raw bytes on its own line. A decode error aborts
// the program via log.Fatal.
func main() {
	txt := []byte(`
[
{"key1" : "value1" },
{"key2" : "value2" }
]`)
	var msg []json.RawMessage
	if err := json.Unmarshal(txt, &msg); err != nil {
		log.Fatal(err)
	}
	for i := range msg {
		fmt.Printf("%s\n", string(msg[i]))
	}
}
Note that the redundant white space in the example separating the key/value pairs is intentional: you will see that these are preserved in the output.
Alternatively, even if you don't care about the exact structure, you can still dynamically poke at it by using an interface{} variable. See the JSON and Go document for a running example of this, under the Generic JSON with interface{} section.
If we are trying to do something like a streaming approach, we may attempt to do something custom with the io.Reader. The JSON parser assumes you can represent everything in memory at once. That assumption may not hold in your situation, so we have to break a few things.
Perhaps we might manually consume bytes in the io.Reader till we eat the leading [ character, and then repeatedly call json.Decode on the rest of the io.Reader. Something like this:
package main
import (
"bytes"
"encoding/json"
"fmt"
"io"
"log"
)
// main prints each element of a JSON array by hand-stepping through the
// reader: it skips to the opening '[', then alternates between decoding one
// element and skipping the separator that follows it.
func main() {
var txt io.Reader = bytes.NewBufferString(`
[
{"key1" : "value1" },
{"key2" : "value2" }
]`)
// One-byte scratch buffer used for all the manual skipping below.
buf := make([]byte, 1)
// Consume input byte by byte up to and including the opening '['.
for {
_, err := txt.Read(buf)
if err != nil {
log.Fatal(err)
}
if buf[0] == '[' {
break
}
}
for {
// A fresh decoder is created over whatever txt currently points at.
decoder := json.NewDecoder(txt)
msg := json.RawMessage{}
err := decoder.Decode(&msg)
// Any decode error ends the loop; this is also how the loop stops after
// the closing ']' has been consumed.
if err != nil {
break
}
fmt.Printf("I see: %s\n", string(msg))
// Continue from the bytes the decoder has read but not consumed.
// NOTE(review): data the decoder has not yet pulled from the original
// reader is not carried over by this assignment; presumably this only
// works while the whole input fits in the decoder's first read - confirm.
txt = decoder.Buffered()
// Skip forward to the ',' before the next element or the closing ']'.
for {
_, err := txt.Read(buf)
if err != nil {
log.Fatal(err)
}
if buf[0] == ',' || buf[0] == ']' {
break
}
}
}
}
This code is severely kludgy and non-obvious. I also don't think it's a good idea. If you have to deal with this in a streaming fashion, then JSON is likely not a good serialization format for this scenario. If you have control over the input, then you should consider changing it so it's more amenable to a streaming approach: hacks like what we're doing here are a bad smell that the input is in the wrong shape.
Here is what I was thinking as a solution, does this look sane?
package main
import (
	"bytes"
	"encoding/csv"
	"encoding/json"
	"flag"
	"fmt"
	"os"

	"github.com/parnurzeal/gorequest"
)
// process_line posts one CSV record to the remote endpoint as a JSON array
// holding a single object that maps each header to the matching cell.
//
// Keys and values are encoded with json.Marshal so that quotes, backslashes
// and control characters in the CSV data cannot produce malformed JSON.
// Headers without a matching cell are omitted. Request errors are printed,
// not returned, because the function has no result.
func process_line(headers []string, line []string) {
	var buffer bytes.Buffer
	buffer.WriteString("[{")
	for i := range headers {
		if i >= len(line) {
			break // short record: no value for the remaining headers
		}
		if i > 0 {
			buffer.WriteByte(',')
		}
		// Marshaling a plain string cannot fail, so the errors are dropped.
		key, _ := json.Marshal(headers[i])
		val, _ := json.Marshal(line[i])
		buffer.Write(key)
		buffer.WriteByte(':')
		buffer.Write(val)
	}
	buffer.WriteString("}]\n")

	request := gorequest.New()
	// Only the error list is used; the response and body are discarded.
	_, _, errs := request.Post("www.something.com").
		Set("Content-Type", "application/json").
		Set("Accept", "application/json").
		Send(buffer.String()).End()
	if errs != nil {
		fmt.Println(errs)
	}
}
// main reads the CSV file named by the -file flag, treats the first record
// as the header row and hands every following record to process_line in its
// own goroutine. It waits for all of those goroutines before returning, so
// no request is cut off by the program exiting.
func main() {
	file := flag.String("file", "", "Filename?")
	flag.Parse()
	if *file == "" {
		fmt.Println("No file specified. :-(")
		os.Exit(1)
	}
	csvFile, err := os.Open(*file)
	if err != nil {
		// Without a file there is nothing to process.
		fmt.Println(err)
		os.Exit(1)
	}
	defer csvFile.Close()

	reader := csv.NewReader(csvFile)
	var headers []string
	done := make(chan struct{}) // one signal per finished record
	pending := 0                // goroutines started and not yet collected
	for i := 0; ; i++ {
		line, err := reader.Read()
		if err != nil {
			// End of file or a read error: stop reading either way.
			break
		}
		if i == 0 {
			headers = line
		} else {
			pending++
			go func(record []string) {
				process_line(headers, record)
				done <- struct{}{}
			}(line)
		}
		if i%100 == 0 {
			fmt.Printf("%v records processed.\n", i)
		}
	}
	// Collect every worker before main returns.
	for ; pending > 0; pending-- {
		<-done
	}
}

Don't read unneeded JSON key-values into memory

I have a JSON file with a single field that takes a huge amount of space when loaded into memory. The other fields are reasonable, but I'm trying to take care not to load that particular field unless I absolutely have to.
{
"Field1": "value1",
"Field2": "value2",
"Field3": "a very very long string that potentially takes a few GB of memory"
}
When reading that file into memory, I'd want to ignore Field3 (because loading it could crash my app). Here's some code that I would assume does that because it uses io streams rather than passing a []byte type to the Unmarshal command.
package main
import (
"encoding/json"
"os"
)
// main opens myJSONFile.json and decodes it through a json.Decoder into a
// struct that declares only Field1 and Field2. It exits with status 2 if
// the file cannot be opened or decoded.
func main() {
	type MyStruct struct {
		Field1 string
		Field2 string
	}
	fi, openErr := os.Open("myJSONFile.json")
	if openErr != nil {
		os.Exit(2)
	}
	// create an instance and populate
	var mystruct MyStruct
	if err := json.NewDecoder(fi).Decode(&mystruct); err != nil {
		os.Exit(2)
	}
	// do some other stuff
}
The issue is that the built-in json.Decoder type reads the entire file into memory on Decode before throwing away key-values that don't match the struct's fields (as has been pointed out on StackOverflow before: link).
Are there any ways of decoding JSON in Go without keeping the entire JSON object in memory?
You could write a custom io.Reader that you feed to json.Decoder and that will pre-read your json file and skip that specific field.
The other option is to write your own decoder, more complicated and messy.
//edit it seemed like a fun exercise, so here goes:
// IgnoreField wraps an io.Reader together with the name of a field; its Read
// method filters the data coming from the embedded Reader.
type IgnoreField struct {
io.Reader
// Field is the key name Read searches for, without the surrounding quotes.
Field string
// buf collects the filtered bytes before Read copies them out.
buf bytes.Buffer
}
// NewIgnoreField returns an IgnoreField that reads from r and targets the
// key named field. Its internal buffer starts out empty.
func NewIgnoreField(r io.Reader, field string) *IgnoreField {
	f := new(IgnoreField)
	f.Reader = r
	f.Field = field
	return f
}
// Read fills p from the wrapped Reader and, when the chunk contains the
// quoted field name, tries to cut that key and its string value out of the
// data handed back to the caller. Errors from the wrapped Reader are returned
// unchanged together with its byte count.
func (iF *IgnoreField) Read(p []byte) (n int, err error) {
if n, err = iF.Reader.Read(p); err != nil {
return
}
// NOTE(review): this converts all of p rather than p[:n], so bytes beyond
// the n just read would be searched as well - confirm.
s := string(p)
// The key is searched for in its quoted form.
fl := `"` + iF.Field + `"`
if i := strings.Index(s, fl); i != -1 {
// Keep everything before the comma that precedes the key; if there is no
// comma, keep everything before the key itself.
l := strings.LastIndex(s[0:i], ",")
if l == -1 {
l = i
}
iF.buf.WriteString(s[0:l])
// Skip the key plus one more character (presumably the colon - TODO confirm).
s = s[i+1+len(fl):]
// Skip up to and including the opening quote of the value.
i = strings.Index(s, `"`)
if i != -1 {
s = s[i+1:]
}
// Drop data until the next quote, reading further chunks when the value
// does not end in the current one.
// NOTE(review): a quote escaped inside the value would be taken for the
// end of the value - confirm.
for {
i = strings.Index(s, `"`) //end quote
if i != -1 {
s = s[i+1:]
fmt.Println("Skipped")
break
} else {
if n, err = iF.Reader.Read(p); err != nil {
return
}
s = string(p)
}
}
// Whatever follows the skipped value is kept.
iF.buf.WriteString(s)
}
// NOTE(review): when the field name is not found, nothing has been written
// to buf, so ln is 0 and the method returns 0, nil although n bytes were
// read - confirm this is intended.
ln := iF.buf.Len()
if ln >= len(p) {
tmp := iF.buf.Bytes()
iF.buf.Reset()
copy(p, tmp[0:len(p)])
// NOTE(review): p[len(p):] is always empty, so the bytes beyond len(p) are
// not kept; tmp[len(p):] may have been intended - confirm.
iF.buf.Write(p[len(p):])
ln = len(p)
} else {
copy(p, iF.buf.Bytes())
iF.buf.Reset()
}
return ln, nil
}
// main opens myJSONFile.json, wraps it in an IgnoreField targeting "Field3"
// and decodes the stream into a struct that declares only Field1 and Field2.
// A decode error is printed, and the struct is printed in either case.
func main() {
	type MyStruct struct {
		Field1 string
		Field2 string
	}
	fi, err := os.Open("myJSONFile.json")
	if err != nil {
		os.Exit(2)
	}
	// create an instance and populate
	var mystruct MyStruct
	// err already exists in this scope, so it is assigned with "=": a second
	// ":=" with no new variable on the left does not compile.
	err = json.NewDecoder(NewIgnoreField(fi, "Field3")).Decode(&mystruct)
	if err != nil {
		fmt.Println(err)
	}
	fmt.Println(mystruct)
}
playground