Sails.js associations - mysql

I am beginning with Sails.js and I am completely lost with my SQL queries.
I have the following tables :
genres
+-----------+--------------+------+-----+
| Field | Type | Null | Key |
+-----------+--------------+------+-----+
| id | int(6) | NO | PRI |
| name | varchar(100) | NO | |
| slug | varchar(255) | NO | |
| type | varchar(32) | NO | |
| parent_id | int(11) | YES | MUL |
+-----------+--------------+------+-----+
genres_radios
+----------+--------+------+-----+
| Field | Type | Null | Key |
+----------+--------+------+-----+
| genre_id | int(6) | NO | MUL |
| radio_id | int(6) | NO | MUL |
+----------+--------+------+-----+
radios
+-----------+--------------+------+-----+
| Field | Type | Null | Key |
+-----------+--------------+------+-----+
| id | int(5) | NO | PRI |
| name | varchar(100) | NO | |
| slug | varchar(100) | NO | |
| url | varchar(100) | NO | |
+-----------+--------------+------+-----+
I want to retrieve the radios and their associated genres. I managed to do it using Model.query("SELECT * FROM ...") but I'd like to do it using the populate method. I had a look at the docs, but I am a bit confused by "via", "through", and so on.

Well, if you've followed the Sails.js model documentation and the many-to-many association docs, your models should look something like:
// api/models/genre.js
module.exports = {
  attributes: {
    name: {
      type: 'string'
    },
    slug: {
      type: 'string'
    },
    type: {
      type: 'string'
    },
    radios: {
      collection: 'radio',
      via: 'genres'
    }
  }
}

// api/models/radio.js
module.exports = {
  attributes: {
    name: {
      type: 'string'
    },
    slug: {
      type: 'string'
    },
    url: {
      type: 'string'
    },
    genres: {
      collection: 'genre',
      via: 'radios'
    }
  }
}
The many-to-many lookup table will be created for you internally by Waterline. All you need to do to get the genres for your radio is to populate the "genres" attribute.
Radio.findOne({name: "RadioName"}).populate("genres").then(function(radio) {
  console.log(radio); // radio.genres will have all the genres associated with this radio.
});
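Once the models are in place, associating records is just a matter of adding to the collection and saving. A minimal sketch (the ids here are hypothetical; Radio.addToCollection is the Sails v1 API, while older Sails/Waterline versions use the association's add() followed by save()):

// Sails v1 / recent Waterline (radioId, rockGenreId, jazzGenreId are placeholders):
await Radio.addToCollection(radioId, 'genres').members([rockGenreId, jazzGenreId]);

// Older Sails versions:
Radio.findOne({name: "RadioName"}).exec(function(err, radio) {
  if (err) { return console.error(err); }
  radio.genres.add(someGenreId); // queue the new association
  radio.save(function(err) {     // persist it to the join table
    if (err) { console.error(err); }
  });
});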
I really do recommend looking at the many-many association docs. They have exactly what you need.

This should do it:
// api/models/Genres.js
module.exports = {
  attributes: {
    name: {
      type: 'string'
    },
    slug: {
      type: 'string'
    },
    type: {
      type: 'string'
    },
    radios: {
      collection: 'radios',
      through: 'genres_radios'
    }
  }
}

// api/models/Radios.js
module.exports = {
  attributes: {
    name: {
      type: 'string'
    },
    slug: {
      type: 'string'
    },
    url: {
      type: 'string'
    },
    genres: {
      collection: 'genres',
      through: 'genres_radios'
    }
  }
}
// api/models/Genres_radios.js
module.exports = {
  attributes: {
    Genre_id: {
      columnName: 'genre_id',
      type: 'integer',
      foreignKey: true,
      references: 'genres',
      on: 'id',
      via: 'genres'
    },
    Radio_id: {
      columnName: 'radio_id',
      type: 'integer',
      foreignKey: true,
      references: 'radios',
      on: 'id',
      via: 'radios'
    }
  }
}
And then you can make the following request:
Radio.findOne({name: "RadioName"}).populate("genres").then(function(radio) {
  console.log(radio);
});

Related

Access a nested array of objects in nodejs, mysql and json

I have these tables:
CREATE TABLE Progress_Category (
  categoryId int(4) AUTO_INCREMENT NOT NULL,
  name varchar(150) NOT NULL,
  PRIMARY KEY (categoryId)
);
CREATE TABLE Progress_Skill (
  skillId int(4) AUTO_INCREMENT NOT NULL,
  name varchar(150) NOT NULL,
  currentProgress int NOT NULL,
  `25` varchar(300) NOT NULL,
  `50` varchar(300) NOT NULL,
  `75` varchar(300) NOT NULL,
  `100` varchar(300) NOT NULL,
  categoryId int(4) NOT NULL,
  PRIMARY KEY (skillId),
  CONSTRAINT Constr_Progress_Skill_Skill_fk FOREIGN KEY Skill_fk (categoryId) REFERENCES Progress_Category(categoryId) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE TABLE Progress_Message (
  messageId int(4) AUTO_INCREMENT NOT NULL,
  message varchar(500) NOT NULL,
  messageDate DATE NOT NULL,
  skillId int(4) NOT NULL,
  PRIMARY KEY (messageId),
  CONSTRAINT Constr_Progress_Message_Message_fk FOREIGN KEY Message_fk (skillId) REFERENCES Progress_Skill(skillId) ON DELETE CASCADE ON UPDATE CASCADE
);
I have this query to retrieve all the data in a table:
SELECT *
FROM Progress_Category AS pcat
LEFT JOIN Progress_Skill AS ps
ON pcat.categoryId = ps.catParentId
LEFT JOIN Progress_Message AS pm
ON ps.skillId = pm.skillParentId
Because of the joins, each skill of a category produces a new row repeating that category alongside the skill, and each message of a skill produces another row repeating both the category and the skill alongside the message.
Query result:
+------------+-----------+---------+-----------+-----------------+------+-------+--------+-------+-------------+-----------+-------------------------+-------------+---------------+
| categoryId | catname | skillId | skillname | currentProgress | 25 | 50 | 75 | 100 | catParentId | messageId | message | messageDate | skillParentId |
+------------+-----------+---------+-----------+-----------------+------+-------+--------+-------+-------------+-----------+-------------------------+-------------+---------------+
| 1 | Languages | 1 | Spanish | 100 | Read | Write | Listen | Speak | 1 | 1 | Native language | 2022-08-27 | 1 |
| 1 | Languages | 2 | English | 85 | Read | Write | Listen | Speak | 1 | 2 | Learning since 2016 | 2022-08-27 | 2 |
| 1 | Languages | 2 | English | 85 | Read | Write | Listen | Speak | 1 | 3 | Can speak almost fluent | 2022-08-27 | 2 |
| 2 | Projects | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL |
| 3 | Ideas | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL |
+------------+-----------+---------+-----------+-----------------+------+-------+--------+-------+-------------+-----------+-------------------------+-------------+---------------+
5 rows in set (0.001 sec)
In Node.js I use that query with the following code:
connection.query(myquery, function(err, results, fields) {
  if (err) {
    console.log('----> Error with MySQL query in /api/showProgress: ' + err.message);
  }
  else {
    console.log('Query successful, results are being displayed.');
    var categories = [];
    for (let category in results) {
      if (categories.length > 0) {
        for (let key in categories) {
          if (results[category].categoryId !== categories[key].Category.Id) {
            console.log("Category Id: " + results[category].categoryId + " Id already in the array: " + categories[key].Category.Id);
            categories.push({
              "Category": [{
                "Id": results[category].categoryId,
                "Name": results[category].catname
              }]
            });
          }
        }
      }
      else {
        categories.push({
          "Category": [{
            "Id": results[category].categoryId,
            "Name": results[category].catname
          }]
        });
      }
    }
    response.send({"My progress": categories});
  }
});
The result I get:
Query successful, results are being displayed.
Category Id: 1 Id already in the array: undefined
Category Id: 1 Id already in the array: undefined
Category Id: 1 Id already in the array: undefined
Category Id: 2 Id already in the array: undefined
Category Id: 2 Id already in the array: undefined
Category Id: 2 Id already in the array: undefined
Category Id: 2 Id already in the array: undefined
Category Id: 3 Id already in the array: undefined
Category Id: 3 Id already in the array: undefined
Category Id: 3 Id already in the array: undefined
Category Id: 3 Id already in the array: undefined
Category Id: 3 Id already in the array: undefined
Category Id: 3 Id already in the array: undefined
Category Id: 3 Id already in the array: undefined
Category Id: 3 Id already in the array: undefined
So the problem is categories[key].Category.Id. I don't know how to access the property Id that belongs to Category, which sits inside an array.
The idea is to use that check so that only one category is pushed, with an array of its skills, instead of repeating the same category for each skill:
Current:
{"My progress":[
{
"Category":[{
"Id":1,
"Name":"Languages",
"Skill":"asd"
}]
},
{
"Category":[{
"Id":1,
"Name":"Languages",
"Skill":"fgh"
}]
},
{
"Category":[{
"Id":1,
"Name":"Languages",
"Skill":"ijk"
}]
},
]}
Expected:
{"My progress":[
{
"Category":[
{
"Id":1,
"Name":"Languages",
"Skills":[{
"Name":"asd",
"Name":"fgh",
"Name":"ijk"
}]
},
{
"Id":2,
"Name":"Projects",
"Skills":[{
"Name":"123",
"Name":"456",
"Name":"789"
}]
}
]
}
]}
I got the expected result by changing almost everything:
{"My skills":[
{
"categoryId":1,
"CategoryName":"Web development",
"Subcategories":[
{
"parentId":1,
"subcategoryId":1,
"SubcategoryName":"Frontend",
"Skills":[
"Sass",
"Css",
"Bootstrap",
"Figma"
]
},
{
"parentId":1,
"subcategoryId":2,
"SubcategoryName":"Backend",
"Skills":[
"Nodejs",
"Express",
"MySQL",
"PHP"
]
}
]
},
{
"categoryId":2,
"CategoryName":"Cybersecurity",
"Subcategories":[
{
"parentId":2,
"subcategoryId":3,
"SubcategoryName":"Red team",
"Skills":[
"curl",
"Sherlock",
"Wappalyzer",
"Burpsuite"
]
},
{
"parentId":2,
"subcategoryId":4,
"SubcategoryName":"Blue team",
"Skills":[
"Cloudfare"
]
}
]
}
]}
Node.js code:
connection.query(myquery, function(err, results, fields) {
  if (err) {
    console.log('----> Error with MySQL query in /api/showSkills: ' + err.message);
  }
  else {
    console.log('Query successful, results are being displayed.');
    var mylist = [];
    var subcat = [];
    var lastPushedId = 0;
    for (let key in results) {
      if (lastPushedId !== results[key].categoryId) {
        for (let otherkey in results) {
          if (results[otherkey].subcatParentId === results[key].categoryId) {
            subcat.push({
              'parentId': results[otherkey].subcatParentId,
              'subcategoryId': results[otherkey].subcategoryId,
              'SubcategoryName': results[otherkey].subcatname,
              'Skills': results[otherkey].skills.split(',')
            });
          }
        }
        mylist.push({
          'categoryId': results[key].categoryId,
          'CategoryName': results[key].catname,
          'Subcategories': subcat
        });
        subcat = [];
        lastPushedId = results[key].categoryId;
      }
    }
    response.send({"My skills": mylist});
  }
});
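For reference, the skills.split(',') above implies the query aggregates skill names into one comma-separated column per subcategory. A sketch of the kind of MySQL query that would produce that shape (the table and column names here are assumptions, since the final query is not shown in the post):

SELECT c.categoryId,
       c.name AS catname,
       s.subcategoryId,
       s.name AS subcatname,
       s.categoryId AS subcatParentId,
       GROUP_CONCAT(sk.name) AS skills   -- one comma-separated list per group
FROM Category c
LEFT JOIN Subcategory s ON s.categoryId = c.categoryId
LEFT JOIN Skill sk ON sk.subcategoryId = s.subcategoryId
GROUP BY c.categoryId, c.name, s.subcategoryId, s.name, s.categoryId;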

Using Power Query, how can I convert a JSON file that contains records of different data types into multiple tables, one for each datatype?

For example, I have the following JSON file:
[
  {
    "DataType": "DataType_A",
    "JSON": "{\"x\":\"0\", \"y\":\"1\", \"z\":\"2\"}"
  },
  {
    "DataType": "DataType_A",
    "JSON": "{\"x\":\"1\", \"y\":\"3\", \"z\":\"0\"}"
  },
  {
    "DataType": "DataType_B",
    "JSON": "{\"Name\":\"steve\", \"Id\":\"4b\"}"
  },
  {
    "DataType": "DataType_B",
    "JSON": "{\"Name\":\"andy\", \"Id\":\"7c\"}"
  },
  {
    "DataType": "DataType_C",
    "JSON": "{\"Address\":\"123 Anywhere St.\", \"Town\":\"Springfield\"}"
  },
  {
    "DataType": "DataType_C",
    "JSON": "{\"Address\":\"1400 Another Rd.\", \"Town\":\"Anytown\"}"
  }
]
I can import the file via the Get Data > From JSON function, resulting in this table:
| DataType | JSON |
| DataType_A | {"x":"0", "y":"1", "z":"2"} |
| DataType_A | {"x":"1", "y":"3", "z":"0"} |
| DataType_B | {"Name":"steve", "Id":"4b"} |
| DataType_B | {"Name":"andy", "Id":"7c"} |
| DataType_C | {"Address":"123 Anywhere St.", "Town":"Springfield" } |
| DataType_C | {"Address":"1400 Another Rd.", "Town":"Anytown" } |
How do I go from the table I have to 3 tables, one for each data type?
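One possible approach, sketched in Power Query M under the assumption that the file loads as a list of records (the file path is hypothetical): filter the rows to one DataType, parse each embedded JSON string with Json.Document, and expand the resulting records into columns. The field list passed to Table.ExpandRecordColumn is specific to each data type:

let
    Source = Json.Document(File.Contents("C:\data\records.json")),
    AsTable = Table.FromRecords(Source),
    TypeA = Table.SelectRows(AsTable, each [DataType] = "DataType_A"),
    Parsed = Table.TransformColumns(TypeA, {{"JSON", Json.Document}}),
    ExpandedA = Table.ExpandRecordColumn(Parsed, "JSON", {"x", "y", "z"})
in
    ExpandedA

Duplicating this query with the filter and field list changed ({"Name", "Id"} for DataType_B, {"Address", "Town"} for DataType_C) yields one table per data type.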

How to make nested JSON response in Go?

I am new to Go and need some help.
In my PostgreSQL database I have 4 tables. They are called: surveys, questions, options and surveys_questions_options.
They look like this:
surveys table:
| survey_id (uuid4) | survey_name (varchar) |
|--------------------------------------|-----------------------|
| 0cf1cf18-d5fd-474e-a8be-754fbdc89720 | April |
| b9fg55d9-n5fy-s7fe-s5bh-856fbdc89720 | May |
questions table:
| question_id (int) | question_text (text) |
|-------------------|------------------------------|
| 1 | What is your favorite color? |
options table:
| option_id (int) | option_text (text) |
|-------------------|--------------------|
| 1 | red |
| 2 | blue |
| 3 | grey |
| 4 | green |
| 5 | brown |
surveys_questions_options table combines data from all three previous tables:
| survey_id | question_id | option_id |
|--------------------------------------|-------------|-----------|
| 0cf1cf18-d5fd-474e-a8be-754fbdc89720 | 1 | 1 |
| 0cf1cf18-d5fd-474e-a8be-754fbdc89720 | 1 | 2 |
| 0cf1cf18-d5fd-474e-a8be-754fbdc89720 | 1 | 3 |
| b9fg55d9-n5fy-s7fe-s5bh-856fbdc89720 | 1 | 3 |
| b9fg55d9-n5fy-s7fe-s5bh-856fbdc89720 | 1 | 4 |
| b9fg55d9-n5fy-s7fe-s5bh-856fbdc89720 | 1 | 5 |
How can I make a nested JSON response in Go? I use the GORM library. I want a JSON response like this:
[
  {
    "survey_id": "0cf1cf18-d5fd-474e-a8be-754fbdc89720",
    "survey_name": "April",
    "questions": [
      {
        "question_id": 1,
        "question_text": "What is your favorite color?",
        "options": [
          {
            "option_id": 1,
            "option_text": "red"
          },
          {
            "option_id": 2,
            "option_text": "blue"
          },
          {
            "option_id": 3,
            "option_text": "grey"
          }
        ]
      }
    ]
  },
  {
    "survey_id": "b9fg55d9-n5fy-s7fe-s5bh-856fbdc89720",
    "survey_name": "May",
    "questions": [
      {
        "question_id": 1,
        "question_text": "What is your favorite color?",
        "options": [
          {
            "option_id": 3,
            "option_text": "grey"
          },
          {
            "option_id": 4,
            "option_text": "green"
          },
          {
            "option_id": 5,
            "option_text": "brown"
          }
        ]
      }
    ]
  }
]
My models look like this:
type Survey struct {
    SurveyID   string `gorm:"primary_key" json:"survey_id"`
    SurveyName string `gorm:"not null" json:"survey_name"`
    Questions  []Question
}

type Question struct {
    QuestionID   int    `gorm:"primary_key" json:"question_id"`
    QuestionText string `gorm:"not null;unique" json:"question_text"`
    Options      []Option
}

type Option struct {
    OptionID   int    `gorm:"primary_key" json:"option_id"`
    OptionText string `gorm:"not null;unique" json:"option_text"`
}
I'm not sure about the GORM part, but for the JSON you need to add struct tags on the nested objects as well:
type Survey struct {
    ...
    Questions []Question `json:"questions"`
}

type Question struct {
    ...
    Options []Option `json:"options"`
}
We're missing some scope from your code, so it's quite hard to point you in the right direction. Are you asking about querying GORM so you get []Survey, or are you asking about marshalling []Survey? Either way, you should add the tag to Questions too, as slomek replied.
However, try this to fetch nested data in a many-to-many relation:
type Survey struct {
    gorm.Model
    SurveyID   string      `gorm:"primary_key" json:"survey_id"`
    SurveyName string      `gorm:"not null" json:"survey_name"`
    Questions  []*Question `gorm:"many2many:survey_questions;"`
}

surveys := []*model.Survey{}
db := dbSession.Where(&model.Survey{SurveyID: id}).Preload("Questions").Find(&surveys)
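If the options should also come back nested under each question, as in the desired response, GORM can preload relations two levels deep with a dotted path. A sketch, assuming Options is mapped as a many2many the same way (the question_options join-table name is an assumption):

type Question struct {
    QuestionID   int       `gorm:"primary_key" json:"question_id"`
    QuestionText string    `gorm:"not null;unique" json:"question_text"`
    Options      []*Option `gorm:"many2many:question_options;" json:"options"`
}

surveys := []*model.Survey{}
db := dbSession.
    Where(&model.Survey{SurveyID: id}).
    Preload("Questions.Options"). // nested preload: each survey's questions and their options
    Find(&surveys)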

Deserialize JSON with nested categories

My Controller Class:
public function postAction(Request $request)
{
    $content = $request->getContent();
    $category = $this->get('jms_serializer')->deserialize($content, 'AppBundle\Entity\Category', 'json');
    $errors = $this->get('validator')->validate($category);
    if (count($errors) > 0) {
        return new View("NAME LENGTH MUST BE >4", Response::HTTP_BAD_REQUEST);
    } else {
        $em = $this->getDoctrine()->getManager();
        $em->persist($category);
        $em->flush();
        return new View($category, Response::HTTP_OK);
    }
}
Entity:
class Category
{
    private $id;
    private $parent;
    private $children;

    public function __construct()
    {
        $this->children = new ArrayCollection();
    }

    public function getChildren()
    {
        return $this->children;
    }

    // setters and getters
}
Doctrine.yml:
AppBundle\Entity\Category:
    type: entity
    table: category
    repositoryClass: AppBundle\Repository\CategoryRepository
    oneToMany:
        children:
            targetEntity: AppBundle\Entity\Category
            mappedBy: parent
            orderBy:
                name: ASC
    manyToOne:
        parent:
            targetEntity: AppBundle\Entity\Category
            inversedBy: children
            joinColumn:
                name: parentId
                referencedColumnName: id
    id:
        id:
            column: id
            type: integer
            generator:
                strategy: AUTO
    fields:
        name:
            type: string
            length: 255
When I send a POST JSON request like this:
{
    "name": "Child to 8",
    "parentId": "8"
}
In the MySQL table I do not receive parentId:
mysql> select * from category;
+----+--------------------+----------+
| id | name | parentId |
+----+--------------------+----------+
| 1 | Primary Category | NULL |
| 2 | Secondary Category | 1 |
| 3 | D_child | 1 |
| 4 | F_child | 1 |
| 5 | Z_child | 1 |
| 6 | Y_child | 1 |
| 7 | H_child | 1 |
| 8 | A_child | 1 |
| 9 | Child to 8 | NULL |<----- must be 8
+----+--------------------+----------+
But after deserialization I receive this:
{
    "id": 9,
    "name": "Child to 8"
}
I understand that id is an integer, but parentId is actually an object of class Category. How can I make it so that the parent is saved as well? Maybe I do not understand something ...
You need a .yml config file for the serializer, in your case Entity.Category.yml.
In this file, add the property for the nested entities, set its type to your entity, and make sure the accessors (setter, getter) exist.
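For illustration, a minimal sketch of what such a JMS serializer config could look like (the file path, exclusion policy, and exposed fields are assumptions, not taken from the original project):

# Resources/config/serializer/Entity.Category.yml (path assumed)
AppBundle\Entity\Category:
    exclusion_policy: ALL
    properties:
        id:
            type: integer
            expose: true
        name:
            type: string
            expose: true
        parent:
            type: AppBundle\Entity\Category
            expose: true
        children:
            type: ArrayCollection<AppBundle\Entity\Category>
            expose: true

With parent declared and typed, the serializer knows how to map nested category data onto the relation; if the incoming JSON key differs from the property name (parentId vs parent), a serialized_name option may also be needed.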

Load multi-line JSON data into HIVE table

I have JSON data which is multi-line JSON, and I have created a Hive table to load that data into. I have another JSON file which is a single-line JSON record. When I load the single-line JSON record into its Hive table and query it, it works fine. But when I load the multi-line JSON into its Hive table, it gives the exception below:
Failed with exception java.io.IOException:org.apache.hadoop.hive.serde2.SerDeException: org.codehaus.jackson.JsonParseException: Unexpected end-of-input: expected close marker for OBJECT (from [Source: java.io.ByteArrayInputStream@8b89b3a; line: 1, column: 0]) at [Source: java.io.ByteArrayInputStream@8b89b3a; line: 1, column: 3]
Below is my JSON data:
{
  "uploadTimeStamp" : "1486631318873",
  "PDID" : "123",
  "data" : [ {
    "Data" : {
      "unit" : "rpm",
      "value" : "0"
    },
    "EventID" : "E1",
    "PDID" : "123",
    "Timestamp" : 1486631318873,
    "Timezone" : 330,
    "Version" : "1.0",
    "pii" : { }
  }, {
    "Data" : {
      "heading" : "N",
      "loc3" : "false",
      "loc" : "14.022425",
      "loc1" : "78.760587",
      "loc4" : "false",
      "speed" : "10"
    },
    "EventID" : "E2",
    "PDID" : "123",
    "Timestamp" : 1486631318873,
    "Timezone" : 330,
    "Version" : "1.1",
    "pii" : { }
  }, {
    "Data" : {
      "x" : "1.1",
      "y" : "1.2",
      "z" : "2.2"
    },
    "EventID" : "E3",
    "PDID" : "123",
    "Timestamp" : 1486631318873,
    "Timezone" : 330,
    "Version" : "1.0",
    "pii" : { }
  }, {
    "EventID" : "E4",
    "Data" : {
      "value" : "50",
      "unit" : "percentage"
    },
    "Version" : "1.0",
    "Timestamp" : 1486631318873,
    "PDID" : "123",
    "Timezone" : 330
  }, {
    "Data" : {
      "unit" : "kmph",
      "value" : "70"
    },
    "EventID" : "E5",
    "PDID" : "123",
    "Timestamp" : 1486631318873,
    "Timezone" : 330,
    "Version" : "1.0",
    "pii" : { }
  } ]
}
I am using /hive/lib/hive-hcatalog-core-0.13.0.jar
Below is my create table statement:
create table test7(
  uploadtime bigint,
  pdid string,
  data array<
    struct<
      Data:struct<
        unit:string,
        value:int>,
      eventid:string,
      pdid:bigint,
      time:bigint,
      timezone:int,
      version:int,
      pii:struct<pii:string>>,
    struct<
      Data:struct<
        heading:string,
        Location:string,
        latitude:bigint,
        longitude:bigint,
        Location2:string,
        speed:int>,
      eventid:string,
      pdid:bigint,
      time:bigint,
      timezone:int,
      version:int,
      pii:struct<pii:string>>,
    struct<
      Data:struct<
        unit:string,
        value:int>,
      eventid:string,
      pdid:bigint,
      time:bigint,
      timezone:int,
      version:int,
      pii:struct<pii:string>>,
    struct<
      Data:struct<
        x:int,
        y:int,
        z:int>,
      eventid:string,
      pdid:bigint,
      time:bigint,
      timezone:int,
      version:int,
      pii:struct<pii:string>>,
    struct<
      Data:struct<
        heading:string,
        loc3:string,
        latitude:bigint,
        longitude:bigint,
        loc4:string,
        speed:int>,
      eventid:string,
      pdid:bigint,
      time:bigint,
      timezone:int,
      version:int,
      pii:struct<pii:string>>
  >
)
ROW FORMAT SERDE
  'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
LOCATION
  '/xyz/abc/';
Edit:
Adding the single-line JSON and the new create table statement with its error:
{"uploadTimeStamp":"1487183800905","PDID":"123","data":[{"Data":{"unit":"rpm","value":"0"},"EventID":"event1","PDID":"123","Timestamp":1487183800905,"Timezone":330,"Version":"1.0","pii":{}},{"Data":{"heading":"N","loc1":"false","latitude":"16.032425","longitude":"80.770587","loc2":"false","speed":"10"},"EventID":"event2","PDID":"123","Timestamp":1487183800905,"Timezone":330,"Version":"1.1","pii":{}},{"Data":{"x":"1.1","y":"1.2","z":"2.2"},"event3":"AccelerometerInfo","PDID":"123","Timestamp":1487183800905,"Timezone":330,"Version":"1.0","pii":{}},{"EventID":"event4","Data":{"value":"50","unit":"percentage"},"Version":"1.0","Timestamp":1487183800905,"PDID":"123","Timezone":330},{"Data":{"unit":"kmph","value":"70"},"EventID":"event5","PDID":"123","Timestamp":1487183800905,"Timezone":330,"Version":"1.0","pii":{}}]}
create table test1(
  uploadTimeStamp string,
  PDID string,
  data array<
    struct<
      Data:struct<unit:string,value:int>,
      EventID:string,
      PDID:string,
      TimeS:bigint,
      Timezone:int,
      Version:float,
      pii:struct<>>,
    struct<
      Data:struct<heading:string,loc1:string,latitude:double,longitude:double,loc2:string,speed:int>,
      EventID:string,
      PDID:string,
      TimeS:bigint,
      Timezone:int,
      Version:float,
      pii:struct<>>,
    struct<
      Data:struct<x:float,y:float,z:float>,
      EventID:string,
      PDID:string,
      TimeS:bigint,
      Timezone:int,
      Version:float,
      pii:struct<>>,
    struct<
      EventID:string,
      Data:struct<value:int,unit:percentage>,
      Version:float,
      TimeS:bigint,
      PDID:string,
      Timezone:int>,
    struct<
      Data:struct<unit:string,value:int>,
      EventID:string,
      PDID:string,
      TimeS:bigint,
      Timezone:int,
      Version:float,
      pii:struct<>>
  >
)
ROW FORMAT SERDE
  'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
LOCATION
  '/ABC/XYZ/';
MismatchedTokenException(320!=313)
...
...
...
FAILED: ParseException line 11:10 mismatched input '<>' expecting < near 'struct' in struct type
Sample data
{"uploadTimeStamp":"1486631318873","PDID":"123","data":[{"Data":{"unit":"rpm","value":"0"},"EventID":"E1","PDID":"123","Timestamp":1486631318873,"Timezone":330,"Version":"1.0","pii":{}},{"Data":{"heading":"N","loc3":"false","loc":"14.022425","loc1":"78.760587","loc4":"false","speed":"10"},"EventID":"E2","PDID":"123","Timestamp":1486631318873,"Timezone":330,"Version":"1.1","pii":{}},{"Data":{"x":"1.1","y":"1.2","z":"2.2"},"EventID":"E3","PDID":"123","Timestamp":1486631318873,"Timezone":330,"Version":"1.0","pii":{}},{"EventID":"E4","Data":{"value":"50","unit":"percentage"},"Version":"1.0","Timestamp":1486631318873,"PDID":"123","Timezone":330},{"Data":{"unit":"kmph","value":"70"},"EventID":"E5","PDID":"123","Timestamp":1486631318873,"Timezone":330,"Version":"1.0","pii":{}}]}
add jar /usr/lib/hive-hcatalog/share/hcatalog/hive-hcatalog-core.jar
create external table myjson
(
uploadTimeStamp string
,PDID string
,data array
<
struct
<
Data:struct
<
unit:string
,value:string
,heading:string
,loc3:string
,loc:string
,loc1:string
,loc4:string
,speed:string
,x:string
,y:string
,z:string
>
,EventID:string
,PDID:string
,`Timestamp`:bigint
,Timezone:smallint
,Version:string
,pii:struct<dummy:string>
>
>
)
row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
stored as textfile
location '/tmp/myjson'
;
select * from myjson
;
+------------------------+-------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| myjson.uploadtimestamp | myjson.pdid | myjson.data |
+------------------------+-------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 1486631318873 | 123 | [{"data":{"unit":"rpm","value":"0","heading":null,"loc3":null,"loc":null,"loc1":null,"loc4":null,"speed":null,"x":null,"y":null,"z":null},"eventid":"E1","pdid":"123","timestamp":1486631318873,"timezone":330,"version":"1.0","pii":{"dummy":null}},{"data":{"unit":null,"value":null,"heading":"N","loc3":"false","loc":"14.022425","loc1":"78.760587","loc4":"false","speed":"10","x":null,"y":null,"z":null},"eventid":"E2","pdid":"123","timestamp":1486631318873,"timezone":330,"version":"1.1","pii":{"dummy":null}},{"data":{"unit":null,"value":null,"heading":null,"loc3":null,"loc":null,"loc1":null,"loc4":null,"speed":null,"x":"1.1","y":"1.2","z":"2.2"},"eventid":"E3","pdid":"123","timestamp":1486631318873,"timezone":330,"version":"1.0","pii":{"dummy":null}},{"data":{"unit":"percentage","value":"50","heading":null,"loc3":null,"loc":null,"loc1":null,"loc4":null,"speed":null,"x":null,"y":null,"z":null},"eventid":"E4","pdid":"123","timestamp":1486631318873,"timezone":330,"version":"1.0","pii":null},{"data":{"unit":"kmph","value":"70","heading":null,"loc3":null,"loc":null,"loc1":null,"loc4":null,"speed":null,"x":null,"y":null,"z":null},"eventid":"E5","pdid":"123","timestamp":1486631318873,"timezone":330,"version":"1.0","pii":{"dummy":null}}] |
+------------------------+-------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
select j.uploadTimeStamp
,j.PDID
,d.val.EventID
,d.val.PDID
,d.val.`Timestamp`
,d.val.Timezone
,d.val.Version
,d.val.Data.unit
,d.val.Data.value
,d.val.Data.heading
,d.val.Data.loc3
,d.val.Data.loc
,d.val.Data.loc1
,d.val.Data.loc4
,d.val.Data.speed
,d.val.Data.x
,d.val.Data.y
,d.val.Data.z
from myjson j
lateral view explode (data) d as val
;
+-------------------+--------+---------+------+---------------+----------+---------+------------+-------+---------+-------+-----------+-----------+-------+-------+------+------+------+
| j.uploadtimestamp | j.pdid | eventid | pdid | timestamp | timezone | version | unit | value | heading | loc3 | loc | loc1 | loc4 | speed | x | y | z |
+-------------------+--------+---------+------+---------------+----------+---------+------------+-------+---------+-------+-----------+-----------+-------+-------+------+------+------+
| 1486631318873 | 123 | E1 | 123 | 1486631318873 | 330 | 1.0 | rpm | 0 | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL |
| 1486631318873 | 123 | E2 | 123 | 1486631318873 | 330 | 1.1 | NULL | NULL | N | false | 14.022425 | 78.760587 | false | 10 | NULL | NULL | NULL |
| 1486631318873 | 123 | E3 | 123 | 1486631318873 | 330 | 1.0 | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | 1.1 | 1.2 | 2.2 |
| 1486631318873 | 123 | E4 | 123 | 1486631318873 | 330 | 1.0 | percentage | 50 | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL |
| 1486631318873 | 123 | E5 | 123 | 1486631318873 | 330 | 1.0 | kmph | 70 | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL |
+-------------------+--------+---------+------+---------------+----------+---------+------------+-------+---------+-------+-----------+-----------+-------+-------+------+------+------+
I was having the same issue, so I decided to create a custom input format that can extract multiline (pretty-printed) JSON records.
This JsonRecordReader can read a multiline JSON record in Hive. It extracts each record by balancing the curly braces { and }, so the content from the first '{' to the matching closing '}' is considered one complete record. Below is the code snippet:
public static class JsonRecordReader implements RecordReader<LongWritable, Text> {

    public static final String START_TAG_KEY = "jsoninput.start";
    public static final String END_TAG_KEY = "jsoninput.end";
    private byte[] startTag = "{".getBytes();
    private byte[] endTag = "}".getBytes();
    private long start;
    private long end;
    private FSDataInputStream fsin;
    private final DataOutputBuffer buffer = new DataOutputBuffer();

    public JsonRecordReader(FileSplit split, JobConf jobConf) throws IOException {
        // uncomment the below lines if you need to get the configuration
        // from JobConf:
        // startTag = jobConf.get(START_TAG_KEY).getBytes("utf-8");
        // endTag = jobConf.get(END_TAG_KEY).getBytes("utf-8");

        // open the file and seek to the start of the split:
        start = split.getStart();
        end = start + split.getLength();
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(jobConf);
        fsin = fs.open(split.getPath());
        fsin.seek(start);
    }

    @Override
    public boolean next(LongWritable key, Text value) throws IOException {
        if (fsin.getPos() < end) {
            AtomicInteger count = new AtomicInteger(0);
            if (readUntilMatch(false, count)) {
                try {
                    buffer.write(startTag);
                    if (readUntilMatch(true, count)) {
                        key.set(fsin.getPos());
                        // create json record from buffer:
                        String jsonRecord = new String(buffer.getData(), 0, buffer.getLength());
                        value.set(jsonRecord);
                        return true;
                    }
                } finally {
                    buffer.reset();
                }
            }
        }
        return false;
    }

    @Override
    public LongWritable createKey() {
        return new LongWritable();
    }

    @Override
    public Text createValue() {
        return new Text();
    }

    @Override
    public long getPos() throws IOException {
        return fsin.getPos();
    }

    @Override
    public void close() throws IOException {
        fsin.close();
    }

    @Override
    public float getProgress() throws IOException {
        return ((fsin.getPos() - start) / (float) (end - start));
    }

    private boolean readUntilMatch(boolean withinBlock, AtomicInteger count) throws IOException {
        while (true) {
            int b = fsin.read();
            // end of file:
            if (b == -1)
                return false;
            // save to buffer:
            if (withinBlock)
                buffer.write(b);
            // check if we're matching start/end tag:
            if (b == startTag[0]) {
                count.incrementAndGet();
                if (!withinBlock) {
                    return true;
                }
            } else if (b == endTag[0]) {
                count.getAndDecrement();
                if (count.get() == 0) {
                    return true;
                }
            }
            // see if we've passed the stop point:
            if (!withinBlock && count.get() == 0 && fsin.getPos() >= end)
                return false;
        }
    }
}
This input format can be used along with the JSON SerDe supplied by Hive to read multiline JSON files.
CREATE TABLE books (id string, bookname string, properties struct<subscription:string, unit:string>)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS INPUTFORMAT 'JsonInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
The working code with samples is here: https://github.com/unayakdev/hive-json