Twitter bootstrap json forms arrangement - html

I have been trying to create forms from a JSON schema using various libraries, such as JSON Form (by Joshfire) and Alpaca, and so far I can produce the forms I want. However, the forms are all arranged vertically, and one cannot easily change where the individual form fields are rendered.
I have tried Alpaca templates to arrange my form fields, but this does not work for some fields, such as textarea.
Is there a library for Bootstrap that I can use to generate forms from JSON and have them styled and arranged via Bootstrap CSS?

Why not use the strength of both libraries? Bootstrap rocks at laying out responsive grids & alpaca is awesome at turning JSON into forms.
Knock up a bootstrap layout and use Alpaca's layout binding abilities to set where the fields should be rendered.
// Render an Alpaca form into #CustomFormLayout, placing each field inside a
// hand-written Bootstrap grid instead of Alpaca's default vertical stack.
$(document).ready(() => {
  // Initial values for the form; only the picture URL is pre-filled.
  const data = {
    Picture: "http://www.alpacajs.org/assets/themes/dbyll/images/alpaca-icon.png",
  };

  // JSON schema: what the form collects and how values are constrained.
  const schema = {
    type: "object",
    properties: {
      Love: {
        required: true,
        enum: ["I like goat", "I like lama", "I like alpaca"],
      },
      LoveFactor: {
        required: true,
        type: "number",
        minimum: 1,
        maximum: 10,
      },
      Picture: {
        type: "string",
      },
    },
    // LoveFactor is declared as dependent on Love.
    dependencies: {
      LoveFactor: ["Love"],
    },
  };

  // Per-field presentation options (control type, labels, views).
  const options = {
    type: "object",
    fields: {
      Love: {
        type: "select",
        label: "Which animal do you love?",
        noneLabel: "-- Please Select an Animal --",
        removeDefaultNone: false,
      },
      LoveFactor: {
        label: "How much love does this animal get?",
      },
      Picture: {
        type: "image",
        view: "bootstrap-display",
      },
    },
  };

  // Bootstrap grid markup. Each data-alpaca-layout-binding attribute names the
  // field that should be rendered into that element. The markup is kept
  // flush-left because whitespace inside a template literal is part of the string.
  const layoutTemplate = `
<div class='row'>
<div class ="col-xs-12">
<div data-alpaca-layout-binding='Love' class ='col-xs-6'></div>
<div data-alpaca-layout-binding='LoveFactor' class ='col-xs-6'></div>
</div>
<div class ="col-xs-12">
<div data-alpaca-layout-binding='Picture' class ='col-xs-2 col-xs-offset-5'></div>
</div>
</div>
`;

  // View: extend the stock "bootstrap-create" view with the custom layout.
  const view = {
    parent: "bootstrap-create",
    layout: {
      template: layoutTemplate,
    },
  };

  $("#CustomFormLayout").alpaca({ data, schema, options, view });
});
<script src="https://code.jquery.com/jquery-3.2.1.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/handlebars.js/4.0.10/handlebars.js"></script>
<script src="https://code.cloudcms.com/alpaca/1.5.23/bootstrap/alpaca.min.js"></script>
<link href="https://code.cloudcms.com/alpaca/1.5.23/bootstrap/alpaca.min.css" rel="stylesheet"/>
<link href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" rel="stylesheet"/>
<div id="CustomFormLayout">
</div>

Related

How to output response of type atom/xml feed into Jquery DataTable?

How to output response of type atom/xml feed (from arxiv call) into Jquery DataTable?
I have the datatable working for a simple json from Ajax call to flask server example.
When I try to do it with the XML from an arXiv API response, I can't seem to get it to display in the DataTable (though I can print the raw XML using <pre lang="xml" >, or the JSON).
I also tried converting it to JSON first via a Python dictionary, but still couldn't get it formatted into the DataTable, as I'm unsure how to access the properties properly in the Ajax call when they are nested deeper than the first level, unlike in the basic example linked.
The HTML in template:
<table id="arxivtable" class="display" style="width:100%">
<thead>
<tr>
<th>title</th>
<th>id</th>
<th>link</th>
<th>author</th>
<th>published</th>
</tr>
</thead>
</table>
I tried via xml :
// Question's attempt: initialise a DataTable on #arxivtable, loading its rows
// via Ajax from the /get_arxivpapers endpoint and asking jQuery for XML.
$('#arxivtable').DataTable({
"ajax": {
// "url": "static/objects2.txt", // This works for the static file
"url": "/get_arxivpapers", // This now works too thanks to @kthorngren
"dataType": "xml",
"type":"GET",
// NOTE(review): "{{name}}" looks like an unrendered template placeholder rather
// than a path into the response — confirm what dataSrc is meant to be.
"dataSrc": "{{name}}",
"contentType":"application/atom+xml"
},
// Eight column definitions follow ("link" appears twice, once with a renderer).
"columns": [
{"data": "title"},
{
"data": "link",
// Custom renderer: only the 'display' request type is altered; every other
// type gets the value back unchanged.
"render": function(data, type, row, meta){
if(type === 'display'){
// NOTE(review): both surrounding strings are empty, so this only coerces
// data to a string — any intended link markup may have been lost; confirm.
data = '' + data + '';
}
return data;
}
},
{ "data": "id" },
{ "data": "link" },
{ "data": "author" },
{ "data": "journal" },
{ "data": "published" },
{ "data": "summary" }
]
});
JSON from AJAX call:
{
"feed": {
"#xmlns": "http://www.w3.org/2005/Atom",
"link": {
"#href": "http://arxiv.org/api/query?search_query%3Dall%3Aeinstein%26id_list%3D%26start%3D0%26max_results%3D2",
"#rel": "self",
"#type": "application/atom+xml"
},
"title": {
"#type": "html",
"#text": "ArXiv Query: search_query=all:einstein&id_list=&start=0&max_results=2"
},
"id": "http://arxiv.org/api/vehKAQR+bheXtHwJw3qx/OG/XXw",
"updated": "2022-06-14T00:00:00-04:00",
"opensearch:totalResults": {
"#xmlns:opensearch": "http://a9.com/-/spec/opensearch/1.1/",
"#text": "36970"
},
"opensearch:startIndex": {
"#xmlns:opensearch": "http://a9.com/-/spec/opensearch/1.1/",
"#text": "0"
},
"opensearch:itemsPerPage": {
"#xmlns:opensearch": "http://a9.com/-/spec/opensearch/1.1/",
"#text": "2"
},
"entry": [
{
"id": "http://arxiv.org/abs/1801.05533v2",
"updated": "2018-11-22T14:04:43Z",
"published": "2018-01-17T03:05:51Z",
"title": "Einstein-Weyl structures on almost cosymplectic manifolds",
"summary": "",
"author": {
"name": "Xiaomin Chen"
},
"arxiv:comment": {
"#xmlns:arxiv": "http://arxiv.org/schemas/atom",
"#text": "accepted by Periodica Mathematica Hungarica, 14 pages, no figures"
},
"link": [
{
"#href": "http://arxiv.org/abs/1801.05533v2",
"#rel": "alternate",
"#type": "text/html"
},
{
"#title": "pdf",
"#href": "http://arxiv.org/pdf/1801.05533v2",
"#rel": "related",
"#type": "application/pdf"
}
],
"arxiv:primary_category": {
"#xmlns:arxiv": "http://arxiv.org/schemas/atom",
"#term": "math.DG",
"#scheme": "http://arxiv.org/schemas/atom"
},
"category": [
{
"#term": "math.DG",
"#scheme": "http://arxiv.org/schemas/atom"
},
{
"#term": "53D10, 53D15",
"#scheme": "http://arxiv.org/schemas/atom"
}
]
},
{
"id": "http://arxiv.org/abs/0802.2137v3",
"updated": "2008-04-01T04:36:21Z",
"published": "2008-02-15T04:40:56Z",
"title": "",
"summary": ".",
"author": {
"name": ""
},
"arxiv:comment": {
"#xmlns:arxiv": "http://arxiv.org/schemas/atom",
"#text": "18 pages, added Theorem 5"
},
"link": [
{
"#href": "http://arxiv.org/abs/0802.2137v3",
"#rel": "alternate",
"#type": "text/html"
},
{
"#title": "pdf",
"#href": "http://arxiv.org/pdf/0802.2137v3",
"#rel": "related",
"#type": "application/pdf"
}
],
"arxiv:primary_category": {
"#xmlns:arxiv": "http://arxiv.org/schemas/atom",
"#term": "math.DG",
"#scheme": "http://arxiv.org/schemas/atom"
},
"category": [
{
"#term": "math.DG",
"#scheme": "http://arxiv.org/schemas/atom"
},
{
"#term": "53C30; 53C25",
"#scheme": "http://arxiv.org/schemas/atom"
}
]
}
]
}
}
Or the original atom/xml:
<feed xmlns="http://www.w3.org/2005/Atom">
<link href="http://arxiv.org/api/query?search_query%3Dall%3Aeinstein%26id_list%3D%26start%3D0%26max_results%3D2" rel="self" type="application/atom+xml">
<title type="html">ArXiv Query: search_query=all:einstein&id_list=&start=0&max_results=2</title>
<id>http://arxiv.org/api/vehKAQR+bheXtHwJw3qx/OG/XXw</id>
<updated>2022-06-14T00:00:00-04:00</updated>
<opensearch:totalresults xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">36970</opensearch:totalresults>
<opensearch:startindex xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">0</opensearch:startindex>
<opensearch:itemsperpage xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">2</opensearch:itemsperpage>
<entry>
<id>http://arxiv.org/abs/1801.05533v2</id>
<updated>2018-11-22T14:04:43Z</updated>
<published>2018-01-17T03:05:51Z</published>
<title></title>
<summary>
</summary>
<author>
<name></name>
</author>
<arxiv:comment xmlns:arxiv="http://arxiv.org/schemas/atom">accepted by Periodica Mathematica Hungarica, 14 pages, no figures</arxiv:comment>
<link href="http://arxiv.org/abs/1801.05533v2" rel="alternate" type="text/html">
<link title="pdf" href="http://arxiv.org/pdf/1801.05533v2" rel="related" type="application/pdf">
<arxiv:primary_category xmlns:arxiv="http://arxiv.org/schemas/atom" term="math.DG" scheme="http://arxiv.org/schemas/atom">
<category term="math.DG" scheme="http://arxiv.org/schemas/atom">
<category term="53D10, 53D15" scheme="http://arxiv.org/schemas/atom">
</category></category></arxiv:primary_category></entry>
<entry>
<id>http://arxiv.org/abs/0802.2137v3</id>
<updated>2008-04-01T04:36:21Z</updated>
<published>2008-02-15T04:40:56Z</published>
<title></title>
<summary>
</summary>
<author>
<name></name>
</author>
<arxiv:comment xmlns:arxiv="http://arxiv.org/schemas/atom"></arxiv:comment>
<link href="http://arxiv.org/abs/0802.2137v3" rel="alternate" type="text/html">
<link title="pdf" href="http://arxiv.org/pdf/0802.2137v3" rel="related" type="application/pdf">
<arxiv:primary_category xmlns:arxiv="http://arxiv.org/schemas/atom" term="math.DG" scheme="http://arxiv.org/schemas/atom">
<category term="math.DG" scheme="http://arxiv.org/schemas/atom">
<category term="53C30; 53C25" scheme="http://arxiv.org/schemas/atom">
</category></category></arxiv:primary_category></entry>
</feed>
The End Point:
@app.route('/get_arxivpapers')
def getArxivPapers(name="einstein"):
    """Query the arXiv export API and return the raw Atom XML as text.

    Args:
        name: Search term; underscores are treated as spaces
            (e.g. ``"general_relativity"``). Defaults to ``"einstein"``.

    Returns:
        The response body decoded as UTF-8 (an Atom/XML document).
    """
    # Imported locally so the view does not rely on a module-level import
    # that may not exist in the surrounding file.
    from urllib.parse import quote

    max_results = 2
    # Percent-encode the term: a raw "&" or "#" inside the query string would
    # be read as a parameter separator / fragment marker and truncate the search.
    searchterm = quote(name.replace("_", " "))
    url = ('http://export.arxiv.org/api/query?search_query=all:' + searchterm
           + '&start=0&' + 'max_results=' + str(max_results))
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(url) as response:
        data = response.read()
    # Alternative JSON path tried earlier, intentionally left disabled:
    # data_dict = xmltodict.parse(data)
    # json_data = json.dumps(data_dict)
    # print(json_data)
    # return jsonify(json_data)
    return data.decode('utf-8')
I will use your JSON source data instead of the XML, since that is easier to handle in DataTables.
Here is a basic demo, to start with, followed by some explanatory notes:
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Demo</title>
<script src="https://code.jquery.com/jquery-3.5.1.min.js"></script>
<script src="https://cdn.datatables.net/1.10.21/js/jquery.dataTables.min.js"></script>
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.21/css/jquery.dataTables.min.css">
</head>
<body>
<div style="margin: 20px;">
<table id="arxivtable" class="display" style="width:100%">
<thead>
<tr>
<th>title</th>
<th>id</th>
<th>link</th>
<th>author</th>
<th>published</th>
<th>summary</th>
</tr>
</thead>
</table>
</div>
<script type="text/javascript">
// Initialise the arXiv results table once the DOM is ready.
$(document).ready(() => {
  // Where the rows come from: the Ajax endpoint, and the path inside the JSON
  // response to the array that holds one element per table row.
  const ajaxSource = {
    url: "YOUR_URL_GOES_HERE",
    dataSrc: "feed.entry",
  };

  // One data path per <th> in the table header, in the same order.
  const columnPaths = [
    "title",
    "id",
    "link[].#href",
    "author.name",
    "published",
    "summary",
  ];

  $('#arxivtable').DataTable({
    ajax: ajaxSource,
    columns: columnPaths.map((path) => ({ data: path })),
  });
});
</script>
</body>
</html>
Notes
1 - Because you have provided hard-coded HTML column headers, you need to make sure the number of those headers matches the number of columns defined in the DataTable. Alternatively, you can remove the HTML <thead> section and use the DataTables columns.title option.
2 - Your Ajax JSON source data contains an array [ ... ]. DataTables needs to know where this array is located in your JSON response, as part of the Ajax handling option, so that it can iterate over that array. Each element in the array will be used to create a row of HTML table data. The ajax.dataSrc option therefore needs to be set accordingly:
dataSrc: "feed.entry"
Once you have set the above Ajax JSON starting point correctly, then you can use field names for each separate column data value - as shown below.
3 - The author JSON value is actually an object:
"author": {
"name": "Xiaomin Chen"
},
Therefore you need to drill down into that to get the field you want to show in the DataTable:
{ "data": "author.name" },
4 - I removed your column renderer function to keep my initial demo simple, but it can be used to access fields and sub-fields - and concatenate strings and other values as needed (as in your example in the question).
5 - The link JSON value is actually an array of objects. For my basic demo, I just accessed the final entry in that array, and then took the href field:
{ "data": "link[].#href" },
This may not be what you want. You may want to only choose links of a certain type, or choose all links, or something different.
This is where DataTables is limited in what it can handle. It cannot display arbitrary nested JSON values of this type (not surprisingly).
In such cases, you would need to re-structure the JSON, prior to sending it to DataTables - or restructure it in a dataSrc function inside DataTables itself:
"dataSrc": function ( json ) { ...transform and return your JSON here... }
6 - I was not sure what you wanted to display for { "data": "journal" }. I did not see anything called journal in the JSON.
7 - Note that all the source JSON data outside of the feed.entry array is also not available to DataTables. DataTables can only iterate over that outer array. Anything you may also need which is not in that outer array would need to be added to the array, to be accessible to DataTables.
See also Nested object data (arrays) and Nested object data (objects) for more related notes.

How can I specify in a json schema that a certain property is mandatory and also must contain a specific value?

I want to create several json schemas for different scenarios.
For scenario 1 I would like to specify that:
a) The property "draftenabled" must have the value true.
b) the property "draftenabled" does exist.
I have checked this post
Validating Mandatory String values in JSON Schema
and tried the following
I tried to validate this json
{
"$schema": "./test-schema.json",
"draftenabled": false,
"prefix": "hugo"
}
with this schema test-schema.json that I had created in Visual Studio Code.
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"$schema": {
"type": "string"
},
"draftenabled": {
"type": "boolean"
},
"prefix": {
"type": "string"
}
},
"additionalItems": false,
"contains": {
"properties": {
"draftenabled": {
"const": true
}
},
"required": [
"draftenabled"
]
}
}
I would have expected an error since the value for draftenabled is false rather than true.
It looks like there is some confusion around how the keywords apply to instances (data) of different types.
properties only applies to objects
additionalItems and contains only apply to arrays
Since your instance is an object, additionalItems and contains will be ignored.
Based on your description of what you want, I would do something like the following (note that const was only introduced in draft-06; with a strict draft-04 validator, use "enum": [true] instead):
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"$schema": {
"type": "string"
},
"draftenabled": {
"const": true
},
"prefix": {
"type": "string"
}
},
"required": [
"draftenabled"
]
}
This moves the definitions you have in the contains into the main schema. You got that bit right, just in the wrong place.
You also mention that this is a "scenario 1." If there are other scenarios, I suggest creating schemas like this for each scenario then wrapping all of them together in a oneOf or anyOf:
{
"oneOf": [
{ <scenario 1> },
{ <scenario 2> },
...
]
}

Why is Leaflet Ajax not processing and displaying GeoJSON data?

I have been trying for weeks to get Leaflet Ajax to accept data requests from the Land Information New Zealand (LINZ) API without success.
I have a valid key (not included in the snippet) and have tried several tests to load this data in. Other datasets from the LINZ API do not work either.
What am I doing wrong here?
<html>
<head>
<!-- Style -->
<link rel="stylesheet" href="css/style.css">
<!-- Leaflet: the release is pinned with "@1.7.1". The integrity attributes are
     checked against the fetched files, so the URLs must resolve to exactly
     that release or the browser will refuse to load them. -->
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.7.1/dist/leaflet.css"
integrity="sha512-xodZBNTC5n17Xt2atTPuE1HxjVMSvLVW9ocqUKLsCC5CXdbqCmblAshOMAS6/keqq/sMZMZ19scR4PsZChSR7A=="
crossorigin=""/>
<script src="https://unpkg.com/leaflet@1.7.1/dist/leaflet.js"
integrity="sha512-XQoYMqMTK8LvdxXYG3nZ448hOEQiglfqkJs1NOQV44cWnUrBc8PkAOcXy20w0vlaXaVUearIOBhiXZ5V3ynxwA=="
crossorigin=""></script>
<!-- Leaflet Ajax -->
<script type='text/javascript' src="./js/leaflet.ajax.js"></script>
</head>
<body>
<!-- Map container: flow content belongs in <body>, and it must exist before
     the script below calls L.map('map'). -->
<div id="map"></div>
<script>
// Create the map in #map with an initial centre and zoom level 16.
map = L.map('map').setView([-41.29132, 174.77931],16)
// Base layer: OpenTopoMap raster tiles.
var OpenTopoMap = L.tileLayer('https://{s}.tile.opentopomap.org/{z}/{x}/{y}.png', {
maxZoom: 17,
attribution: 'Map data: © OpenStreetMap contributors, SRTM | Map style: © OpenTopoMap (CC-BY-SA)'
});
OpenTopoMap.addTo(map)
// LINZ vector query endpoint; KEY_GOES_HERE must be replaced with a real API key.
property_tiles_link = "https://data.linz.govt.nz/services/query/v1/vector.json?key=KEY_GOES_HERE&layer=50804&x=172.61706383056807&y=-43.57379489129212&max_results=3&radius=10000&geometry=true&with_field_names=true"
// Load the response through leaflet-ajax and add the resulting layer to the map.
geojson = new L.GeoJSON.AJAX(property_tiles_link).addTo(map)
console.log(geojson)
// Layer switcher: one overlay (the Ajax layer) and one base map.
overlays = {
"geojson": geojson
}
basemaps = {
"OpenTopoMap": OpenTopoMap
}
L.control.layers(basemaps, overlays).addTo(map)
</script>
</body>
</html>
The code snippet results in this output:
Looking through the logged GeoJSON object does not seem to show any successfully parsed data. The error message in Firefox translates roughly to "The configuration of HTML characters hasn't been declared. The document will show 'rubbish' text in some configurations of the browser."
Any ideas would be super helpful!
An example of the response:
{
"vectorQuery": {
"layers": {
"50804": {
"crs": {
"type": "name",
"properties": {
"name": "EPSG:4326"
}
},
"field_names": ["id", "title_no", "status", "type", "land_district", "issue_date", "guarantee_status", "estate_description", "number_owners", "spatial_extents_shared"],
"type": "FeatureCollection",
"features": [{
"geometry": {
"type": "MultiPolygon",
"coordinates": [
[
[
[175.4776337167, -41.2221699],
[175.4782420833, -41.2225527833],
[175.4801549333, -41.2237566167],
[175.476269, -41.2259343],
[175.47357595, -41.22444375],
[175.4776337167, -41.2221699]
]
]
]
},
"distance": 0,
"type": "Feature",
"properties": {
"id": 1468560,
"title_no": "WN53B/277",
"status": "LIVE",
"type": "Freehold",
"land_district": "Wellington",
"issue_date": "1998-04-16 00:00:00",
"guarantee_status": "Guarantee",
"estate_description": "Fee Simple, 1/1, Lot 1 Deposited Plan 85426, 110,945 m2",
"number_owners": 1,
"spatial_extents_shared": false
},
"id": 1191838
}, {
"geometry": {
"type": "MultiPolygon",
"coordinates": [
[
[
[175.48005638330002, -41.2282570333],
[175.48105425000003, -41.2286012667],
[175.4789359, -41.2297867333],
[175.47874645, -41.2298923],
[175.4767530167, -41.2310074667],
[175.47604405, -41.2314040667],
[175.47550265, -41.23170695000001],
[175.4749415833, -41.2320208833],
[175.4745023167, -41.2322666333],
[175.474015, -41.2317699833],
[175.4735909, -41.23133785000001],
[175.4735833, -41.2313303667],
[175.4732046667, -41.23094425],
[175.4728425667, -41.2305752833],
[175.4725057833, -41.2302328833],
[175.4722412333, -41.2299625],
[175.4719444667, -41.2296600833],
[175.4715930333, -41.22930195],
[175.47127355, -41.2289763667],
[175.4712437333, -41.2289459833],
[175.4708617, -41.22855675],
[175.4704157833, -41.2281024167],
[175.4699766167, -41.227654983300006],
[175.4692410167, -41.2269055],
[175.4692395833, -41.2269040667],
[175.46921793330003, -41.2268834667],
[175.4718439333, -41.2254143333],
[175.4733724167, -41.2245578167],
[175.48005638330002, -41.2282570333]
]
]
]
},
"distance": 134,
"type": "Feature",
"properties": {
"id": 2348803,
"title_no": "WN103/58",
"status": "LIVE",
"type": "Freehold",
"land_district": "Wellington",
"issue_date": "1899-10-23 00:00:00",
"guarantee_status": "Guarantee",
"estate_description": "Fee Simple, 1/1, Lot 75 Deposited Plan 579, 409,390 m2",
"number_owners": 1,
"spatial_extents_shared": true
},
"id": 5113879
}, {
"geometry": {
"type": "MultiPolygon",
"coordinates": [
[
[
[175.48005638330002, -41.2282570333],
[175.48105425000003, -41.2286012667],
[175.4789359, -41.2297867333],
[175.47874645, -41.2298923],
[175.4767530167, -41.2310074667],
[175.47604405, -41.2314040667],
[175.47550265, -41.23170695000001],
[175.4749415833, -41.2320208833],
[175.4745023167, -41.2322666333],
[175.474015, -41.2317699833],
[175.4735909, -41.23133785000001],
[175.4735833, -41.2313303667],
[175.4732046667, -41.23094425],
[175.4728425667, -41.2305752833],
[175.4725057833, -41.2302328833],
[175.4722412333, -41.2299625],
[175.4719444667, -41.2296600833],
[175.4715930333, -41.22930195],
[175.47127355, -41.2289763667],
[175.4712437333, -41.2289459833],
[175.4708617, -41.22855675],
[175.4704157833, -41.2281024167],
[175.4699766167, -41.227654983300006],
[175.4692410167, -41.2269055],
[175.4692395833, -41.2269040667],
[175.46921793330003, -41.2268834667],
[175.4718439333, -41.2254143333],
[175.4733724167, -41.2245578167],
[175.48005638330002, -41.2282570333]
]
]
]
},
"distance": 134,
"type": "Feature",
"properties": {
"id": 4177014,
"title_no": "94991",
"status": "LIVE",
"type": "Leasehold",
"land_district": "Wellington",
"issue_date": "2003-06-10 09:00:00",
"guarantee_status": "Guarantee",
"estate_description": "Leasehold, 1/1, Lot 75 Deposited Plan 579, 409,390 m2",
"number_owners": 1,
"spatial_extents_shared": true
},
"id": 5116291
}]
}
}
}
}
The Leaflet-ajax plugin expects directly a GeoJSON compliant object in the loaded data, whereas in the sample response you show, the structure of the response is:
{
"vectorQuery": {
"layers": {
[layerId]: {
// GeoJSON FeatureCollection
}
}
}
}
Therefore you have to convert this data into a GeoJSON object first. Here in this case it looks quite simple, as you just have to extract the FeatureCollection. You can use leaflet-ajax middleware option to perform this conversion between the reception of the data and before it is processed to be transformed into Leaflet layers:
new L.GeoJSON.AJAX("url", {
  // Runs on the raw response before leaflet-ajax turns it into layers:
  // unwrap the GeoJSON FeatureCollection stored under the queried layer id.
  middleware: function (response) {
    const LAYER_ID = 50804;
    const { layers } = response.vectorQuery;
    return layers[LAYER_ID];
  },
});

Vue.js Filtered list Method

I am still learning Vue.js. At the moment I am trying to make a simple filtered list method that pulls the data from a json file in Vue. I think that I am having trouble figuring out the correct syntax.
I just can't seem to get it right. Any help is more than welcome :)
This is Vue file:
<template>
<section>
<ul>
<li v-for="product in rings" :key="product">
{{product.title}}
</li>
</ul>
</section>
</template>
<script>
import data from '#/assets/data.json';
export default {
data() {
return {
products: []
}
},
methods: {
computed: {
rings(){
return this.products.filter(product => product.type == 'Ring')
}
}
}
}
</script>
And this is the Json file:
{ "products": [
{
"title": "Ring 1",
"description": "something",
"type": "Ring",
"year": "2018",
"image": "...",
"price": "2000,00 kr."
},
{
"title": "Halskæde 1",
"description": "something",
"type": "Halskæde",
"year": "2018",
"image": "...",
"price": "2000,00 kr."
},
{
"title": "Armbånd 1",
"description": "something",
"type": "Armbånd",
"year": "2018",
"image": "...",
"price": "2000,00 kr."
},
{
"title": "Ørering 1",
"description": "something",
"type": "Ørering",
"year": "2018",
"image": "...",
"price": "2000,00 kr."
}
]
}
You imported the data but never used anywhere inside the component:
import data from '#/assets/data.json';
// notice the data here is just a variable and it has nothing to do with the
// component's data property
export default {
data () {
return {
products: data.products // init products with imported data
}
},
Or with the destructuring syntax:
import { products } from '#/assets/data.json';
export default {
data () {
return {
products // init products with imported data
}
},

Storing HTML Documents in Elasticsearch

Scenario
I have HTML documents, let's say: emails. I want to store these on elastic search and search the plaintext of HTML emails.
Problem
Elasticsearch would index all the HTML tags and attributes, too. I don't want that. I want to search for span if it is a plain text, not a html element. For example <span>span</span> could be a hit, but not <span>some other content</span>.
Question
Would you recommend storing both an HTML-stripped field and an HTML field in a document? Or should I store the HTML document on S3 and keep only a stripped version in Elasticsearch? Does that even make sense?
I honestly don't know what happens if elastic search is indexing a HTML document, but I could imagine that it will also index divs and spans and all the attributes. These are things I totally don't search for. So: any suggestion on solving the problem here would be great!
What am I doing now?
Right now before I store a document in ES, I check if the index exists for the document type. If not, I create a collection with a given mapping. The mapping looks like this
{
"analysis": {
"analyzer": {
"htmlStripAnalyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": "standard",
"char_filter": [
"html_strip"
]
}
}
},
"mappings": {
"${type}": {
"dynamic_templates": [
{
"_metadata": {
"path_match": "_metadata.*",
"mapping": {
"type": "keyword"
}
}
}
],
"properties": {
"_tags": {
"type": "nested",
"dynamic": true
}
}
}
}
}
Warning: Ignore the existing mappings. It has nothing to do with my intentions. They are just there.
I am replacing ${type} with the document type, let's say emails.
What would it look like to tell ES to not index the HTML stuff?
A complete test case:
DELETE /test
PUT /test
{
"settings": {
"analysis": {
"analyzer": {
"htmlStripAnalyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase"],
"char_filter": [
"html_strip"
]
}
}
}
},
"mappings": {
"test": {
"properties": {
"html": {
"type": "text",
"analyzer": "htmlStripAnalyzer"
}
}
}
}
}
POST /test/test/1
{
"html": "<td><tr>span<td></tr>"
}
POST /test/test/2
{
"html": "<span>whatever</span>"
}
POST /test/test/3
{
"html": "<html> <body> <h1 style=\"font-family: Arial\">Test</h1> <span>More test</span> </body> </html>"
}
POST /test/_search
{
"query": {
"match": {
"html": "span"
}
}
}
POST /test/_search
{
"query": {
"match": {
"html": "body"
}
}
}
POST /test/_search
{
"query": {
"match": {
"html": "more"
}
}
}
Update for Elasticsearch >=7 (removal of types)
DELETE /test
PUT /test
{
"settings": {
"analysis": {
"analyzer": {
"htmlStripAnalyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase"],
"char_filter": [
"html_strip"
]
}
}
}
},
"mappings": {
"properties": {
"html": {
"type": "text",
"analyzer": "htmlStripAnalyzer"
}
}
}
}
POST /test/_doc/1
{
"html": "<td><tr>span<td></tr>"
}
POST /test/_doc/2
{
"html": "<span>whatever</span>"
}
POST /test/_doc/3
{
"html": "<html> <body> <h1 style=\"font-family: Arial\">Test</h1> <span>More test</span> </body> </html>"
}
POST /test/_search
{
"query": {
"match": {
"html": "span"
}
}
}
POST /test/_search
{
"query": {
"match": {
"html": "body"
}
}
}
POST /test/_search
{
"query": {
"match": {
"html": "more"
}
}
}
By default Elasticsearch will dynamically add new fields if it finds any during the indexing process (see this):
When Elasticsearch encounters a previously unknown field in a document, it uses dynamic mapping to determine the datatype for the field and automatically adds the new field to the type mapping.
To disable this behavior (see the doc for more details), the simplest is to put dynamic to false (prevents the automatic creation) or to strict (throws an exception and does not create a new document). In that case, you would need to explicitly write the mapping for the tags you wish to keep inside your _tags section, and pre parse the HTML document to only feed the tags you are interested in to Elasticsearch.
So let's say you have the following HTML document:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>A simple example</title>
</head>
<body>
<div>
<p><span class="ref">A sentence I want to reference from this HTML document</span></p>
<p><span class="">Something less important</span></p>
</body>
</html>
The first thing you want to have is a static mapping inside Elasticsearch, I would do the following (assuming the ref is a string):
PUT html
{
"mappings": {
"test":{
"dynamic": "strict",
"properties": {
"ref":{
"type": "string"
}
}
}
}
}
Now if you try adding a document this way, it will succeed:
PUT html/test/1
{
"ref": "A sentence I want to reference from this HTML document"
}
But this won't succeed:
PUT html/test/2
{
"ref": "A sentence I want to reference from this HTML document",
"some_field": "Some field"
}
Now the only thing remaining is to parse the HTML to retrieve the "ref" field, and create the above query (use whatever language you like, Java, Python...)
Edit: Actually to store the HTML without indexing it, in your mapping you simply need to set index to no (see here):
"_tags": {
"type": "nested",
"dynamic": true,
"index": "no"
}