How to fuzzy match email or telephone by Elasticsearch? - mysql

I want to make fuzzy match for email or telephone by Elasticsearch. For example:
match all emails end with #gmail.com
or
match all telephone startwith 136.
I know I can use wildcard,
{
"query": {
"wildcard" : {
"email": "*gmail.com"
}
}
}
but the performance is very poor. I tried to use regexp:
{"query": {"regexp": {"email": {"value": "*163\.com*"} } } }
But doesn't work.
Is there better way to make it?
curl -XGET localhost:9200/user_data
{
"user_data": {
"aliases": {},
"mappings": {
"user_data": {
"properties": {
"address": {
"type": "string"
},
"age": {
"type": "long"
},
"comment": {
"type": "string"
},
"created_on": {
"type": "date",
"format": "dateOptionalTime"
},
"custom": {
"properties": {
"key": {
"type": "string"
},
"value": {
"type": "string"
}
}
},
"gender": {
"type": "string"
},
"name": {
"type": "string"
},
"qq": {
"type": "string"
},
"tel": {
"type": "string"
},
"updated_on": {
"type": "date",
"format": "dateOptionalTime"
},
}
}
},
"settings": {
"index": {
"creation_date": "1458832279465",
"uuid": "Fbmthc3lR0ya51zCnWidYg",
"number_of_replicas": "1",
"number_of_shards": "5",
"version": {
"created": "1070299"
}
}
},
"warmers": {}
}
}
the mapping:
{
"settings": {
"analysis": {
"analyzer": {
"index_phone_analyzer": {
"type": "custom",
"char_filter": [ "digit_only" ],
"tokenizer": "digit_edge_ngram_tokenizer",
"filter": [ "trim" ]
},
"search_phone_analyzer": {
"type": "custom",
"char_filter": [ "digit_only" ],
"tokenizer": "keyword",
"filter": [ "trim" ]
},
"index_email_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [ "lowercase", "name_ngram_filter", "trim" ]
},
"search_email_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [ "lowercase", "trim" ]
}
},
"char_filter": {
"digit_only": {
"type": "pattern_replace",
"pattern": "\\D+",
"replacement": ""
}
},
"tokenizer": {
"digit_edge_ngram_tokenizer": {
"type": "edgeNGram",
"min_gram": "3",
"max_gram": "15",
"token_chars": [ "digit" ]
}
},
"filter": {
"name_ngram_filter": {
"type": "ngram",
"min_gram": "3",
"max_gram": "20"
}
}
}
},
"mappings" : {
"user_data" : {
"properties" : {
"name" : {
"type" : "string",
"analyzer" : "ik"
},
"age" : {
"type" : "integer"
},
"gender": {
"type" : "string"
},
"qq" : {
"type" : "string"
},
"email" : {
"type" : "string",
"analyzer": "index_email_analyzer",
"search_analyzer": "search_email_analyzer"
},
"tel" : {
"type" : "string",
"analyzer": "index_phone_analyzer",
"search_analyzer": "search_phone_analyzer"
},
"address" : {
"type": "string",
"analyzer" : "ik"
},
"comment" : {
"type" : "string",
"analyzer" : "ik"
},
"created_on" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"updated_on" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"custom": {
"type" : "nested",
"properties" : {
"key" : {
"type" : "string"
},
"value" : {
"type" : "string"
}
}
}
}
}
}
}

An easy way to do this is to create a custom analyzer which makes use of the n-gram token filter for emails (=> see below index_email_analyzer and search_email_analyzer + email_url_analyzer for exact email matching) and edge-ngram token filter for phones (=> see below index_phone_analyzer and search_phone_analyzer).
The full index definition is available below.
PUT myindex
{
"settings": {
"analysis": {
"analyzer": {
"email_url_analyzer": {
"type": "custom",
"tokenizer": "uax_url_email",
"filter": [ "trim" ]
},
"index_phone_analyzer": {
"type": "custom",
"char_filter": [ "digit_only" ],
"tokenizer": "digit_edge_ngram_tokenizer",
"filter": [ "trim" ]
},
"search_phone_analyzer": {
"type": "custom",
"char_filter": [ "digit_only" ],
"tokenizer": "keyword",
"filter": [ "trim" ]
},
"index_email_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [ "lowercase", "name_ngram_filter", "trim" ]
},
"search_email_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [ "lowercase", "trim" ]
}
},
"char_filter": {
"digit_only": {
"type": "pattern_replace",
"pattern": "\\D+",
"replacement": ""
}
},
"tokenizer": {
"digit_edge_ngram_tokenizer": {
"type": "edgeNGram",
"min_gram": "1",
"max_gram": "15",
"token_chars": [ "digit" ]
}
},
"filter": {
"name_ngram_filter": {
"type": "ngram",
"min_gram": "1",
"max_gram": "20"
}
}
}
},
"mappings": {
"your_type": {
"properties": {
"email": {
"type": "string",
"analyzer": "index_email_analyzer",
"search_analyzer": "search_email_analyzer"
},
"phone": {
"type": "string",
"analyzer": "index_phone_analyzer",
"search_analyzer": "search_phone_analyzer"
}
}
}
}
}
Now, let's dissect it one bit after another.
For the phone field, the idea is to index phone values with index_phone_analyzer, which uses an edge-ngram tokenizer in order to index all prefixes of the phone number. So if your phone number is 1362435647, the following tokens will be produced: 1, 13, 136, 1362, 13624, 136243, 1362435, 13624356, 13624356, 136243564, 1362435647.
Then when searching we use another analyzer search_phone_analyzer which will simply take the input number (e.g. 136) and match it against the phone field using a simple match or term query:
POST myindex
{
"query": {
"term":
{ "phone": "136" }
}
}
For the email field, we proceed in a similar way, in that we index the email values with the index_email_analyzer, which uses an ngram token filter, which will produce all possible tokens of varying length (between 1 and 20 chars) that can be taken from the email value. For instance: john#gmail.com will be tokenized to j, jo, joh, ..., gmail.com, ..., john#gmail.com.
Then when searching, we'll use another analyzer called search_email_analyzer which will take the input and try to match it against the indexed tokens.
POST myindex
{
"query": {
"term":
{ "email": "#gmail.com" }
}
}
The email_url_analyzer analyzer is not used in this example but I've included it just in case you need to match on the exact email value.

Related

Should be valid to one and only one of schema, but more than one are valid error

Im trying to create a json schema for following object models. But im getting following error when trying to validate data against the schemas.
should be valid to one and only one of schema, but more than one are valid: {"$ref":"#/$defs/drug-treatment"}{"$ref":"#/$defs/surgery-treatment"}
below is my schema
{
"$id": "someid",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"description": "JSON Schema for treatments",
"oneOf" : [
{ "$ref" : "#/$defs/drug-treatment" },
{ "$ref" : "#/$defs/surgery-treatment" }
],
"$defs": {
"base": {
"type": "object",
"properties": {
"type-tag": {
"enum": [ "SURGERY", "DRUGTREATMENT", "RADIOLOGY", "PHYSIOTHERAPY" ]
},
"id": {
"type": "string",
"format": "uuid"
},
"patient-id": {
"type": "string",
"format": "uuid"
},
"patient-name": {
"type": "string"
},
"provider-id": {
"type": "string",
"format": "uuid"
},
"provider-name": {
"type": "string"
},
"diagnosis": {
"type": "string"
},
"followup-treatments": {
"type" : "array",
"items" : { "$ref" : "#" }
}
},
"required": [
"id",
"type-tag",
"patient-id",
"patient-name",
"provider-id",
"provider-name",
"diagnosis",
"followup-treatments"
]
},
"drug-treatment": {
"allOf": [
{ "$ref" : "#/$defs/base" }
],
"properties": {
"drug": {
"type": "string"
},
"dosage": {
"type": "number"
},
"start-date": {
"type": "string",
"format": "date"
},
"end-date": {
"type": "string",
"format": "date"
},
"frequency": "integer"
},
"required": [
"drug",
"dosage",
"start-date",
"end-date",
"frequency"
],
"unevaluatedProperties" : false
},
"surgery-treatment": {
"allOf": [
{ "$ref" : "#/$defs/base" }
],
"properties": {
"surgery-date": {
"type": "string",
"format": "date"
},
"discharge-instructions": {
"type": "string"
},
"required": [
"surgery-date",
"discharge-instructions"
],
"unevaluatedProperties" : false
}
},
}
}
Here is sample data that im validating agains the schema.
"treatments": [
{
"type-tag": "DRUGTREATMENT",
"drug": "fdsds",
"dosage": 2.0,
"start-date": "2222-02-12",
"end-date": "2222-02-12",
"frequency": 2,
"id": "aa7da984-0252-45b1-b0cd-f1dbe98662e2",
"patient-id": "ab62420e-0bd8-4e39-8e0b-36e464b7abb2",
"patient-name": "Tom",
"provider-id": "154523b2-7598-4ed4-aab1-b2ef1692109c",
"provider-name": "gdsfdsd",
"diagnosis": "sfdsfds",
"followup-treatments": []
},
{
"type-tag": "SURGERY",
"surgery-date": "2222-02-12",
"discharge-instructions": "dsfdsfdsfds",
"id": "12d2e565-8966-4029-840b-1959277b37f6",
"patient-id": "ab62420e-0bd8-4e39-8e0b-36e464b7abb2",
"patient-name": "Tom",
"provider-id": "154523b2-7598-4ed4-aab1-b2ef1692109c",
"provider-name": "gdsfdsd",
"diagnosis": "fdsfds",
"followup-treatments": [],
}
]
but when im validating the type "DRUGTREATMENT" its giving me the error
should be valid to one and only one of schema, but more than one are valid: {"$ref":"#/$defs/drug-treatment"}{"$ref":"#/$defs/surgery-treatment"}
any idea whats Im missing here ?
I made some corrections to your schema but this should work for the sample data from your question. Your schema has some syntax errors and wrongly placed keywords:
{
"$id": "someid",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"description": "JSON schema generated with JSONBuddy https://www.json-buddy.com",
"type": "object",
"properties": {
"treatments": {
"type": "array",
"items": {
"$ref": "#/$defs/treatment-item"
}
}
},
"$defs": {
"treatment-item": {
"oneOf": [
{
"$ref": "#/$defs/drug-treatment"
},
{
"$ref": "#/$defs/surgery-treatment"
}
]
},
"base": {
"type": "object",
"properties": {
"type-tag": {
"enum": [
"SURGERY",
"DRUGTREATMENT",
"RADIOLOGY",
"PHYSIOTHERAPY"
]
},
"id": {
"type": "string",
"format": "uuid"
},
"patient-id": {
"type": "string",
"format": "uuid"
},
"patient-name": {
"type": "string"
},
"provider-id": {
"type": "string",
"format": "uuid"
},
"provider-name": {
"type": "string"
},
"diagnosis": {
"type": "string"
},
"followup-treatments": {
"type": "array",
"items": {
"$ref": "#/$defs/treatment-item"
},
"minItems": 0
}
},
"required": [
"id",
"type-tag",
"patient-id",
"patient-name",
"provider-id",
"provider-name",
"diagnosis",
"followup-treatments"
]
},
"drug-treatment": {
"allOf": [ { "$ref": "#/$defs/base" } ],
"properties": {
"drug": {
"type": "string"
},
"dosage": {
"type": "number"
},
"start-date": {
"type": "string",
"format": "date"
},
"end-date": {
"type": "string",
"format": "date"
},
"frequency": {
"type": "integer"
}
},
"required": [
"drug",
"dosage",
"start-date",
"end-date",
"frequency"
],
"unevaluatedProperties": false
},
"surgery-treatment": {
"allOf": [ { "$ref": "#/$defs/base" } ],
"properties": {
"surgery-date": {
"type": "string",
"format": "date"
},
"discharge-instructions": {
"type": "string"
},
"unevaluatedProperties": false
},
"required": [ "surgery-date", "discharge-instructions" ]
}
}
}
Edit: Showing error message with an arbitrary additional property.

Azure ARM Template for flexible mysql server gives "Internal Server Error"

I am trying to deploy a MySQL Flexible server cluster using ARM Templates and terraform (since terraform doesn't have any resource for mysql_flexible) but it gives me the following "Internal Server Error" without any meaningful information.
Please provide string value for 'version' (? for help): 5.7
{"status":"Failed","error":{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/DeployOperations for usage details.","details":[{"code":"Conflict","message":"{\r\n "status": "Failed",\r\n "error": {\r\n "code": "ResourceDeploymentFailure",\r\n "message": "The resource operation completed with terminal provisioning state 'Failed'.",\r\n "details": [\r\n {\r\n "code": "InternalServerError",\r\n "message": "An unexpected error occured while processing the request. Tracking ID: 'b8ab3a01-d4f2-40d5-92cf-2c9a239bdac3'"\r\n }\r\n ]\r\n }\r\n}"}]}}
There's not much information when I paste this tracking ID in Azure Activity Log.
Here's my sample template.json file which I am using.
{
"$schema" : "http://schema.management.azure.com/schemas/2014-04-01-preview/deploymentTemplate.json#",
"contentVersion" : "1.0.0.0",
"parameters" : {
"administratorLogin" : {
"type" : "String"
},
"administratorLoginPassword" : {
"type" : "SecureString"
},
"availabilityZone" : {
"type" : "String"
},
"location" : {
"type" : "String"
},
"name" : {
"type" : "String"
},
"version" : {
"type" : "String"
}
},
"resources" : [
{
"apiVersion" : "2021-05-01-preview",
"identity" : {
"type" : "SystemAssigned"
},
"location" : "eastus",
"name" : "mysql-abcd-eastus",
"properties" : {
"administratorLogin" : "randomuser",
"administratorLoginPassword" : "randompasswd",
"availabilityZone" : "1",
"backup" : {
"backupRetentionDays" : "7",
"geoRedundantBackup" : "Disabled"
},
"createMode" : "Default",
"highAvailability" : {
"mode" : "Enabled",
"standbyAvailabilityZone" : "2"
},
"network" : {
"delegatedSubnetResourceId" : "myactualsubnetid",
"privateDnsZoneResourceId" : "myactualprivatednszoneid"
},
"version" : "[parameters('version')]"
},
"sku" : {
"name" : "Standard_E4ds_v4",
"tier" : "MemoryOptimized"
},
"type" : "Microsoft.DBforMySQL/flexibleServers"
}
]
}
I tested your code and faced the same issue . So, as a solution you can try with below Code :
provider "azurerm" {
features {}
}
data "azurerm_resource_group" "example" {
name = "yourresourcegroup"
}
resource "azurerm_resource_group_template_deployment" "example" {
name = "acctesttemplate-01"
resource_group_name = data.azurerm_resource_group.example.name
parameters_content = jsonencode({
"administratorLogin"= {
"value"= "sqladmin"
},
"administratorLoginPassword"= {
"value": "password"
},
"location"= {
"value": "eastus"
},
"serverName"= {
"value"= "ansumantestsql1234"
},
"serverEdition"= {
"value"= "GeneralPurpose"
},
"vCores"= {
"value"= 2
},
"storageSizeGB"= {
"value"= 64
},
"haEnabled"= {
"value"= "ZoneRedundant"
},
"availabilityZone"= {
"value"= "1"
},
"standbyAvailabilityZone"= {
"value"= "2"
},
"version"= {
"value"= "5.7"
},
"tags"= {
"value"= {}
},
"firewallRules"= {
"value"= {
"rules"= []
}
},
"backupRetentionDays"= {
"value"= 7
},
"geoRedundantBackup"= {
"value"= "Disabled"
},
"vmName"= {
"value"= "Standard_D2ds_v4"
},
"publicNetworkAccess"= {
"value"= "Enabled"
},
"storageIops"= {
"value": 1000
},
"storageAutogrow"= {
"value"= "Enabled"
},
"vnetData"= {
"value"= {
"virtualNetworkName"= "testVnet",
"subnetName"= "testSubnet",
"virtualNetworkAddressPrefix"= "10.0.0.0/16",
"virtualNetworkResourceGroupName"= "[resourceGroup().name]",
"location"= "eastus2",
"subscriptionId"= "[subscription().subscriptionId]",
"subnetProperties"= {},
"isNewVnet"= false,
"subnetNeedsUpdate"= false,
"Network"= {}
}
},
"infrastructureEncryption"= {
"value"= "Disabled"
}
})
template_content = <<DEPLOY
{
"$schema": "http://schema.management.azure.com/schemas/2014-04-01-preview/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"administratorLogin": {
"type": "string"
},
"administratorLoginPassword": {
"type": "securestring"
},
"location": {
"type": "string"
},
"serverName": {
"type": "string"
},
"serverEdition": {
"type": "string"
},
"vCores": {
"type": "int",
"defaultValue": 4
},
"storageSizeGB": {
"type": "int"
},
"haEnabled": {
"type": "string",
"defaultValue": "Disabled"
},
"availabilityZone": {
"type": "string"
},
"standbyAvailabilityZone": {
"type": "string"
},
"version": {
"type": "string"
},
"tags": {
"type": "object",
"defaultValue": {}
},
"firewallRules": {
"type": "object",
"defaultValue": {}
},
"backupRetentionDays": {
"type": "int"
},
"geoRedundantBackup": {
"type": "string"
},
"vmName": {
"type": "string",
"defaultValue": "Standard_B1ms"
},
"publicNetworkAccess": {
"type": "string",
"metadata": {
"description": "Value should be either Enabled or Disabled"
}
},
"storageIops": {
"type": "int"
},
"storageAutogrow": {
"type": "string",
"defaultValue": "Enabled"
},
"vnetData": {
"type": "object",
"metadata": {
"description": "Vnet data is an object which contains all parameters pertaining to vnet and subnet"
},
"defaultValue": {
"virtualNetworkName": "testVnet",
"subnetName": "testSubnet",
"virtualNetworkAddressPrefix": "10.0.0.0/16",
"virtualNetworkResourceGroupName": "[resourceGroup().name]",
"location": "westus2",
"subscriptionId": "[subscription().subscriptionId]",
"subnetProperties": {},
"isNewVnet": false,
"subnetNeedsUpdate": false,
"Network": {}
}
},
"infrastructureEncryption": {
"type": "string"
}
},
"variables": {
"api": "2021-05-01-preview",
"firewallRules": "[parameters('firewallRules').rules]"
},
"resources": [
{
"apiVersion": "[variables('api')]",
"location": "[parameters('location')]",
"name": "[parameters('serverName')]",
"properties": {
"version": "[parameters('version')]",
"administratorLogin": "[parameters('administratorLogin')]",
"administratorLoginPassword": "[parameters('administratorLoginPassword')]",
"publicNetworkAccess": "[parameters('publicNetworkAccess')]",
"Network": "[if(empty(parameters('vnetData').Network), json('null'), parameters('vnetData').Network)]",
"Storage": {
"StorageSizeGB": "[parameters('storageSizeGB')]",
"Iops": "[parameters('storageIops')]",
"Autogrow": "[parameters('storageAutogrow')]"
},
"Backup": {
"backupRetentionDays": "[parameters('backupRetentionDays')]",
"geoRedundantBackup": "[parameters('geoRedundantBackup')]"
},
"availabilityZone": "[parameters('availabilityZone')]",
"highAvailability": {
"mode": "[parameters('haEnabled')]",
"standbyAvailabilityZone": "[parameters('standbyAvailabilityZone')]"
},
"dataencryption": {
"infrastructureEncryption": "[parameters('infrastructureEncryption')]"
}
},
"sku": {
"name": "[parameters('vmName')]",
"tier": "[parameters('serverEdition')]",
"capacity": "[parameters('vCores')]"
},
"tags": "[parameters('tags')]",
"type": "Microsoft.DBforMySQL/flexibleServers"
},
{
"condition": "[greater(length(variables('firewallRules')), 0)]",
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-08-01",
"name": "[concat('firewallRules-', copyIndex())]",
"copy": {
"count": "[if(greater(length(variables('firewallRules')), 0), length(variables('firewallRules')), 1)]",
"mode": "Serial",
"name": "firewallRulesIterator"
}
}
]
}
DEPLOY
deployment_mode = "Incremental"
}
Output:

Cloudformation template to create EMR cluster

I am trying to create EMR-5.30.1 clusters with applications such as Hadoop, livy, Spark, ZooKeeper, and Hive with the help of the CloudFormation template. But the issue is with this template is I am able the cluster with only one application from the above list of applications.
below is the CloudFormation Template
{
"AWSTemplateFormatVersion": "2010-09-09",
"Description": "Best Practice EMR Cluster for Spark or S3 backed Hbase",
"Parameters": {
"EMRClusterName": {
"Description": "Name of the cluster",
"Type": "String",
"Default": "emrcluster"
},
"KeyName": {
"Description": "Must be an existing Keyname",
"Type": "String",
"Default": "keyfilename"
},
"MasterInstanceType": {
"Description": "Instance type to be used for the master instance.",
"Type": "String",
"Default": "m5.xlarge"
},
"CoreInstanceType": {
"Description": "Instance type to be used for core instances.",
"Type": "String",
"Default": "m5.xlarge"
},
"NumberOfCoreInstances": {
"Description": "Must be a valid number",
"Type": "Number",
"Default": 1
},
"SubnetID": {
"Description": "Must be Valid public subnet ID",
"Default": "subnet-ee15b3e0",
"Type": "String"
},
"LogUri": {
"Description": "Must be a valid S3 URL",
"Default": "s3://aws/elasticmapreduce/",
"Type": "String"
},
"S3DataUri": {
"Description": "Must be a valid S3 bucket URL ",
"Default": "s3://aws/elasticmapreduce/",
"Type": "String"
},
"ReleaseLabel": {
"Description": "Must be a valid EMR release version",
"Default": "emr-5.30.1",
"Type": "String"
},
"Applications": {
"Description": "Please select which application will be installed on the cluster this would be either Ganglia and spark, or Ganglia and s3 backed Hbase",
"Type": "String",
"AllowedValues": [
"Spark",
"Hbase",
"Hive",
"Livy",
"ZooKeeper"
]
}
},
"Mappings": {},
"Conditions": {
"Spark": {
"Fn::Equals": [
{
"Ref": "Applications"
},
"Spark"
]
},
"Hbase": {
"Fn::Equals": [
{
"Ref": "Applications"
},
"Hbase"
]
},
"Hive": {
"Fn::Equals": [
{
"Ref": "Applications"
},
"Hive"
]
},
"Livy": {
"Fn::Equals": [
{
"Ref": "Applications"
},
"Livy"
]
},
"ZooKeeper": {
"Fn::Equals": [
{
"Ref": "Applications"
},
"ZooKeeper"
]
}
},
"Resources": {
"EMRCluster": {
"DependsOn": [
"EMRClusterServiceRole",
"EMRClusterinstanceProfileRole",
"EMRClusterinstanceProfile"
],
"Type": "AWS::EMR::Cluster",
"Properties": {
"Applications": [
{
"Name": "Ganglia"
},
{
"Fn::If": [
"Spark",
{
"Name": "Spark"
},
{
"Ref": "AWS::NoValue"
}
]
},
{
"Fn::If": [
"Hbase",
{
"Name": "Hbase"
},
{
"Ref": "AWS::NoValue"
}
]
},
{
"Fn::If": [
"Hive",
{
"Name": "Hive"
},
{
"Ref": "AWS::NoValue"
}
]
},
{
"Fn::If": [
"Livy",
{
"Name": "Livy"
},
{
"Ref": "AWS::NoValue"
}
]
},
{
"Fn::If": [
"ZooKeeper",
{
"Name": "ZooKeeper"
},
{
"Ref": "AWS::NoValue"
}
]
}
],
"Configurations": [
{
"Classification": "hbase-site",
"ConfigurationProperties": {
"hbase.rootdir":{"Ref":"S3DataUri"}
}
},
{
"Classification": "hbase",
"ConfigurationProperties": {
"hbase.emr.storageMode": "s3"
}
}
],
"Instances": {
"Ec2KeyName": {
"Ref": "KeyName"
},
"Ec2SubnetId": {
"Ref": "SubnetID"
},
"MasterInstanceGroup": {
"InstanceCount": 1,
"InstanceType": {
"Ref": "MasterInstanceType"
},
"Market": "ON_DEMAND",
"Name": "Master"
},
"CoreInstanceGroup": {
"InstanceCount": {
"Ref": "NumberOfCoreInstances"
},
"InstanceType": {
"Ref": "CoreInstanceType"
},
"Market": "ON_DEMAND",
"Name": "Core"
},
"TerminationProtected": false
},
"VisibleToAllUsers": true,
"JobFlowRole": {
"Ref": "EMRClusterinstanceProfile"
},
"ReleaseLabel": {
"Ref": "ReleaseLabel"
},
"LogUri": {
"Ref": "LogUri"
},
"Name": {
"Ref": "EMRClusterName"
},
"AutoScalingRole": "EMR_AutoScaling_DefaultRole",
"ServiceRole": {
"Ref": "EMRClusterServiceRole"
}
}
},
"EMRClusterServiceRole": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": [
"elasticmapreduce.amazonaws.com"
]
},
"Action": [
"sts:AssumeRole"
]
}
]
},
"ManagedPolicyArns": [
"arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole"
],
"Path": "/"
}
},
"EMRClusterinstanceProfileRole": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": [
"ec2.amazonaws.com"
]
},
"Action": [
"sts:AssumeRole"
]
}
]
},
"ManagedPolicyArns": [
"arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role"
],
"Path": "/"
}
},
"EMRClusterinstanceProfile": {
"Type": "AWS::IAM::InstanceProfile",
"Properties": {
"Path": "/",
"Roles": [
{
"Ref": "EMRClusterinstanceProfileRole"
}
]
}
}
},
"Outputs": {}
}
Also, I want to add a bootstrap script in this template as well, Can anyone please help me with the issue.
As per my knoweldge and understanding, Applications in your case should be an array like below, as mentioned in documentation
"Applications" : [ Application, ... ],
In you case, you can list applications like
"Applications" : [
{"Name" : "Spark"},
{"Name" : "Hbase"},
{"Name" : "Hive"},
{"Name" : "Livy"},
{"Name" : "Zookeeper"},
]
For more arguments other than Name to individual application dictionary , see detail here, you can pass Args, Additional_info etc
You can use following way:-
If you set "ReleaseLabel" then there is no need to mention versions of applications
"Applications": [{
"Name": "Hive"
},
{
"Name": "Presto"
},
{
"Name": "Spark"
}
]
For bootstrap:-
"BootstrapActions": [{
"Name": "setup",
"ScriptBootstrapAction": {
"Path": "s3://bucket/key/Bootstrap.sh"
}
}]
Define like this to create all applications at once.
{
"Type": "AWS::EMR::Cluster",
"Properties": {
"Applications": [
{
"Name": "Ganglia"
},
{
"Name": "Spark"
},
{
"Name": "Livy"
},
{
"Name": "ZooKeeper"
},
{
"Name": "JupyterHub"
}
]
}
}

if-then-else in JSON schema for multiple fields

How can I make few fields as "required" based on condition?
For example; in below case if status==Failed then i want only four fields as required ("orgId",
"subunitId",
"fundOutType",
"status")
but any other value of status will change the required field list: ("transactionId",
"orgId",
"subunitId",
"fundOutType",
"fundOutAmount",
"status")
My below solution is not working. Please help.
{
"$schema": "http://json-schema.org/draft-07/schema#",
"versionId": "1.0",
"javaInterfaces": [
"java.io.Serializable"
],
"type": "object",
"properties": {
"transactionId": {
"type": "string"
},
"orgId": {
"type": "string"
},
"subunitId": {
"type": "string"
},
"fundOutType": {
"type": "string"
},
"fundOutAmount": {
"type": "string"
},
"status": {
"type": "string"
},
"lang": {
"type": "string"
},
"transactionCreatedDateTime": {
"type": "integer",
"format": "date-time"
},
"userId": {
"type": "string"
}
},
"if": {
"properties": {
"status": {
"const": "Failed"
}
},
"required": [ "status" ]
},
"then": {
"required": [
"orgId",
"subunitId",
"fundOutType",
"status"
]
},
"else": {
"required": [
"transactionId",
"orgId",
"subunitId",
"fundOutType",
"fundOutAmount",
"status"
]
}
}
You can use the oneOf clause instead of the if/else:
{
"$schema": "http://json-schema.org/draft-07/schema#",
"versionId": "1.0",
"javaInterfaces": [
"java.io.Serializable"
],
"type": "object",
"properties": {
"transactionId": {
"type": "string"
},
"orgId": {
"type": "string"
},
"subunitId": {
"type": "string"
},
"fundOutType": {
"type": "string"
},
"fundOutAmount": {
"type": "string"
},
"status": {
"type": "string"
},
"lang": {
"type": "string"
},
"transactionCreatedDateTime": {
"type": "integer",
"format": "date-time"
},
"userId": {
"type": "string"
}
},
"oneOf": [
{
"properties": {
"status": {
"const": "Failed"
}
},
"required": [
"orgId",
"subunitId",
"fundOutType",
"status"
]
},
{
"required": [
"transactionId",
"orgId",
"subunitId",
"fundOutType",
"fundOutAmount",
"status"
]
}
]
}
See documentation https://json-schema.org/draft/2019-09/json-schema-core.html#rfc.section.9.2.1

Swagger - Invalid JSON errors

I'm just starting with Swagger UI and I'm trying to understand how it works.
So far I've entered some JSON (manually) and this is the result:
{
"swagger": "2.0",
"info": {
"version": "1.0.0",
"title": "PhakeApps API",
"contact": {
"name": "PhakeApps API team",
"url": "http://phakeapps.com/"
},
"license": {
"name": "Creative Commons 4.0 International",
"url": "http://creativecommons.org/licenses/by/4.0/"
}
},
"host": "api.phakeapps.com",
"basePath": "/v1",
"schemes": [
"http"
],
"produces": [
"application/json"
],
"consumes": [
"application/json"
],
"paths": {
"/places/search": {
"post": {
"tags": [
"Places"
],
"description": "Search for (a) place(s) <br /><br /> <b>id</b> - The ID of the request. <br /> <b>api_key</b> - API Key for the platform the request is sent. <i>Currently, not required.</i> <br /> <b>Params</b> - Required. <i>See model & model schema.</i>",
"operationId": "PlacesSearch",
"produces": [
"application/json"
],
"consumes": [
"application/json"
],
"parameters": [
{
"name": "request",
"in": "body",
"paramType": "body",
"description": "Object containing the <u>id</u>, <u>api_key</u> and certain <u>params</u>.",
"required": true,
"schema": {
"$ref": "#/definitions/Search"
}
}
],
"responses": {
"200": {
"description": "Success",
"schema": {
"$ref": "#/definitions/PlacesResult"
}
},
"403": {
"description": "Validation error or Server Failure",
"schema": {
"$ref": "#/definitions/Error"
}
}
}
}
}
},
"definitions": {
"PlacesResult": {
"required": [
"data",
"id",
"code"
],
"properties": {
"data": {
"$ref": "#/definitions/Places"
},
"id": {
"type": "integer",
"format": "int32"
},
"code": {
"type": "integer",
"format": "int32"
}
}
},
"Places": {
"required": [
"places"
],
"properties": {
"places": {
"$ref": "#/definitions/Place"
}
}
},
"City": {
"required": [
"id",
"name"
],
"properties": {
"id": {
"type": "string"
},
"name": {
"type": "string"
}
}
},
"Neighbourhood": {
"required": [
"id",
"name"
],
"properties": {
"id": {
"type": "string"
},
"name": {
"type": "string"
}
}
},
"Cuisine": {
"required": [
"id",
"name"
],
"properties": {
"id": {
"type": "string"
},
"name": {
"type": "string"
}
}
},
"Place": {
"required": [
"id",
"name",
"city",
"neighbourhood",
"address",
"cuisine",
"price",
"photos_cnt",
"lat",
"lng",
"is_fav"
],
"properties": {
"id": {
"type": "string"
},
"name": {
"type": "string"
},
"city": {
"type": "array",
"items": {
"$ref": "#/definitions/City"
}
},
"neighbourhood": {
"type": "array",
"items": {
"$ref": "#/definitions/Neighbourhood"
}
},
"address": {
"type": "string"
},
"cuisine": {
"type": "array",
"items": {
"$ref": "#/definitions/Cuisine"
}
},
"price": {
"type": "integer",
"format": "int32"
},
"photos_cnt": {
"type": "integer",
"format": "int32"
},
"lat": {
"type": "double"
},
"lng": {
"type": "double"
},
"is_fav": {
"type": "boolean"
}
}
},
"Search": {
"required": [
"id",
"api_key",
"params"
],
"properties": {
"id": {
"type": "integer",
"format": "int64"
},
"api_key": {
"type": "string"
},
"params": {
"$ref": "#/definitions/SearchParams"
}
}
},
"SearchParams": {
"required": [
"user_id",
"city_id",
"people",
"dt",
"locale"
],
"properties": {
"user_id": {
"type": "string",
"default": "956dda4c21c72e48f5f17a7cd783a0f7"
},
"city_id": {
"type": "string",
"default": "4ec4b3e6098c9d23c925b0c2451eb06a"
},
"people": {
"type": "integer",
"format": "int32",
"minimum": 1,
"default": 2
},
"dt": {
"type": "integer",
"format": "int32",
"default": "1427742000"
},
"locale": {
"type": "string",
"default": "bg"
},
"place_id": {
"type": "string",
"default": "0"
},
"neighborhood_id": {
"type": "string",
"default": "0"
},
"cuisine_id": {
"type": "string",
"default": "0"
},
"kids_place": {
"type": "boolean",
"default": false
},
"price": {
"type": "integer",
"format": "int64",
"default": 1
},
"outdoors": {
"type": "boolean",
"default": false
}
}
},
"Error": {
"required": [
"code",
"data"
],
"properties": {
"code": {
"type": "integer",
"format": "int32"
},
"data": {
"type": "array",
"items": {
"type": "array"
}
}
}
}
}
}
However, swagger's validator says it's not valid. The error I get is this
[
{
"level": "error",
"domain": "validation",
"keyword": "anyOf",
"message": "instance failed to match at least one required schema among 2",
"schema": {
"loadingURI": "http://json-schema.org/draft-04/schema#",
"pointer": "/properties/type"
},
"instance": {
"pointer": "/definitions/Place/properties/lat/type"
}
}
]
Note that it works as expected (so far). It displays the data (models and models' structure) properly. Make requests and retrieves responses. Yet the validator says it's not valid. (The yellow badge saying 'Invalid', not the red one that says 'Error').
What am I missing?
Your spec is indeed not valid. In your Place definition, you use "type": "double" to describe the type of the lat (and also lng) property, but such a type does not exist.
If you want to describe a numeric value of 'double' size, you should change the definition as follows:
"lng": {
"type": "number",
"format": "double"
}
Do that everywhere you use the double type, and it should resolve that issue.