Azure Cognitive Service Skill not persisting custom skill value - json

I have created a custom skill for my Azure Cognitive Service. After creating the data source, index, and indexer, and running the indexer, I can see that my function is being called and is outputting the correct information, but when I search the index, the field that is connected to the custom skill is empty.
This is my indexer definition:
{
"name": "idxdocs",
"description": null,
"dataSourceName": "dsdocs",
"skillsetName": "skillset-procuradores",
"targetIndexName": "customer-documents",
"disabled": null,
"schedule": null,
"parameters": {
"batchSize": null,
"maxFailedItems": -1,
"maxFailedItemsPerBatch": null,
"base64EncodeKeys": null,
"configuration": {
"indexedFileNameExtensions": ".pdf,.docx,.doc",
"dataToExtract": "contentAndMetadata",
"failOnUnprocessableDocument": false,
"failOnUnsupportedContentType": false,
"indexStorageMetadataOnlyForOversizedDocuments": true,
"allowSkillsetToReadFileData": true
}
},
"fieldMappings": [],
"outputFieldMappings": [
{
"sourceFieldName": "/document/content/procuradores",
"targetFieldName": "procuradores"
}
],
"cache": null,
"encryptionKey": null
}
This is my index:
{
"name": "customer-documents",
"fields": [
{
"name": "key",
"type": "Edm.String",
"facetable": true,
"filterable": true,
"key": true,
"retrievable": true,
"searchable": false,
"sortable": true,
"analyzer": null,
"indexAnalyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
},
{
"name": "content",
"type": "Edm.String",
"facetable": false,
"filterable": false,
"key": false,
"retrievable": true,
"searchable": true,
"sortable": false,
"analyzer": "es.lucene",
"indexAnalyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
},
{
"name": "procuradores",
"type": "Edm.String",
"facetable": false,
"filterable": true,
"key": false,
"retrievable": true,
"searchable": false,
"sortable": false,
"analyzer": null,
"indexAnalyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
}
],
"suggesters": [],
"scoringProfiles": [],
"defaultScoringProfile": null,
"corsOptions": null,
"analyzers": [],
"charFilters": [],
"tokenFilters": [],
"tokenizers": [],
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"k1": null,
"b": null
},
"encryptionKey": null,
"@odata.etag": "\"0x8D98BC85E9F6996\""
}
My skill set definition:
{
"name": "skillset-procuradores",
"description": "",
"skills": [
{
"@odata.type": "#Microsoft.Skills.Custom.WebApiSkill",
"name": "procuradores",
"description": "",
"context": "/document",
"uri": "my url ommited for secure reasons",
"httpMethod": "POST",
"timeout": "PT3M50S",
"batchSize": 1,
"degreeOfParallelism": null,
"inputs": [
{
"name": "text",
"source": "/document/content"
}
],
"outputs": [
{
"name": "procuradores",
"targetName": "procuradores"
}
],
"httpHeaders": {}
}
],
"cognitiveServices": null,
"knowledgeStore": null,
"encryptionKey": null
}
And finally my function output:
{
"values": [
{
"recordId": "1",
"data": {
"procuradores": "people's names"
}
}
]
}
What am I missing?

Rafael, can you give debug sessions a try? I suspect there is something happening in your custom skill that means the data is not coming back. By using this, you can test the actual input and output.
https://learn.microsoft.com/en-us/azure/search/cognitive-search-debug-session

This is just a hunch, but in the indexer definition, try changing your output field mapping sourceFieldName from "/document/content/procuradores" to "/document/procuradores". The context that you are giving in the skill is just "/document" so it should append the output onto that.

Related

How can I go deep into JSON data and loop through an array

I have an external API which returns the following JSON:
{
"resultSetMetaData": {
"page": 0,
"numPages": 2,
"numRows": 3,
"format": "json",
"rowType": [
{
"name": "FIRST_NAME",
"database": "PROD",
"schema": "RIC",
"table": "OWNER_VW",
"type": "text",
"scale": null,
"precision": null,
"nullable": true,
"byteLength": 16777216,
"collation": null,
"length": 16777216
},
{
"name": "LAST_NAME",
"database": "PROD",
"schema": "RIC",
"table": "OWNER_VW",
"type": "text",
"scale": null,
"precision": null,
"nullable": true,
"byteLength": 16777216,
"collation": null,
"length": 16777216
},
{
"name": "EMAIL",
"database": "PROD",
"schema": "RIC",
"table": "OWNER_VW",
"type": "text",
"scale": null,
"precision": null,
"nullable": true,
"byteLength": 16777216,
"collation": null,
"length": 16777216
},
{
"name": "CITY",
"database": "PROD",
"schema": "RIC",
"table": "OWNER_VW",
"type": "text",
"scale": null,
"precision": null,
"nullable": true,
"byteLength": 16777216,
"collation": null,
"length": 16777216
},
{
"name": "ZIP",
"database": "PROD",
"schema": "RIC",
"table": "OWNER_VW",
"type": "text",
"scale": null,
"precision": null,
"nullable": true,
"byteLength": 16777216,
"collation": null,
"length": 16777216
},
{
"name": "STATE",
"database": "PROD",
"schema": "RIC",
"table": "OWNER_VW",
"type": "text",
"scale": null,
"precision": null,
"nullable": true,
"byteLength": 16777216,
"collation": null,
"length": 16777216
},
{
"name": "ADDRESS",
"database": "PROD",
"schema": "RIC",
"table": "OWNER_VW",
"type": "text",
"scale": null,
"precision": null,
"nullable": true,
"byteLength": 16777216,
"collation": null,
"length": 16777216
},
{
"name": "PHONE",
"database": "PROD",
"schema": "RIC",
"table": "OWNER_VW",
"type": "text",
"scale": null,
"precision": null,
"nullable": true,
"byteLength": 16777216,
"collation": null,
"length": 16777216
},
{
"name": "UID",
"database": "PROD",
"schema": "RIC",
"table": "OWNER_VW",
"type": "text",
"scale": null,
"precision": null,
"nullable": true,
"byteLength": 16777216,
"collation": null,
"length": 16777216
}
]
},
"data": [
[
"0",
"HoT",
"Hot",
"No_Email@no-email.com",
"Miami",
"33",
"Florida",
"",
"",
""
],
[
"1",
"Richie",
"Hot",
"No_Email@no-email.com",
"Miami",
"331",
"Florida",
"",
"",
""
],
[
"2",
"Jeff",
"Hot",
"No_Email@no-email.com",
"Miami",
"33",
"Florida",
"",
"(0",
"4"
]
],
"code": "090001",
"statementStatusUrl": "/api/statements/01a4d180-0b03-2ad4-0000-f67903a02826?requestId=4d32040e-ba17-4936-a013-7dd7ab1797aa",
"requestId": "4d32040e-ba17-4936-a013-7dd7ab1797aa",
"sqlState": "00000",
"statementHandle": "01a4d180-0b03-2ad4-0000-f67903a02826",
"message": "Statement executed successfully.",
"createdOn": 1654725152956
}
Now I want to loop through the items inside the data[] array. How can I do so?
I wrote a test Flow and created a Parse JSON action with the above JSON as both the input and the template, and I get these... so I only get rowType. How can I loop through the data[] array?
Thanks
I loaded your data into an object variable called Data and then constructed a For each action beneath that ...
The expression in the screenshot is simply ...
variables('Data')['data']
That will loop through your array of data elements.
To get each item (which in this case is an array) then follow this pattern.
Firstly, initialise a variable (top arrow, variable is called Initialize Item) that is of type Array ...
Then within the For each loop, set that variable (as shown) by using the Current item dynamic object.
You will get your result from there.
Result ...

Modify value in the object containing a particular string using jq

I am trying to modify a large json file (a Grafana dashboard), replacing a single value, then output the whole file with the change. How can I do this?
You can see the value I want to edit here. The actual file is quite large, so there are many other top-level values, but I only need to edit a specific item under the "templating" block.
"templating": {
"list": [
{
"allValue": ".*",
"current": {},
"datasource": "$Source",
"hide": 0,
"includeAll": false,
"label": null,
"multi": true,
"name": "node",
"options": [],
"query": "label_values(node_boot_time{env=~\"$env\"}, instance)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"tags": [],
"text": "",
"value": ""
},
"datasource": "$Source",
"definition": "label_values(env)",
"hide": 0,
"includeAll": true,
"label": "env",
"multi": false,
"name": "env",
"options": [],
"query": "label_values(env)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"tags": [],
"text": "",
"value": ""
},
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "Source",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
}
]
},
The piece I need to change is the block containing "query": "label_values(env)", and I just need to change the value of "regex": "",
I have tried:
jq '.templating.list[] | select(.name == "env") |= . + {regex:"*"}' "dashboard.json" > test.json
The problem is then it only prints the ".list[]" elements instead of the whole file. I need to be able to make this change for multiple other files that will have the same block, but not necessarily in the same place so I can't just select by index number.
Output of above script:
{
"allValue": ".*",
"current": {},
"datasource": "$Source",
"hide": 0,
"includeAll": false,
"label": null,
"multi": true,
"name": "node",
"options": [],
"query": "label_values(node_boot_time{env=~\"$env\"}, instance)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
{
"allValue": null,
"current": {
"tags": [],
"text": "",
"value": ""
},
"datasource": "$Source",
"definition": "label_values(env)",
"hide": 0,
"includeAll": true,
"label": "env",
"multi": false,
"name": "env",
"options": [],
"query": "label_values(env)",
"refresh": 1,
"regex": "*",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
{
"current": {
"tags": [],
"text": "",
"value": ""
},
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "Source",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
}
Position |= earlier to retain the original structure.
.templating.list[] |= (select(.name == "env") .regex = "*")
Online demo
Your expected output doesn't quite match the description of your problem. If your requirement is to find, inside the templating list, the entry whose query is "label_values(env)" and update its regex to "", you need the filter below. To change it to * instead, use .regex = "*"
.templating.list[] |= ( select(.query == "label_values(env)").regex = "")
The key is to use the right path and use the select operator to get the object to update using the |= operator
jq-play snippet

How to extract multiple correlating variables from JSON

I have to extract multiple correlating variables from a response (which is json) in JMeter. Part of the response is listed below:
[
{
"data": {
"id": "efaa6876-7a8d-4723-9d85-1ed99e822f06",
"type": "courses",
"attributes": {
"created-at": "2019-02-07T16:38:50.735Z",
"contents-count": 267,
"units": [
{
"id": "31b5fcb1-24ee-441e-a0ee-ca859fc9a89d",
"position": null,
"progress": 0,
"completed": false,
"show_name": false,
"node_id": "1",
"children": [
{
"id": "b8ed75a3-0390-4273-82c3-03ee6eba729c",
"position": null,
"image": null,
"progress": 0,
"completed": false,
"show_name": true,
"node_id": "2",
"children": [],
"contents": [
{
"id": "fa1bdc2f-4330-425c-9c10-3734d07125aa",
"link": {
"url": "#",
"target": "_blank",
"class": "learning-object-link",
"data": {
"id": "fa1bdc2f-4330-425c-9c10-3734d07125aa",
"user-role": "teacher",
"open-method-tablet": "newtab",
"open-method-desktop": "modal",
"content-open-method": null,
"modal-size-method": "fully_responsive",
"fixed-width": null,
"fixed-height": null,
"aspect-ratio": null
}
},
"is_work": false,
"is_fun": false,
"completed": false,
"total_activities": 2,
"completed_activities": 0,
"progress": 0,
"updated_at": false,
"attempts": 0,
"duration": null
},
{
"id": "ceceabfd-5151-4656-af5d-3392c5a4c04c",
"link": {
"url": "#",
"target": "_blank",
"class": "learning-object-link",
"data": {
"id": "ceceabfd-5151-4656-af5d-3392c5a4c04c",
"user-role": "teacher",
"open-method-tablet": "newtab",
"open-method-desktop": "modal",
"content-open-method": null,
"modal-size-method": "fully_responsive",
"fixed-width": null,
"fixed-height": null,
"aspect-ratio": null
}
},
"is_work": false,
"is_fun": false,
"completed": false,
"total_activities": 2,
"completed_activities": 0,
"progress": 0,
"updated_at": false,
"attempts": 0,
"duration": null
}
]
},
{
"id": "60639cbd-f872-492d-b8e9-db83f8789fcf",
"position": null,
"image": null,
"progress": 0,
"completed": false,
"show_name": true,
"node_id": "3",
"children": [],
"contents": [
{
"id": "1825f834-7099-4bb4-b7a2-fc634faffc86",
"link": {
"url": "#",
"target": "_blank",
"class": "learning-object-link",
"data": {
"id": "1825f834-7099-4bb4-b7a2-fc634faffc86",
"user-role": "teacher",
"open-method-tablet": "newtab",
"open-method-desktop": "modal",
"content-open-method": null,
"modal-size-method": "fully_responsive",
"fixed-width": null,
"fixed-height": null,
"aspect-ratio": null
}
},
To proceed with the next request, I have to extract the unit id (e.g. 31b5fcb1-24ee-441e-a0ee-ca859fc9a89d), the children id (e.g. b8ed75a3-0390-4273-82c3-03ee6eba729c) and the contents id (e.g. fa1bdc2f-4330-425c-9c10-3734d07125aa). There are several units, each unit has several children and each child has several contents. Each content id matches just one child id and each child id matches just one unit id. Ids have to be selected on a random basis.
I've tried to extract all ids from the response and use them randomly, but it doesn't work this way.
To extract only the Unit Ids, you can use the following JSON Path expression:
$..data.attributes.units[?(@.id)].id
Random Value for Unit Id also can be extracted using JMeter JSON Extractor:

Load a JSON using Python and edit a key value and then save it and post it using request

I have a JSON of the below format
{
"board_title": "test",
"read_only": false,
"isIntegration": false,
"board_bgtype": "board_graph",
"created": "2017-08-16T06:40:47.158868+00:00",
"original_title": "Revised_CID_Templating-test(cloned)",
"modified": "2017-08-31T11:52:22.115661+00:00",
"disableEditing": false,
"height": 111,
"width": "100%",
"template_variables": [
{
"default": "identity",
"prefix": "v1",
"name": "env"
}
],
"created_by": {
"disabled": false,
"handle": "xx.com",
"name": null,
"is_admin": false,
"role": null,
"access_role": "st",
"verified": true,
"email": "xx.com",
"icon": "https://secure.gravatar.com/avatar/86fd6c17deba27cfc4081134a5bc0c6a?s=48&d=retro"
},
...
}
I need to load this JSON using Python and edit the value of
"template_variables": [
{
"default": "identity"
To some other value, say:
default : "com"
I load it using a Python script, and I can traverse it and print the key-value pairs, but I am not able to understand how to modify it.
How can I assign a value to the first child of template variables and save the JSON in the same file and post it using request?
You can think of json object as a dictionary.
try this.
import json

# Raw JSON document held as text; it is parsed into Python objects below.
jsonData = '''{
"board_title": "test",
"read_only": false,
"isIntegration": false,
"board_bgtype": "board_graph",
"created": "2017-08-16T06:40:47.158868+00:00",
"original_title": "Revised_CID_Templating-test(cloned)",
"modified": "2017-08-31T11:52:22.115661+00:00",
"disableEditing": false,
"height": 111,
"width": "100%",
"template_variables": [
{
"default": "identity",
"prefix": "v1",
"name": "env"
}
],
"created_by": {
"disabled": false,
"handle": "xx.com",
"name": null,
"is_admin": false,
"role": null,
"access_role": "st",
"verified": true,
"email": "xx.com",
"icon": "https://secure.gravatar.com/avatar/86fd6c17deba27cfc4081134a5bc0c6a?s=48&d=retro"
}
}'''

# json.loads turns the JSON object into a dict and the JSON array into a list.
jsonToPython = json.loads(jsonData)

# "template_variables" is a list; its first element is the dict to edit.
# The reference below points at the same dict that lives inside jsonToPython,
# so assigning through it updates the parsed document in place.
first_variable = jsonToPython['template_variables'][0]

print(first_variable['default'])   # value before the change
first_variable['default'] = 'test'
print(jsonToPython['template_variables'][0]['default'])   # value after the change
as you can see jsonToPython is being modified.

Ansible: EC2 provisioning and Iterations

I am trying to start a bunch of EC2 instances, then install something on them based on the IP given by AWS. With only one EC2 instance, I can add the host and proceed without any issue, but when I chain them using with_dict, I can't achieve it anymore...
The following runs as I want, but I can't understand how to deal with the registered variable ec2_infos I got from the provisioning...
# Launches one EC2 instance per entry of the ec2_stack dictionary.
- name: Create Test EC2 instances
  ec2:
    group: default
    image: ami-40d28157
    # item.value is the per-server mapping (type, name) taken from ec2_stack.
    instance_type: '{{item.value.type}}'
    instance_tags:
      Name: "{{ tag+'-'+item.value.name }}"
    key_name: privatekey
    region: us-west-1
    vpc_subnet_id: subnet-REDACTD
    wait: yes
  # Loop over the dictionary; each iteration exposes item.key and item.value.
  with_dict: '{{ec2_stack}}'
  # Because the task loops, the per-iteration results end up in ec2_infos.results.
  register: ec2_infos
With a dictionary like
# One entry per server to create; each entry carries the instance type and
# the name used to build the instance's Name tag.
ec2_stack:
  serv1:
    type: t2.micro
    name: server1
  serv2:
    type: t2.small
    name: server2
ec2_infos is structured like:
"ec2_infos": {
"changed": true,
"msg": "All items completed",
"results": [
{
"_ansible_item_result": true,
"_ansible_no_log": false,
"_ansible_parsed": true,
"changed": true,
"instance_ids": [
"i-0fewq09812ddq6"
],
"instances": [
{
"ami_launch_index": "0",
"architecture": "x86_64",
"block_device_mapping": {
"/dev/sda1": {
"delete_on_termination": true,
"status": "attached",
"volume_id": "vol-0987654"
}
},
"dns_name": "",
"ebs_optimized": false,
"groups": {
"sg-qdwdww": "default"
},
"hypervisor": "xen",
"id": "i-083665656521dwq6",
"image_id": "ami-40d28157",
"launch_time": "2016-11-24T20:38:53.000Z",
"placement": "us-west-1d",
"private_ip": "x.x.x.x",
"public_dns_name": "",
"public_ip": null,
"ramdisk": null,
"region": "us-east-1",
"root_device_name": "/dev/sda1",
"root_device_type": "ebs",
"state": "running",
"state_code": 16,
"tags": {
"Name": "server1",
"Team": "blah"
},
"tenancy": "default",
"virtualization_type": "hvm"
}
],
"invocation": {
"module_args": {
"assign_public_ip": false,
"exact_count": null,
"group": [
"default"
],
"group_id": null,
"id": null,
"image": "ami-40d28157",
"instance_ids": null,
"instance_initiated_shutdown_behavior": null,
"instance_profile_name": null,
"instance_tags": {
"Name": "server1",
"Team": "blah"
},
"instance_type": "t2.micro",
"kernel": null,
"volumes": null,
"vpc_subnet_id": "subnet-abcdfed",
"wait": true,
"wait_timeout": "300",
"zone": null
},
"module_name": "ec2"
},
"item": {
"key": "serv1",
"value": {
"name": "server1",
"type": "t2.micro"
}
},
"tagged_instances": []
},
{
"_ansible_item_result": true,
"_ansible_no_log": false,
"_ansible_parsed": true,
"changed": true,
"instance_ids": [
"i-0971278624334fd"
],
"instances": [
{
"ami_launch_index": "0",
"architecture": "x86_64",
"block_device_mapping": {
"/dev/sda1": {
"delete_on_termination": true,
"status": "attached",
"volume_id": "vol-9999999"
}
},
"dns_name": "",
"ebs_optimized": false,
"groups": {
"sg-redactd": "default"
},
"launch_time": "2016-11-24T20:39:21.000Z",
"private_ip": "y.y.y.y",
"public_dns_name": "",
"public_ip": null,
"ramdisk": null,
"state": "running",
"state_code": 16,
"tags": {
"Name": "serv2",
"Team": "blah"
},
"tenancy": "default",
"virtualization_type": "hvm"
}
],
"invocation": {
"module_args": {
"assign_public_ip": false,
"wait_timeout": "300",
"zone": null
},
"module_name": "ec2"
},
"item": {
"key": "server2",
"value": {
"name": "serv2",
"type": "t2.small"
}
},
"tagged_instances": []
}
]
}
I tried with_items and with_subelements in different ways, but I can't manage to get the IPs of all the new EC2 instances. I don't even need to sort them, just extract them from the instances part and feed them to add_host so I can proceed.
Does anybody know a clean way to do so, or would be kind enough to explain to me how to properly deal with a registered variable after a loop?
Answer from the comments:
ec2_infos.results | map(attribute='instances') | sum(start=[]) | map(attribute='private_ip') | list