The query below works perfectly when I run it in phpMyAdmin. I want to index these tables completely using Solr and produce the aggregated result with a single query.
"select biblio.biblionumber as 'id', biblio.*, biblioitems.*, items.*, branches.* from biblio
inner join biblioitems ON (biblioitems.biblionumber=biblio.biblionumber)
inner join items ON (items.biblionumber=biblio.biblionumber)
inner join branches ON (branches.uid=items.uid);
I tried this in Solr but could not get the desired result with the following configuration:
<document>
<entity name="id" query="select biblio.biblionumber as 'id', biblio.* from biblio ;">
<field column="BIBLIONUMBER" name="biblionumber" />
<field column="AUTHOR" name="author" />
<field column="TITLE" name="title" />
<field column="SERIESTITLE" name="seriestitle" />
<field column="COPYRIGHTDATE" name="copyrightdate" />
<field column="ABSTRACT" name="abstract" />
<entity name="id2" query="select biblioitems.biblioitemnumber as 'id2', biblioitems.* from biblioitems where biblionumber='${biblio.id}'">
<field name="BIBLIOITEMNUMBER" column="biblioitemnumber" />
<field name="ISBN" column="isbn" />
<field name="ISSN" column="issn" />
<field name="PUBLISHERCODE" column="publishercode" />
<field name="EDITIONSTATEMENT" column="editionstatement" />
<field name="PAGES" column="pages" />
<field name="PLACE" column="place" />
<field name="URL" column="url" />
</entity>
<entity name="id3" query="select items.uid as 'id3', items.* from items where biblionumber='${biblio.id}'">
<field name="ITEMNUMBER" column="itemnumber" />
<field name="PRICE" column="price" />
<field name="BARCODE" column="barcode" />
<field name="ENUMCHRON" column="enumchron" />
<field name="UID" column="uid" />
<field name="HOMEBRANCH" column="homebranch" />
<entity name="id4" query="select branches.uid AS 'id4', branches.* from branches where uid = '${items.id3}'">
<field name="UID" column="uid" />
<field name="BRANCHNAME" column="branchname" />
</entity>
</entity>
</entity>
</document>
The result is displayed only up to abstract; nothing appears from the point where the join operations (the nested entities) come into play. I'm struggling with the query.
I request you all to help me with this query.
Thanks in Advance!!!
Related
I want to index two tables from MySQL using Apache Solr. Please see my data-config and schema files below.
<dataConfig>
<dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost:3306/test" user="root" password="root" batchSize="1" />
<document name="tb_location">
<entity name="tb_location" query="SELECT * FROM tb_location">
<field column="loc_code" name="id"/>
<field column="loc_code" name="loc_code"/>
<field column="loc_name" name="loc_name"/>
<field column="loc_name" name="loc_name_ci"/>
<field column="ADM1_FULL_NAME" name="state"/>
</entity>
</document>
<document name="person">
<entity name="person" query="SELECT * FROM person">
<field column="id" name="personid"/>
<field column="fname" name="fname"/>
<field column="lname" name="lname"/>
<field column="town" name="town"/>
</entity>
</document>
</dataConfig>
Schema.xml
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="loc_code" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="loc_name" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="loc_name_ci" type="string_ci" indexed="true" stored="true" required="true" multiValued="false" />
<field name="state" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="personid" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="fname" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="lname" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="town" type="string" indexed="true" stored="true" required="true" multiValued="false" />
I also created a unique id for each table (id and personid). But when I run the data import, nothing is fetched or indexed. Can someone help me figure out where exactly the problem is?
Please check the below link for Multiple indexes...
Multiple indexes
Fixed it! The data-config.xml should be as follows, with both entities inside a single <document> element:
<dataConfig>
<dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost:3306/test" user="root" password="root" batchSize="1" />
<document name="tb_location">
<entity name="tb_location" query="SELECT * FROM tb_location">
<field column="loc_code" name="id"/>
<field column="loc_code" name="loc_code"/>
<field column="loc_name" name="loc_name"/>
<field column="loc_name" name="loc_name_ci"/>
<field column="ADM1_FULL_NAME" name="state"/>
</entity>
<entity name="person" query="SELECT * FROM person">
<field column="id" name="personid"/>
<field column="fname" name="fname"/>
<field column="lname" name="lname"/>
<field column="town" name="town"/>
</entity>
</document>
</dataConfig>
I did a basic Solr setup, configured the DataImportHandler, created a very simple data config file with two fields, and indexed it. It all worked fine. But now I am adding new fields and doing a full import afterwards, and for some reason the new fields are just not showing up in the search results (I am using the Solr admin interface for searching). I have tried restarting Solr and running reload-config, to no effect.
This is my data config file. I am not sure what's wrong here.
<dataConfig>
<dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost/msl4" user="root" password=""/>
<document>
<entity name="hub_contents" query="select * from hub_contents" deltaQuery="select * from hub_contents where last_modified > '${dataimporter.last_index_time}'">
<field column="id_original" name="id" />
<field column="title" name="title" />
<field column="parent_id" name="parent_id" />
<field column="item_type" name="item_type" />
<field column="status" name="status" />
<field column="updated_at" name="updated_at" />
</entity>
</document>
</dataConfig>
You can add the fields below to your schema.xml:
<field name="id" type="long" indexed="true" stored="true"/>
<field name="title" type="text_general" indexed="true" stored="true"/>
<field name="parent_id" type="long" indexed="true" stored="true"/>
<field name="item_type" type="text_general" indexed="true" stored="true"/>
<field name="status" type="text_general" indexed="true" stored="true" />
<field name="updated_at" type="date" indexed="true" stored="true"/>
Which type (fieldType) you choose for each field is up to you and depends on your requirements.
indexed: true if this field should be indexed (searchable or
sortable)
stored: true if this field should be retrievable
Add the below tag:
<uniqueKey>id</uniqueKey>
This is used to determine and enforce document uniqueness.
I am trying to use Solr to index data from my database.
After I index the data, when I query *:*
I get just the id field in the result, not all the fields I had in my query.
My data-config.xml
<document name="content">
<entity name="documen" query="SELECT indexId ,brand_id, category_id, product_name from Production">
<field column="indexId" name="id" />
<field column="category_id" name="categoryid" />
<field column="brand_id" name="brandid" />
<field column="product_name" name="id" />
</entity>
</document>
My schema.xml looks like this :
<field name="id" type="int" indexed="true" stored="true" required="true"/>
<field name="categoryid" type="int" indexed="true" stored="true"/>
<field name="brandid" type="int" indexed="true" stored="true" />
<field name="productname" type="string" indexed="true" stored="true"/>
When I query using *:* I get
<doc>
<str name="id">1</str>
<long name="_version_">1426653005792411648</long></doc>
<doc>
<str name="id">2</str>
<long name="_version_">1426653005793460224</long></doc>
<doc>
I get only the "id" field in the result.
Actually, whichever field is named in the <uniqueKey> tag is the one returned in the query result.
I have a problem searching across two dataSources. When I run a full import, I see that all my records are imported, but when I search, my results contain only the records from the second dataSource.
In my data-config.xml :
<document>
<entity name="one" dataSource="ds-1" query="SELECT * FROM artist">
<field column="name" name="name" />
</entity>
<entity name="two" dataSource="ds-2" query="SELECT * FROM faqdata">
<field column="thema" name="thema" />
</entity>
</document>
And in my schema.xml :
<fields>
<field name="id" type="int" indexed="true" stored="true" required="true" />
<field name="slug" type="string" indexed="false" stored="true"/>
<field name="name" type="text" indexed="true" stored="true" />
<field name="alt_name" type="text" indexed="false" stored="true"/>
<field name="created_at" type="date" indexed="false" stored="true"/>
<field name="updated_at" type="date" indexed="false" stored="true"/>
<field name="thema" type="text" indexed="true" stored="true" />
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*" type="ignored" multiValued="true" />
</fields>
<uniqueKey>id</uniqueKey>
<defaultSearchField>text</defaultSearchField>
<solrQueryParser defaultOperator="OR"/>
<copyField source="name" dest="text"/>
<copyField source="thema" dest="text"/>
What is the problem?
Thanks.
IDs in Solr need to be unique.
If you insert entities with the same ID, the previous record gets overwritten.
Solr does not update records in place; it deletes the old record and reinserts the new one.
If you want both the records, define a unique id.
e.g. Prepend Artist and faqdata to the id so that artists and faqdata don't overwrite each other.
SELECT A.*, 'ARTIST_' || ID PRIMARY_ID FROM ARTIST A
SELECT A.*, 'FAQDATA_' || ID PRIMARY_ID FROM FAQDATA A
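and use PRIMARY_ID as the primary id and unique field. (Note: if your database is MySQL, || is not string concatenation by default; use CONCAT('ARTIST_', ID) and CONCAT('FAQDATA_', ID) instead.)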
I'm having problems getting solr and mysql dates to play nice. If I comment out the sent field from the schema everything works fine. However, as soon as I add back in the date field I get this error for every document.
org.apache.solr.common.SolrException: [doc=116] missing required field: sent
Here's how I have Solr configured. I've checked to make sure that there are no empty/null dates, and there are none. I've also tried dateTimeFormat=yyyy-MM-dd'T'hh:mm:ss and no dateTimeFormat being set. I've also tried both date and tdate for the type of sent in the schema.
dataconfig.xml
<dataConfig>
<dataSource driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost:3306/hoplite" user="root" password="root"/>
<document>
<entity name="document" query="select * from document">
<field column="ID" name="id" />
<field column="RAW_TEXT" name="raw_text" />
<entity name="email" query="select * from email where document_id='${document.id}'">
<field column="TIME_SENT" name="sent" dateTimeFormat="yyyy-MM-dd'T'hh:mm:ss'Z'"/>
<field column="BODY" name="body" />
</entity>
</entity>
</document>
</dataConfig>
schema.xml
<field name="id" type="tint" indexed="true" stored="true" required="true" />
<field name="raw_text" type="text_general" indexed="true" stored="false" required="true" multiValued="true"/>
<field name="sent" type="date" indexed="true" stored="true" required="true" /> <!-- Import succeeds if I comment this line out -->
<field name="body" type="text_general" indexed="true" stored="true" required="true" />
Apparently for dates the field name has to be the same as the column name. So changing the files to the below fixed the problem. Note that time_sent is now both the column and field name.
data-config.xml
<dataConfig>
<dataSource driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost:3306/hoplite" user="root" password="root"/>
<document>
<entity name="document" query="select * from document">
<field column="ID" name="id" />
<field column="RAW_TEXT" name="raw_text" />
<entity name="email" query="select * from email where document_id='${document.id}'">
<field column="TIME_SENT" name="time_sent" dateTimeFormat="yyyy-MM-dd'T'hh:mm:ss'Z'"/>
<field column="BODY" name="body" />
</entity>
</entity>
</document>
</dataConfig>
schema.xml
<field name="id" type="tint" indexed="true" stored="true" required="true" />
<field name="raw_text" type="text_general" indexed="true" stored="false" required="true" multiValued="true"/>
<field name="time_sent" type="date" indexed="true" stored="true" required="true" /> <!-- Renamed from "sent" so the field name matches the TIME_SENT column -->
<field name="body" type="text_general" indexed="true" stored="true" required="true" />