Select all possible pairs of attributes in RapidMiner - rapidminer

I have a list where the total amount of attributes is unknown at first.
I want to concatenate all pairs of attributes, without knowing in advance how many attributes there are.
There is a Loop Attribute Subsets operator, but sadly it has no output.
Currently my process looks like this:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Question process: builds a small nominal example set (Group_1..Group_3),
  renames the attributes generically and concatenates three hard-coded
  attribute pairs. The pairs are wired by hand, which is why it does not
  scale to an unknown number of attributes.
  String constants in "attribute_values" are expressions, so their inner
  double quotes are escaped as &quot; to keep the attribute values well-formed.
-->
<process version="7.1.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Sample data: five generated example sets appended into one; the value "M" is then declared missing. -->
      <operator activated="true" class="subprocess" compatibility="7.1.001" expanded="true" height="82" name="Generate Data" width="90" x="45" y="75">
        <process expanded="true">
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="45" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;B&quot;"/>
              <parameter key="Group_3" value="&quot;C&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="180" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;B&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;D&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (3)" width="90" x="315" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;D&quot;"/>
              <parameter key="Group_2" value="&quot;A&quot;"/>
              <parameter key="Group_3" value="&quot;B&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (4)" width="90" x="450" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;M&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (5)" width="90" x="585" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;C&quot;"/>
              <parameter key="Group_2" value="&quot;M&quot;"/>
              <parameter key="Group_3" value="&quot;M&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.1.001" expanded="true" height="148" name="Append" width="90" x="720" y="30"/>
          <operator activated="true" class="declare_missing_value" compatibility="6.4.000" expanded="true" height="76" name="Declare Missing Value" width="90" x="855" y="30">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="M"/>
          </operator>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Stores the attribute count in the macro "num_attr"; no operator in this process reads it afterwards. -->
      <operator activated="true" class="extract_macro" compatibility="7.1.001" expanded="true" height="68" name="Extract Macro (2)" width="90" x="179" y="75">
        <parameter key="macro" value="num_attr"/>
        <parameter key="macro_type" value="number_of_attributes"/>
        <list key="additional_macros"/>
      </operator>
      <!-- Generic renaming; the Select Attributes operators below refer to att1..att3, so those are presumably the resulting names. -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (2)" width="90" x="313" y="75"/>
      <!-- One copy of the data per hard-coded pair. -->
      <operator activated="true" class="multiply" compatibility="7.1.001" expanded="true" height="124" name="Multiply (2)" width="90" x="179" y="300"/>
      <!-- Branch 1: pair att1/att2. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="380" y="210">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="|att1|att2"/>
      </operator>
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (5)" width="90" x="514" y="210"/>
      <!-- Branch 2: pair att1/att3. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="380" y="300">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att1||att3"/>
      </operator>
      <!-- Branch 3: pair att2/att3. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (4)" width="90" x="380" y="390">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att2||att3"/>
      </operator>
      <!-- Each selected pair is renamed generically again so that every branch can concatenate att1 and att2. -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (3)" width="90" x="514" y="390"/>
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (4)" width="90" x="514" y="300"/>
      <!-- Keep only rows in which neither attribute of the pair is missing. -->
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="648" y="210">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (3)" width="90" x="648" y="300">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (4)" width="90" x="648" y="390">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <!-- Concatenate the two attributes of each pair. -->
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (2)" width="90" x="782" y="390">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (3)" width="90" x="782" y="300">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (4)" width="90" x="782" y="210">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <!-- Stack the three per-pair results into a single example set. -->
      <operator activated="true" class="append" compatibility="7.1.001" expanded="true" height="124" name="Append (3)" width="90" x="916" y="255"/>
      <connect from_op="Generate Data" from_port="out 1" to_op="Extract Macro (2)" to_port="example set"/>
      <connect from_op="Extract Macro (2)" from_port="example set" to_op="Rename by Generic Names (2)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Rename by Generic Names (2)" from_port="original" to_port="result 2"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 3" to_op="Select Attributes (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Rename by Generic Names (5)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (5)" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename by Generic Names (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (4)" from_port="example set output" to_op="Rename by Generic Names (3)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (3)" from_port="example set output" to_op="Filter Examples (4)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (4)" from_port="example set output" to_op="Filter Examples (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Generate Concatenation (4)" to_port="example set input"/>
      <connect from_op="Filter Examples (3)" from_port="example set output" to_op="Generate Concatenation (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (4)" from_port="example set output" to_op="Generate Concatenation (2)" to_port="example set input"/>
      <connect from_op="Generate Concatenation (2)" from_port="example set output" to_op="Append (3)" to_port="example set 3"/>
      <connect from_op="Generate Concatenation (3)" from_port="example set output" to_op="Append (3)" to_port="example set 2"/>
      <connect from_op="Generate Concatenation (4)" from_port="example set output" to_op="Append (3)" to_port="example set 1"/>
      <connect from_op="Append (3)" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

This is a tough one. The Loop Subsets operator doesn't return a single example set because it will create multiple different example sets each consisting of attributes made from combinations of the input attributes. To get round this, the Recall and Remember operators can be used to store running totals. The story isn't finished yet because it's usually the case that a single example set is required so this means some extreme gymnastics are needed to rename and join.
To cut a long story short, I've enclosed a standalone example process that illustrates all of this. It won't work without adaptation to your data.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Answer process: concatenates every pair of attributes of the Iris sample data.
  Loop Subsets iterates over attribute pairs; each iteration logs the pair name,
  concatenates the two attributes and joins the result onto a running total that
  is kept with Remember / Recall. The operator order in this file is kept exactly
  as exported.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- One copy feeds the loop, the other is joined to the final result for comparison. -->
      <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="103" name="Multiply" width="90" x="45" y="136"/>
      <!-- Subset size is pinned to 2 (exact, min and max), i.e. pairs of attributes. -->
      <operator activated="true" class="loop_attribute_subsets" compatibility="7.0.001" expanded="true" height="68" name="Loop Subsets" width="90" x="179" y="34">
        <parameter key="exact_number_of_attributes" value="2"/>
        <parameter key="min_number_of_attributes" value="2"/>
        <parameter key="limit_max_number" value="true"/>
        <parameter key="max_number_of_attributes" value="2"/>
        <process expanded="true">
          <!-- Log the names of the current attribute pair and turn the log into an example set. -->
          <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="112" y="34">
            <list key="log">
              <parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
            </list>
          </operator>
          <operator activated="true" class="log_to_data" compatibility="7.0.001" expanded="true" height="103" name="Log to Data" width="90" x="112" y="238">
            <parameter key="log_name" value="Log"/>
          </operator>
          <!-- Copy the logged pair name into the macro "remember", then clear the log for the next iteration. -->
          <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="103" name="Subprocess" width="90" x="246" y="238">
            <process expanded="true">
              <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="136">
                <parameter key="macro" value="remember"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="Attributes"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
              </operator>
              <operator activated="true" class="clear_log" compatibility="7.0.001" expanded="true" height="82" name="Clear Log" width="90" x="380" y="136">
                <parameter key="log_name" value="Log"/>
                <parameter key="delete_table" value="true"/>
              </operator>
              <connect from_port="in 1" to_port="out 1"/>
              <connect from_port="in 2" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Clear Log" to_port="through 1"/>
              <connect from_op="Clear Log" from_port="through 1" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
          </operator>
          <!-- The author of the process notes that Materialize Data is required here even though it should not be. -->
          <operator activated="true" class="materialize_data" compatibility="7.0.001" expanded="true" height="82" name="Materialize Data" width="90" x="246" y="34"/>
          <!-- Rename the pair to common names, concatenate att1 and att2, drop the two inputs. -->
          <operator activated="true" class="rename_by_generic_names" compatibility="7.0.001" expanded="true" height="82" name="Rename by Generic Names" width="90" x="380" y="34"/>
          <operator activated="true" class="generate_concatenation" compatibility="7.0.001" expanded="true" height="82" name="Generate Concatenation" width="90" x="380" y="136">
            <parameter key="first_attribute" value="att1"/>
            <parameter key="second_attribute" value="att2"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="238">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="att2|att1"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Give the concatenated attribute the pair name captured in the "remember" macro. -->
          <operator activated="true" class="rename" compatibility="7.0.001" expanded="true" height="82" name="Rename" width="90" x="514" y="34">
            <parameter key="old_name" value="att1_att2"/>
            <parameter key="new_name" value="%{remember}"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!--
            First inner process: join the new column onto the stored running total.
            Second inner process: used when the first fails (no running total exists
            on the first iteration) and simply stores the current result.
            NOTE(review): the Join key pair "Play"/"Play" does not look like an Iris
            attribute; presumably it is ignored because the join uses the ID attribute.
            Confirm before relying on it.
          -->
          <operator activated="true" class="handle_exception" compatibility="7.0.001" expanded="true" height="82" name="Handle Exception" width="90" x="514" y="136">
            <process expanded="true">
              <operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall (2)" width="90" x="45" y="187">
                <parameter key="name" value="runningTotal"/>
                <parameter key="remove_from_store" value="false"/>
              </operator>
              <operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join" width="90" x="179" y="34">
                <list key="key_attributes">
                  <parameter key="Play" value="Play"/>
                </list>
              </operator>
              <operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember" width="90" x="246" y="187">
                <parameter key="name" value="runningTotal"/>
              </operator>
              <connect from_port="in 1" to_op="Join" to_port="left"/>
              <connect from_op="Recall (2)" from_port="result" to_op="Join" to_port="right"/>
              <connect from_op="Join" from_port="join" to_op="Remember" to_port="store"/>
              <connect from_op="Remember" from_port="stored" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="179" y="34">
                <parameter key="name" value="runningTotal"/>
              </operator>
              <connect from_port="in 1" to_op="Remember (2)" to_port="store"/>
              <connect from_op="Remember (2)" from_port="stored" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="example set" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_op="Log to Data" to_port="through 1"/>
          <connect from_op="Log to Data" from_port="exampleSet" to_op="Subprocess" to_port="in 2"/>
          <connect from_op="Log to Data" from_port="through 1" to_op="Subprocess" to_port="in 1"/>
          <connect from_op="Subprocess" from_port="out 1" to_op="Materialize Data" to_port="example set input"/>
          <connect from_op="Materialize Data" from_port="example set output" to_op="Rename by Generic Names" to_port="example set input"/>
          <connect from_op="Rename by Generic Names" from_port="example set output" to_op="Generate Concatenation" to_port="example set input"/>
          <connect from_op="Generate Concatenation" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Handle Exception" to_port="in 1"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>
      <!-- Recall sits in a subprocess fed by the loop output so that it runs only after the loop has finished. -->
      <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="82" name="Subprocess (2)" width="90" x="313" y="34">
        <process expanded="true">
          <operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall" width="90" x="246" y="85">
            <parameter key="name" value="runningTotal"/>
          </operator>
          <connect from_op="Recall" from_port="result" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Join the running total to the original data so the result is easy to inspect. -->
      <operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join (2)" width="90" x="581" y="136">
        <list key="key_attributes"/>
      </operator>
      <connect from_op="Retrieve Iris" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Join (2)" to_port="right"/>
      <connect from_op="Loop Subsets" from_port="example set" to_op="Subprocess (2)" to_port="in 1"/>
      <connect from_op="Subprocess (2)" from_port="out 1" to_op="Join (2)" to_port="left"/>
      <connect from_op="Join (2)" from_port="join" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="90"/>
    </process>
  </operator>
</process>
Points to note
The Loop Subsets operator is set to pick pairs of attributes
Using Log and Log to Data inside Loop Subsets allows the current pair of attributes to be logged, transferred to an example set and then copied into a macro.
Attributes are renamed to a common name, concatenated and then the result is renamed back to the original name.
A running total example set is created by using Join to the previous iteration. The first time in, there is no previous iteration and this is handled by the Handle Exception operator.
Outside the Loop Subsets operator, the running total example set is recalled inside a Sub Process to ensure the execution order comes out correctly.
The running total is joined to the original data to make it easy to see if it worked or not.
One final point, the Materialize Data operator is required even though it shouldn't be.

Related

How to store association rules from RapidMiner into MySQL table

I need to export fp-growth association rules from RapidMiner to a MySQL database.
The table contains those columns: premises, conclusion, support and confidence.
Which operator should I use?
You can use the "Association Rules to ExampleSet" operator from the Converters extension, available at the RapidMiner Marketplace. The relevant attributes of the resulting example set can then easily be stored in a database.
See the sample process below for an example.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample process: discretizes the Iris data, mines frequent item sets with
  FP-Growth, creates association rules and converts them into an example set
  via the Converters extension operator.
-->
<process version="9.0.002">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="9.0.002" expanded="true" height="68" name="Iris" origin="GENERATED_TUTORIAL" width="90" x="45" y="120">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Bin the input attributes into 5 frequency-based ranges, then convert to binominal attributes for FP-Growth. -->
      <operator activated="true" class="discretize_by_frequency" compatibility="7.1.001" expanded="true" height="103" name="Discretize by Frequency" origin="GENERATED_TUTORIAL" width="90" x="179" y="120">
        <parameter key="number_of_bins" value="5"/>
        <parameter key="range_name_type" value="short"/>
      </operator>
      <operator activated="true" class="nominal_to_binominal" compatibility="7.1.001" expanded="true" height="103" name="Nominal to Binominal" origin="GENERATED_TUTORIAL" width="90" x="313" y="120">
        <parameter key="transform_binominal" value="true"/>
        <parameter key="use_underscore_in_name" value="true"/>
      </operator>
      <!-- Frequent item sets with a minimum support of 0.1. -->
      <operator activated="true" class="concurrency:fp_growth" compatibility="9.0.002" expanded="true" height="82" name="FPGrowth" origin="GENERATED_TUTORIAL" width="90" x="447" y="120">
        <parameter key="min_support" value="0.1"/>
        <parameter key="find_min_number_of_itemsets" value="false"/>
        <parameter key="min_number_of_itemsets" value="1"/>
        <enumeration key="must_contain_list"/>
      </operator>
      <operator activated="true" class="create_association_rules" compatibility="9.0.002" expanded="true" height="82" name="Create Association Rules" origin="GENERATED_TUTORIAL" width="90" x="581" y="120"/>
      <!-- Converters extension: turns the rules object into an example set that can be written to a database. -->
      <operator activated="true" class="converters:rules_2_example_set" compatibility="0.4.000" expanded="true" height="82" name="Association Rules to ExampleSet" width="90" x="782" y="136"/>
      <connect from_op="Iris" from_port="output" to_op="Discretize by Frequency" to_port="example set input"/>
      <connect from_op="Discretize by Frequency" from_port="example set output" to_op="Nominal to Binominal" to_port="example set input"/>
      <connect from_op="Nominal to Binominal" from_port="example set output" to_op="FPGrowth" to_port="example set"/>
      <connect from_op="FPGrowth" from_port="frequent sets" to_op="Create Association Rules" to_port="item sets"/>
      <connect from_op="Create Association Rules" from_port="rules" to_op="Association Rules to ExampleSet" to_port="rules input"/>
      <connect from_op="Association Rules to ExampleSet" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="90"/>
      <portSpacing port="sink_result 2" spacing="18"/>
    </process>
  </operator>
</process>

Calculate Percentage Values

My Rapidminer process results are published as follows
Row No. Count
1 9.0
2 11.0
3 32.0
If I want to calculate:
(9/32)*100 and
(11/32)*100
from this result set, how would I do it?
The solution is not quite straightforward, as RapidMiner normally treats examples (rows) as independent of each other.
What you can do is to extract the value needed as a macro and use it in the Generate Attributes Operator.
See the attached sample process for a solution to your particular problem. Just copy and paste the XML below to your process window in RapidMiner.
Also feel free to ask further, or re-post, questions in the RapidMiner community forum.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample process: builds the three-row "Count" example set from the question
  (9, 11, 32), extracts the third value into the macro "divisor" and generates
  a "Percentage" attribute as Count divided by that macro, times 100.
-->
<process version="7.6.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="generate_data_user_specification" compatibility="7.6.001" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="112" y="85">
        <list key="attribute_values">
          <parameter key="Count" value="9"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="generate_data_user_specification" compatibility="7.6.001" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="112" y="187">
        <list key="attribute_values">
          <parameter key="Count" value="11"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="generate_data_user_specification" compatibility="7.6.001" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="112" y="340">
        <list key="attribute_values">
          <parameter key="Count" value="32"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="append" compatibility="7.6.001" expanded="true" height="124" name="Append" width="90" x="380" y="187"/>
      <operator activated="true" class="extract_macro" compatibility="7.6.001" expanded="true" height="68" name="Extract Macro" width="90" x="581" y="187">
        <parameter key="macro" value="divisor"/>
        <parameter key="macro_type" value="data_value"/>
        <parameter key="attribute_name" value="Count"/>
        <parameter key="example_index" value="3"/>
        <list key="additional_macros"/>
        <description align="center" color="green" colored="true" width="126">Extracting the third value as a macro. It can be the called using the %{macro_name} syntax</description>
      </operator>
      <!-- The macro is substituted as text, so it is parsed to a number before dividing. -->
      <operator activated="true" class="generate_attributes" compatibility="7.6.001" expanded="true" height="82" name="Generate Attributes" width="90" x="782" y="187">
        <list key="function_descriptions">
          <parameter key="Percentage" value="(Count / parse(%{divisor})) * 100"/>
        </list>
        <description align="center" color="green" colored="true" width="126">Creating a new Attribute (column) with the desired calculation&lt;br&gt;&lt;br&gt;Check the final paragraph of the help text for the &quot;Generate Attribute&quot; Operator for a description of how to work with macros</description>
      </operator>
      <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
      <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
      <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
      <connect from_op="Append" from_port="merged set" to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <description align="center" color="yellow" colored="false" height="581" resized="true" width="444" x="56" y="18">Generating sample data to fit the original problem</description>
    </process>
  </operator>
</process>

Rapidminer, sort and filter attributes

I am using RapidMiner, and I would like to sort my attributes per example and then keep only the 15 highest values. Can someone give me a clue on that?
Thanks.
So if I understand you correctly, you have an example set and for each row you want to sort the attributes and keep only the 15 biggest?
So imagine you have this random data set:
You try to get this result for your first example (limited to 5 biggest attributes for simplicity):
This can be done by looping over your example set with Loop Examples, filter each row with Filter Example Range, then Transpose the rows to columns, which then again can be sorted with Sort and limited to the max 15 values using Filter Example Range again.
Please note that these new examples (rows) cannot easily be represented in a single example set, since they presumably all have different attributes in a different order; you will therefore get a collection of separate, sorted example sets.
Here is an example process:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample process: for every example (row) of a generated data set, transposes
  the row so that attributes become rows, sorts them by value in decreasing
  order and keeps the first 5. The loop output is connected to the process result.
-->
<process version="7.5.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Random data: 10 examples, 10 attributes. -->
      <operator activated="true" class="generate_data" compatibility="7.5.000" expanded="true" height="68" name="Generate Data" width="90" x="45" y="34">
        <parameter key="number_examples" value="10"/>
        <parameter key="number_of_attributes" value="10"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="7.5.000" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="label"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <description align="center" color="transparent" colored="false" width="126">remove label for clarity</description>
      </operator>
      <operator activated="true" class="loop_examples" compatibility="7.5.000" expanded="true" height="103" name="Loop Examples" width="90" x="313" y="34">
        <process expanded="true">
          <!-- Step 1: isolate the row whose index is in the "example" macro. -->
          <operator activated="true" class="filter_example_range" compatibility="7.5.000" expanded="true" height="82" name="Filter Example Range" width="90" x="45" y="34">
            <parameter key="first_example" value="%{example}"/>
            <parameter key="last_example" value="%{example}"/>
            <description align="center" color="transparent" colored="false" width="126">only work on current example</description>
          </operator>
          <!-- Step 2: turn the single row into one row per attribute. -->
          <operator activated="true" class="transpose" compatibility="7.5.000" expanded="true" height="82" name="Transpose" width="90" x="179" y="34">
            <description align="center" color="transparent" colored="false" width="126">transpose the row to columns with attribute name and attribute value</description>
          </operator>
          <!-- Step 3: readable column names; "att_1" becomes "Value". -->
          <operator activated="true" class="rename" compatibility="7.5.000" expanded="true" height="82" name="Rename" width="90" x="313" y="34">
            <parameter key="old_name" value="id"/>
            <parameter key="new_name" value="Example%{example}AttributeName"/>
            <list key="rename_additional_attributes">
              <parameter key="att_1" value="Value"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">rename for clarity, include original &quot;row&quot; number in attribute name</description>
          </operator>
          <!-- Step 4: largest values first. -->
          <operator activated="true" class="sort" compatibility="7.5.000" expanded="true" height="82" name="Sort" width="90" x="447" y="34">
            <parameter key="attribute_name" value="Value"/>
            <parameter key="sorting_direction" value="decreasing"/>
            <description align="center" color="transparent" colored="false" width="126">sort decreasing by attribute value</description>
          </operator>
          <!-- Step 5: keep rows 1 to 5; raise last_example to keep more. -->
          <operator activated="true" class="filter_example_range" compatibility="7.5.000" expanded="true" height="82" name="Filter Example Range (2)" width="90" x="581" y="34">
            <parameter key="first_example" value="1"/>
            <parameter key="last_example" value="5"/>
            <description align="center" color="transparent" colored="false" width="126">keep 5 attributes with biggest values</description>
          </operator>
          <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Transpose" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Sort" to_port="example set input"/>
          <connect from_op="Sort" from_port="example set output" to_op="Filter Example Range (2)" to_port="example set input"/>
          <connect from_op="Filter Example Range (2)" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Loop over all examples, current index is stored in the macro &quot;example&quot;</description>
      </operator>
      <connect from_op="Generate Data" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Build Correlation Matrix to use two different cells of two Csv files in Rapid miner

I want to build a correlation matrix from two different cells of two different CSV files. Can anyone tell me how I can specify one column from one file and, likewise, one column from the other file?
You have to create a new example set by joining the two columns together with the Join operator and then you can calculate the correlation matrix. Make sure when joining that the two example sets have the same ID attribute.
The code block below shows an example process of how to select and join two attributes.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Example process: take one attribute from each of two example sets, join them,
     and feed the joined set into Correlation Matrix. Both inputs here are the Iris
     sample data; replace the two Retrieve operators with your own sources. -->
<process version="6.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.000-SNAPSHOT"
            expanded="true" name="Process">
    <process expanded="true">

      <!-- Left branch: load Iris and keep the single attribute "a1". -->
      <operator activated="true" class="retrieve" compatibility="7.0.000-SNAPSHOT"
                expanded="true" height="60" name="Retrieve Iris"
                width="90" x="45" y="75">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="7.0.000-SNAPSHOT"
                expanded="true" height="76" name="Select Attributes"
                width="90" x="179" y="75">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a1"/>
      </operator>

      <!-- Right branch: load Iris again and keep the single attribute "a2". -->
      <operator activated="true" class="retrieve" compatibility="7.0.000-SNAPSHOT"
                expanded="true" height="60" name="Retrieve Iris (2)"
                width="90" x="45" y="255">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="7.0.000-SNAPSHOT"
                expanded="true" height="76" name="Select Attributes (2)"
                width="90" x="179" y="255">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a2"/>
      </operator>

      <!-- Join: no explicit key attributes are listed. As the accompanying answer notes,
           both inputs must carry the same ID attribute for the join to line up. -->
      <operator activated="true" class="join" compatibility="7.0.000-SNAPSHOT"
                expanded="true" height="76" name="Join"
                width="90" x="380" y="165">
        <list key="key_attributes"/>
      </operator>

      <!-- Correlation Matrix runs on the joined example set. -->
      <operator activated="true" class="correlation_matrix" compatibility="7.0.000-SNAPSHOT"
                expanded="true" height="94" name="Correlation Matrix"
                width="90" x="581" y="165"/>

      <!-- Wiring: each Retrieve feeds its Select Attributes, which feeds one side of Join. -->
      <connect from_op="Retrieve Iris" from_port="output"
               to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output"
               to_op="Join" to_port="left"/>
      <connect from_op="Retrieve Iris (2)" from_port="output"
               to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output"
               to_op="Join" to_port="right"/>
      <connect from_op="Join" from_port="join"
               to_op="Correlation Matrix" to_port="example set"/>
      <connect from_op="Correlation Matrix" from_port="matrix" to_port="result 1"/>

      <!-- Canvas layout hints for the top-level ports. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

RapidMiner - process data table rows as documents

I need help with the following question: I would like to process each row of a data table (ExampleSet structure (label[datatype]): mid[int], body[text]), obtained by Read Database, as a document, so that I can apply some filters to each document (stop words, filters, and so on). Can anyone help me?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Process from the question: read the columns mid and body from a database, then pass
     the result through Loop Data Sets, whose inner process extracts a document from the
     "body" attribute and applies text operators to it. -->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <process expanded="true">
      <!-- Read Database. The less-than sign in the WHERE clause is written as an entity,
           because a literal one is not allowed inside an XML attribute value. -->
      <operator activated="true" class="read_database" compatibility="5.3.015" expanded="true" height="60" name="Server Connection" width="90" x="112" y="120">
        <parameter key="connection" value="Server"/>
        <parameter key="query" value="SELECT `mid`, `body` FROM `message` WHERE `mid` &lt; 10 ORDER BY `mid`"/>
        <enumeration key="parameters"/>
      </operator>
      <operator activated="true" class="loop_data_sets" compatibility="5.3.015" expanded="true" height="76" name="Loop Data Sets" width="90" x="111" y="210">
        <process expanded="true">
          <!-- Extract Document reads attribute "body" at example_index 1 only. -->
          <operator activated="true" class="text:extract_document" compatibility="5.3.002" expanded="true" name="Extract Document (2)">
            <parameter key="attribute_name" value="body"/>
            <parameter key="example_index" value="1"/>
          </operator>
          <!-- Text chain: transform cases, tokenize, dictionary stop-word filter, length filter. -->
          <operator activated="true" class="text:transform_cases" compatibility="5.3.002" expanded="true" name="Transform Cases (2)"/>
          <operator activated="true" class="text:tokenize" compatibility="5.3.002" expanded="true" name="Tokenize (2)"/>
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.3.002" expanded="true" name="Filter Stopwords (2)">
            <parameter key="file" value="C:\User\stopwords.txt"/>
          </operator>
          <operator activated="true" class="text:filter_by_length" compatibility="5.3.002" expanded="true" name="Filter Tokens (2)">
            <parameter key="min_chars" value="2"/>
          </operator>
          <!-- Wiring of the inner process, in the order listed above. -->
          <connect from_port="example set" to_op="Extract Document (2)" to_port="example set"/>
          <connect from_op="Extract Document (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
          <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Filter Tokens (2)" to_port="document"/>
          <connect from_op="Filter Tokens (2)" from_port="document" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: database result into the loop, loop output to result 1. -->
      <connect from_op="Server Connection" from_port="output" to_op="Loop Data Sets" to_port="example set 1"/>
      <connect from_op="Loop Data Sets" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
I've finally found a solution:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Process from the answer: read the columns mid and body from a database, run the
     result through Process Documents from Data (whose inner process is the text chain
     below), then apply Generate ID to the resulting example set. -->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <process expanded="true">
      <!-- Read Database. The less-than sign in the WHERE clause is written as an entity,
           because a literal one is not allowed inside an XML attribute value. -->
      <operator activated="true" class="read_database" compatibility="5.3.015" expanded="true" height="60" name="Server Connection" width="90" x="45" y="75">
        <parameter key="connection" value="Server"/>
        <parameter key="query" value="SELECT `mid`, `body` FROM `message` WHERE `mid` &lt; 10 ORDER BY `mid`"/>
        <enumeration key="parameters"/>
      </operator>
      <!-- Process Documents from Data: word-vector creation and meta information are
           switched off, and keep_text is switched on. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="5.3.002" expanded="true" height="76" name="Process Documents from Data (2)" width="90" x="45" y="165">
        <parameter key="create_word_vector" value="false"/>
        <parameter key="add_meta_information" value="false"/>
        <parameter key="keep_text" value="true"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <!-- Inner chain: HTML text extraction, case transformation, tokenization,
               dictionary stop-word filter, and token length filter (min_chars 3). -->
          <operator activated="true" class="web:extract_html_text_content" compatibility="5.3.001" expanded="true" height="60" name="Extract Content" width="90" x="45" y="30">
            <parameter key="minimum_text_block_length" value="3"/>
          </operator>
          <operator activated="true" class="text:transform_cases" compatibility="5.3.002" expanded="true" height="60" name="Transform Cases" width="90" x="45" y="120"/>
          <operator activated="true" class="text:tokenize" compatibility="5.3.002" expanded="true" height="60" name="Tokenize (3)" width="90" x="45" y="210"/>
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.3.002" expanded="true" height="76" name="Filter Stopwords (3)" width="90" x="45" y="300">
            <parameter key="file" value="C:\User\stopwords.txt"/>
          </operator>
          <operator activated="true" class="text:filter_by_length" compatibility="5.3.002" expanded="true" height="60" name="Filter Tokens (3)" width="90" x="45" y="390">
            <parameter key="min_chars" value="3"/>
          </operator>
          <!-- A second tokenize operator in regular-expression mode with an http/https URL
               pattern. NOTE(review): the name suggests it is meant to remove links; confirm. -->
          <operator activated="true" class="text:tokenize" compatibility="5.3.002" expanded="true" height="60" name="Delete links" width="90" x="45" y="480">
            <parameter key="mode" value="regular expression"/>
            <parameter key="expression" value="(http[s]?://[a-z_\.A-Z0-9\-]*)"/>
          </operator>
          <!-- Wiring of the inner process, in the order listed above. -->
          <connect from_port="document" to_op="Extract Content" to_port="document"/>
          <connect from_op="Extract Content" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Tokenize (3)" to_port="document"/>
          <connect from_op="Tokenize (3)" from_port="document" to_op="Filter Stopwords (3)" to_port="document"/>
          <connect from_op="Filter Stopwords (3)" from_port="document" to_op="Filter Tokens (3)" to_port="document"/>
          <connect from_op="Filter Tokens (3)" from_port="document" to_op="Delete links" to_port="document"/>
          <connect from_op="Delete links" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Generate ID is applied to the example set coming out of the document processing. -->
      <operator activated="true" class="generate_id" compatibility="5.3.015" expanded="true" height="76" name="Generate ID" width="90" x="179" y="165"/>
      <!-- Top-level wiring: database, then document processing, then Generate ID, then result 1. -->
      <connect from_op="Server Connection" from_port="output" to_op="Process Documents from Data (2)" to_port="example set"/>
      <connect from_op="Process Documents from Data (2)" from_port="example set" to_op="Generate ID" to_port="example set input"/>
      <connect from_op="Generate ID" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>