How to fix timeless execution in cypher query - Neo4j Graph Database? - json

I'm importing the Common Weakness Enumeration Catalog (a .json file) into a Neo4j graph database, using a Cypher query and the APOC library. The fields Weaknesses, Views and External_References import properly, but the import of the Categories field keeps executing without ever finishing (and without reporting any error). Below I present the structure of the .json file and my Cypher code.
"Weakness_Catalog": {
"Weaknesses": {"Weakness":[...]}
"Categories": {"Category":[...]}
"Views": {"View":[...]}
"External_References": {"External_Reference":[...]}
}
Cypher Query
After several tests, I think the logic error is in the last two parts [with value ... (catRef)]; without them, the query executes fine, in a normal amount of time. I also changed a setting in the database configuration file because of an error (cypher.lenient_create_relationship = true). I also tried different import orders (weaknesses, categories, views, external references, etc.), with the same bad results.
call apoc.load.json(files) yield value
unwind value.Weakness_Catalog.Weaknesses.Weakness as weakness
merge (i:GeneralInfo_CWE {Name:value.Weakness_Catalog.Name, Version:value.Weakness_Catalog.Version,
Date:value.Weakness_Catalog.Date, Schema:'https://cwe.mitre.org/data/xsd/cwe_schema_v6.4.xsd'})
merge(w:CWE {Name:'CWE-' + weakness.ID})
set w.Extended_Name=weakness.Name, w.Abstraction=weakness.Abstraction,
w.Structure=weakness.Structure, w.Status=weakness.Status, w.Description=weakness.Description,
w.Extended_Description= apoc.convert.toString(weakness.Extended_Description),
w.Likelihood_Of_Exploit=weakness.Likelihood_Of_Exploit,
w.Background_Details=apoc.convert.toString(weakness.Background_Details.Background_Detail),
w.Modes_Of_Introduction=[value in weakness.Modes_Of_Introduction.Introduction | value.Phase],
w.Submission_Date=weakness.Content_History.Submission.Submission_Date,
w.Submission_Name=weakness.Content_History.Submission.Submission_Name,
w.Submission_Organization=weakness.Content_History.Submission.Submission_Organization,
w.Modifications=[value in weakness.Content_History.Modification | apoc.convert.toString(value)],
w.Alternate_Terms=apoc.convert.toString(weakness.Alternate_Terms),
w.Notes=[value in weakness.Notes.Note | apoc.convert.toString(value)],
w.Affected_Resources=[value in weakness.Affected_Resources.Affected_Resource | value],
w.Functional_Areas=[value in weakness.Functional_Areas.Functional_Area | value]
merge (w)-[:belongsTo]->(i)
with w, weakness, value
unwind weakness.Related_Weaknesses.Related_Weakness as Rel_Weakness
match (cwe:CWE) where cwe.Name='CWE-' + Rel_Weakness.CWE_ID
merge (w)-[:Related_Weakness{Nature:Rel_Weakness.Nature}]->(cwe)
with w, weakness, value
unwind weakness.Applicable_Platforms as appPl
foreach (lg in appPl.Language |
merge(ap:Applicable_Platform{Type:'Language', Prevalence:lg.Prevalence,
Name:coalesce(lg.Name, 'NOT SET'), Class:coalesce(lg.Class, 'NOT SET')})
merge(w)-[:Applicable_Platform]->(ap))
with w, weakness, value, appPl
foreach (tch in appPl.Technology |
merge(ap:Applicable_Platform{Type:'Technology', Prevalence:tch.Prevalence,
Name:coalesce(tch.Name, 'NOT SET'), Class:coalesce(tch.Class, 'NOT SET')})
merge(w)-[:Applicable_Platform]->(ap))
with w, weakness, value, appPl
foreach (arc in appPl.Architecture |
merge(ap:Applicable_Platform{Type:'Architecture', Prevalence:arc.Prevalence,
Name:coalesce(arc.Name, 'NOT SET'), Class:coalesce(arc.Class, 'NOT SET')})
merge(w)-[:Applicable_Platform]->(ap))
with w, weakness, value, appPl
foreach (os in appPl.Operating_System |
merge(ap:Applicable_Platform{Type:'Operating System', Prevalence:os.Prevalence,
Name:coalesce(os.Name, 'NOT SET'), Class:coalesce(os.Class, 'NOT SET')})
merge(w)-[:Applicable_Platform]->(ap))
with w, weakness, value
foreach (example in weakness.Demonstrative_Examples.Demonstrative_Example |
merge(ex:Demonstrative_Example {Intro_Text:apoc.convert.toString(example.Intro_Text)})
set ex.Body_Text=[value in example.Body_Text | apoc.convert.toString(value)],
ex.Example_Code=[value in example.Example_Code | apoc.convert.toString(value)]
merge (w)-[:hasExample]->(ex))
with w, weakness, value
foreach (consequence in weakness.Common_Consequences.Consequence |
merge (con:Consequence{CWE:w.Name, Scope:[value in consequence.Scope | value]})
set con.Impact=[value in consequence.Impact | value],
con.Note=consequence.Note, con.Likelihood=consequence.Likelihood
merge(w)-[:hasConsequence]->(con))
with w, weakness, value
foreach (dec in weakness.Detection_Methods.Detection_Method |
merge(d:Detection_Method {Method:dec.Method})
merge(w)-[wd:canBeDetected{Description:apoc.convert.toString(dec.Description)}]->(d)
set wd.Effectiveness=dec.Effectiveness, wd.Effectiveness_Notes=dec.Effectiveness_Notes,
wd.Detection_Method_ID=dec.Detection_Method_ID)
with w, weakness, value
foreach (mit in weakness.Potential_Mitigations.Mitigation |
merge(m:Mitigation {Description:apoc.convert.toString(mit.Description)})
set m.Phase=[value in mit.Phase | value], m.Strategy=mit.Strategy,
m.Effectiveness=mit.Effectiveness, m.Effectiveness_Notes=mit.Effectiveness_Notes,
m.Mitigation_ID=mit.Mitigation_ID
merge(w)-[:hasMitigation]->(m))
with w, weakness, value
foreach (rap in weakness.Related_Attack_Patterns.Related_Attack_Pattern |
merge(cp:CAPEC {Name:rap.CAPEC_ID})
merge(w)-[:RelatedAttackPattern]->(cp))
with w, weakness, value
foreach (reference in value.Weakness_Catalog.External_References.External_Reference |
merge(r:External_Reference{Reference_ID:reference.Reference_ID})
set r.Author=[value in reference.Author | value], r.Title=reference.Title,
r.Edition=reference.Edition, r.URL=reference.URL,
r.Publication_Year=reference.Publication_Year, r.Publisher=reference.Publisher)
with w, weakness, value
unwind weakness.References.Reference as exReference
match (ref:External_Reference) where ref.Reference_ID=exReference.External_Reference_ID
merge(w)-[:hasExternal_Reference]->(ref)
with value
unwind value.Weakness_Catalog.Views.View as view
merge (v:CWE_VIEW{ViewID:view.ID})
set v.Name=view.Name, v.Type=view.Type, v.Status=view.Status,
v.Objective=apoc.convert.toString(view.Objective), v.Filter=view.Filter,
v.Notes=apoc.convert.toString(view.Notes),
v.Submission_Name=view.Content_History.Submission.Submission_Name,
v.Submission_Date=view.Content_History.Submission.Submission_Date,
v.Submission_Organization=view.Content_History.Submission.Submission_Organization,
v.Modification=[value in view.Content_History.Modification | apoc.convert.toString(value)]
foreach (value in view.Audience.Stakeholder |
merge (st:Stakeholder{Type:value.Type})
merge (v)-[rel:usefulFor]->(st)
set rel.Description=value.Description)
with v, view, value
unwind (case view.Members.Has_Member when [] then [null] else view.Members.Has_Member end) as members
optional match (MemberWeak:CWE{Name:'CWE-' + members.CWE_ID})
merge (v)-[:hasMember{ViewID:members.View_ID}]->(MemberWeak)
with v, view, value
unwind (case view.References.Reference when [] then [null] else view.References.Reference end) as viewExReference
optional match (viewRef:External_Reference{Reference_ID:viewExReference.External_Reference_ID})
merge (v)-[:hasExternal_Reference{ViewID:v.ViewID}]->(viewRef)
with value
unwind value.Weakness_Catalog.Categories.Category as category
merge (c:CWE_Category{CategoryID:category.ID})
set c.Name=category.Name, c.Status=category.Status, c.Summary=apoc.convert.toString(category.Summary),
c.Notes=apoc.convert.toString(category.Notes), c.Submission_Name=category.Content_History.Submission.Submission_Name,
c.Submission_Date=category.Content_History.Submission.Submission_Date,
c.Submission_Organization=category.Content_History.Submission.Submission_Organization,
c.Modification=[value in category.Content_History.Modification | apoc.convert.toString(value)]
with c, category
unwind (case category.References.Reference when [] then [null] else category.References.Reference end) as categoryExReference
optional match (catRef:External_Reference{Reference_ID:categoryExReference.External_Reference_ID})
merge (c)-[:hasExternal_Reference{CategoryID:c.CategoryID}]->(catRef)

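So, the problem was that every time I use WITH after an UNWIND, I'm effectively working in nested loops: each following clause runs once for every row produced so far. The more nested loops, the slower the query will be. A good way to speed things up is to write simpler, separate queries whenever possible.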
For example in the json file:
"Weakness_Catalog": {
"Weaknesses": {"Weakness":[...]}
"Categories": {"Category":[...]}
"Views": {"View":[...]}
"External_References": {"External_Reference":[...]}
}
I will execute one query for Weaknesses, one for Categories, one for Views and one for External_References.

Related

F# error FS0588: The block following this 'let' is unfinished. Every code block is an expression and must have a result

I am tasked with finishing an interpreter in F#, but I'm having some trouble, as I am getting the error: error FS0588: The block following this 'let' is unfinished. Every code block is an expression and must have a result. 'let' cannot be the final code element in a block. Consider giving this block an explicit result.
It's been a long time since I last programmed in F#.
The following is my code. I have a helper function inside my eval function, called OperateAux. It gets called in the pattern matching, when it matches e with OPERATE. It should then call OperateAux, and calculate the given expression. The error I'm getting is at line: let OperateAux (op:BINOP) (e1:EXP) (e2:EXP) : VALUE =
so I guess somehow my helper function isn't finished, I just cant figure out where.
let rec eval (vtab : SymTab) (e : EXP) : VALUE =
match e with
| CONSTANT n -> n
| VARIABLE v -> lookup v vtab
| OPERATE (op, e1, e2) -> OperateAux op e1 e2//(eval vtab e1) (eval vtab e2)
| LET_IN (var, e1, e2) -> failwith "case for LET_IN not handled"
| OVER (rop, var, e1, e2, e3) -> failwith "case for OVER not handled"
let OperateAux (op:BINOP) (e1:EXP) (e2:EXP) : VALUE =
let (INT e1) = eval vtab e1
let (INT e2) = eval vtab e2
match op with
| BPLUS -> (e1+e2)
| BMINUS -> (e1-e2)
| BTIMES -> (e1*e2)
| _ -> ()
Here are some types. I'm not sure if they are relevant for this question, but for good measure I'll show them.
type VALUE = INT of int
type BINOP = BPLUS | BMINUS | BTIMES
type RANGEOP = RSUM | RPROD | RMAX | RARGMAX
type EXP =
| CONSTANT of VALUE
| VARIABLE of string
| OPERATE of BINOP * EXP * EXP
| LET_IN of string * EXP * EXP
| OVER of RANGEOP * string * EXP * EXP * EXP
(* A list mapping variable names to their values. *)
type SymTab = (string * VALUE) list
Never mind, I figured it out. In F#, a helper function has to be defined before the code that calls it, so the helper function OperateAux must come before the pattern match that calls it.

Check for duplicate values for a specific JSON key

I have the following JSON records stored in a container
{"memberId":"123","city":"New York"}
{"memberId":"234","city":"Chicago"}
{"memberId":"345","city":"San Francisco"}
{"memberId":"123","city":"New York"}
{"memberId":"345","city":"San Francisco"}
I am looking to check if there is any duplication of the memberId - ideally return a true/false and then also return the duplicated values.
Desired Output:
true
123
345
Here's an efficient approach using inputs. It requires invoking jq with the -n command-line option. The idea is to create a dictionary that keeps count of each memberId string value.
The dictionary can be created as follows:
reduce (inputs|.memberId|tostring) as $id ({}; .[$id] += 1)
Thus, to produce a true/false indicator, followed by the duplicates if any, you could write:
reduce (inputs|.memberId|tostring) as $id ({}; .[$id] += 1)
| to_entries
| map(select(.value > 1))
| (length > 0), .[].key
(If all the .memberId values are known to be strings, then of course the call to tostring can be dropped. Conversely, if .memberId is both string and integer-valued, then the above program won't differentiate between occurrences of 1 and "1", for example.)
bow
The aforementioned dictionary is sometimes called a "bag of words" (https://en.wikipedia.org/wiki/Bag-of-words_model). This leads to the generic function:
# bow(stream): "bag of words" counter. Returns an object whose keys are the
# items emitted by `stream` (each converted with tostring) and whose values
# are the number of times that key occurred.
def bow(stream):
reduce stream as $word ({}; .[($word|tostring)] += 1);
The solution can now be written more concisely:
bow(inputs.memberId)
| to_entries
| map(select(.value > 1))
| (length > 0), .[].key
For just the values which have duplicates, one could write the more efficient query:
bow(inputs.memberId)
| keys_unsorted[] as $k
| select(.[$k] > 1)
| $k

Using ANTLR4 to create functions with no argument

I am still new to ANTLR4 and I am trying to achieve the following
I have business rules which consist of logical operation
(A= 'text' or B < 1) and getDataDB
The function getDataDB does not take any arguments. It will retrieve some data, validate it, and return either true or false.
my grammar is below
/*
* Test grammar
*/
grammar FunctionRule;
parse: expr EOF
;
expr
: expr binop expr #logicalExpression
| lhs=VARIABLE compop rhs=VARIABLE #variableExpression
| lhs=VARIABLE compop rhs=STRING #stringExpression
| lhs=VARIABLE compop rhs=NUMBER #numberExpression
| TRUE #booleanTrue
| FALSE #booleanFalse
| function #functionExpression
| VARIABLE #booleanVariable
| LEFTPAREN expr RIGHTPAREN #enclosedExpression
;
binop : AND | OR
;
compop: EQUAL | LT | GT | LTE | GTE | NE
;
function : ID {System.out.println("HELLLL");};
TRUE: 'true' | 'TRUE' ;
FALSE: 'false' | 'FALSE';
STRING: '"' ~([\t\n\r]| '"')* '"'
;
ID : [getDataDB];
LEFTPAREN: '(';
RIGHTPAREN: ')';
EQUAL : '=' | 'EQ';
LT : '<' | 'LT';
GT : '>' | 'GT';
LTE : '<=' | 'LE';
GTE : '>=' | 'GE';
NE : '!=' | 'NE';
AND : 'AND' | '&' | 'and';
OR : 'OR' | 'or' | '|';
VARIABLE : [a-zA-Z]+[a-zA-Z0-9_.-]*;
NUMBER : [0-9]+ ('.'[0-9]+)?;
SPACE : [ \t\r\n] -> skip;
When I generate classes from the grammar, I do not see anything related to the function.
1-how do I define a function correctly in the grammar file.
2- Where can I put the code for this function after creating the classes? Is it only in the action clause, or is there a way to put a class name in the grammar where I can put the implementation?
Thanks for the help!
ID : [getDataDB];
This means that ID matches a single letter that could be either one of g, e, t, D, a or B. What you likely wanted is ID: 'getDataDB'; which matches the string getDataDB. Note that calling this ID is highly misleading.
where i can put the code for this function
Are you writing an interpreter using a visitor? Then you'd put the code into the visitFunction method or rather in a getDataDB method that you call from visitFunction if the function name was equal to getDataDB (right now that would always be the case, but I'm assuming you eventually want to introduce more than one function).
Alternatively you could also structure your grammar slightly differently like this (removing the ID rule):
function : 'getDataDB' # GetDataDB
| 'otherFunction' # OtherFunction
;
Then you could define the functions in visitGetDataDB and visitOtherFunction respectively.
All that's assuming that you want function names to be keywords (which implies that there can't be user-definable functions). If you don't, you should not have separate tokens for function names, so zero-argument functions and variables become indistinguishable syntactically (unless you add a requirement to add () for functions, but it doesn't look like that's what you want). So you should just have one rule that could be either a variable or a zero-argument function and then check whether the given identifier is the name of a function in visitVariableOrNullaryFunction (which maybe you'd just call visitVariable for brevity).

Spark RDD filter by querying MySQL

I use Spark Streaming to stream data from Kafka, and I want to filter that data based on data stored in MySQL.
For example, I get data from kafka just like:
{"id":1, "data":"abcdefg"}
and there are data in MySql like this:
id | state
1 | "success"
I need to query MySQL to get the state for the given id.
I can open a connection to MySQL inside the filter function, and it works. The code looks like this:
def isSuccess(x):
id = x["id"]
sql = """
SELECT *
FROM Test
WHERE id = "{0}"
""".format(id)
conn = mysql_connection(......)
result = rdbi.query_one(sql)
if result == None:
return False
else:
return True
successRDD = rdd.filter(isSuccess)
But it will open a connection for every row of the RDD, which wastes a lot of computing resources.
How can I do this efficiently in filter?
I suggest using mapPartitions, available in Apache Spark, so that the MySQL connection is initialized once per partition rather than once for every record of the RDD.
This is the MySQL table that I created:
create table test2(id varchar(10), state varchar(10));
With the following values:
+------+---------+
| id | state |
+------+---------+
| 1 | success |
| 2 | stopped |
+------+---------+
Use the following PySpark Code as reference:
import MySQLdb
data1=[["1", "afdasds"],["2","dfsdfada"],["3","dsfdsf"]] #sampe data, in your case streaming data
rdd = sc.parallelize(data1)
def func1(data1):
    """Attach the MySQL ``state`` to every record of one RDD partition.

    Meant to be passed to ``rdd.mapPartitions`` so that a single connection
    is opened per partition instead of one per record.

    :param data1: iterator over the records of the partition; each record is
        indexable, with the id at index 0 and the payload at index 1.
    :return: iterator over ``[id, payload, state]`` lists, where ``state`` is
        the string "none" when the id has no row in table ``test2``.
    """
    con = MySQLdb.connect(host="127.0.0.1", user="root", passwd="yourpassword", db="yourdb")
    try:
        c = con.cursor()
        c.execute("select * from test2;")
        rows = c.fetchall()
    finally:
        # Always release the connection, even when the query fails; otherwise
        # every processed partition leaks one open MySQL connection.
        con.close()
    # Lookup table: first column of test2 (id) -> second column (state).
    states = {}
    for row in rows:
        states[row[0]] = row[1]
    list1 = []
    for x in data1:
        # "none" marks ids received from the stream that are absent from the table.
        list1.append([x[0], x[1], states.get(x[0], "none")])
    return iter(list1)
print rdd.mapPartitions(func1).filter(lambda x: "none" not in x[2]).collect()
The output that i got was:
[['1', 'afdasds', 'success'], ['2', 'dfsdfada', 'stopped']]

OCaml : Raise an error inside a match with structure

In OCaml, I have a list of strings that contains names of towns (Something like "1-New York; 2-London; 3-Paris"). I need to ask the user to type a number (if they want London they have to type 2).
I want to raise an exception message saying that the town is not valid, if the person types for example "4", in the example.
I tried this, but it doesn't work :
let chosenTown = match int_of_string (input_line stdin) with
| x > (length listOfTowns) -> raise (Err "Not a valid town")
What's the good way to code "if the chosen number is bigger than the length of the list then raise the error" ??
Patterns can't contain arbitrary expressions. A pattern can be a constant, a constructor name, a record field inside curly braces, a list, an array, etc.
But patterns can be guarded, e.g.
(* Read the town number from standard input; a guarded pattern rejects
   numbers that are too large before indexing into the list. *)
match int_of_string (input_line stdin) with
| x when x >= length listOfTowns ->
  invalid_arg "the number is too large"
| x -> List.nth listOfTowns x
To complete the answer, pattern matching relies on unification and does not accept arbitrary assertions (it is not the equivalent of a switch in C or similar languages).
The idea is that you provide different "shapes" (patterns) that your term (the thing you match on) could have.
For a list for instance:
match l with
| e :: e' :: r -> (*...*)
| e :: r -> (*...*)
| [] -> (*...*)
It also has a binding effect: if you pass, say, [1] (a very small list indeed), it won't match e :: e' :: r, but it will match e :: r, binding e = 1 and r = [].
As ivg said, you can add conditions, as booleans this time, thanks to the keyword when.
However, when manipulating lists like this, I would go for a recursive function:
(* [find_town n l] returns the [n]-th element of [l], counting from 1.
   Raises [Err "Not a valid town"] when the list is exhausted before
   position [n] is reached. *)
let rec find_town n l =
  match l with
  | [] -> raise (Err "Not a valid town")
  | town :: rest ->
    if n = 1 then town
    else find_town (n - 1) rest
This is basically writing again List.nth but changing the exception that it raises.