--- title: Syntax_examples updated: 2022-07-18 09:37:40Z created: 2021-05-04 14:58:11Z --- ## show database schema info CALL db.schema.visualization() CALL db.schema.relTypeProperties() CALL db.schema.nodeTypeProperties() CALL db.propertyKeys() ## syntax MATCH (variable:Label {propertyKey: propertyValue, propertyKey2: propertyValue2}) RETURN variable ## relationships () // a node ()--() // 2 nodes have some type of relationship ()-[]-() // 2 nodes have some type of relationship ()-->() // the first node has a relationship to the second node ()<--() // the second node has a relationship to the first node MATCH (node1)-[:REL_TYPE]->(node2) RETURN node1, node2 MATCH (node1)-[:REL_TYPEA | REL_TYPEB]->(node2) RETURN node1, node2 ## show node with name "Tom Hanks" MATCH (tom {name: "Tom Hanks"}) RETURN tom ## return Person nodes (filter by name, or list all names) MATCH (a:Person) WHERE a.name = "Tom" RETURN a MATCH (a:Person) RETURN a.name ## with where clause match (a:Movie) where a.released >= 1990 and a.released < 1999 return a.title; ## a list of all properties that match a string MATCH (n) WITH keys(n) AS p UNWIND p AS x WITH DISTINCT x WHERE x =~ ".*" RETURN collect(x) AS SET; ## delete all nodes and relations MATCH (n) DETACH DELETE n ## create ```cypher create (:Person {name: 'jan', age: 32}) ``` match(n:Person {age: 32}) return n match(n:Person {age: 32}) create (n)-[:RELATIE]->(:Person {name:"klaas"}) MATCH (n:Person) DETACH DELETE n MATCH (p:Person)-[:ACTED_IN]->(m:Movie)<-[:DIRECTED]-(other:Person) where toLower(p.name) =~ 'gene.*' and other.born IN [1950,1930] and exists((other)-[:DIRECTED]->(m)) return m.title, other.name, other.born as YearBorn MATCH (p:Person)-[:ACTED_IN]->(m:Movie) where p.name = 'Tom Hanks' with m, datetime().year - m.released as Ago, m.released - p.born as Age where 20 <= Ago <= 33 return m.title, Ago MATCH (m:Movie) WITH m, size((:Person)-[:DIRECTED]->(m)) AS directors WHERE directors >= 2 OPTIONAL MATCH (p:Person)-[:REVIEWED]->(m) RETURN m.title, p.name match
(a:Person), (m:Movie), (b:Person) where a.name = 'Liam Neeson' and b.name = 'Benjamin Melniker' and m.title = 'Batman Begins' create (a)-[:ACTED_IN {roles: ['Rachel','Rachel Dawes']}]->(m)<-[:PRODUCED]-(b) return a,m,b MATCH (a:Person),(m:Movie) WHERE a.name = 'Christian Bale' AND m.title = 'Batman Begins' AND NOT exists((a)-[:ACTED_IN]->(m)) CREATE (a)-[rel:ACTED_IN]->(m) SET rel.roles = ['Bruce Wayne','Batman'] RETURN a, rel, m MATCH (p:Person)-[rel:ACTED_IN]->(m:Movie) where m.title = 'Forrest Gump' set rel.roles = case p.name when 'Tom Hanks' then ['Forrest Gump'] when 'Robin Wright' then ['Jenny Curran'] when 'Gary Sinise' then ['Lieutenant Dan Taylor'] end return p,rel,m MATCH (p:Person)-[rel:HELPED]->(p2:Person) where p.name = 'Tom Hanks' and p2.name = 'Gary Sinise' set rel += {research:'war history'} return p,rel,p2 merge (m:Movie {name:'Forrest Gump'}) on match set m.year = 1994 on match set m.tagline = 'Life is like a box of chocolates…you never know what you’re gonna get.' return m merge (p:Movie {name:'Forrest Gump'}) on match set p:OlderMovie return p match (p:Person {name:'Robert Zemeckis'}), (m:Movie {title:'Forrest Gump'}) merge (p)-[r:DIRECTED]->(m) return p,r,m ## constrain uniqueness CREATE CONSTRAINT UniqueMovieTitleConstraint ON (m:Movie) ASSERT m.title IS UNIQUE ## constrain uniqueness over two properties ## only enterprise edition CREATE CONSTRAINT UniqueNameBornConstraint ON (p:Person) ASSERT (p.name, p.born) IS NODE KEY ## needs enterprise edition of neo4j create constraint PersonBornExistsConstraint on (p:Person) assert exists(p.born) ## existence constraint (possible for node) CREATE CONSTRAINT ExistsMovieTagline ON (m:Movie) ASSERT exists(m.tagline) DROP CONSTRAINT MovieTitleConstraint ## existence constraint for relationship ## only enterprise edition of neo4j CREATE CONSTRAINT ExistsREVIEWEDRating ON ()-[rel:REVIEWED]-() ASSERT exists(rel.rating) ## drop constraint DROP CONSTRAINT ExistsREVIEWEDRating CALL db.constraints() better SHOW
CONSTRAINTS ## Indexes ## Single property index CREATE INDEX MovieReleased FOR (m:Movie) ON (m.released) ## composite index CREATE INDEX MovieReleasedVideoFormat FOR (m:Movie) ON (m.released, m.videoFormat) ## full-text schema index CALL db.index.fulltext.createNodeIndex( 'MovieTitlePersonName',['Movie', 'Person'], ['title', 'name']) ### To use a full-text schema index, you must call the query procedure that uses the index. CALL db.index.fulltext.queryNodes( 'MovieTitlePersonName', 'Jerry') YIELD node, score RETURN node.title, score ### Searching on a particular property CALL db.index.fulltext.queryNodes( 'MovieTitlePersonName', 'name: Jerry') YIELD node RETURN node ## drop index DROP INDEX MovieReleasedVideoFormat ## dropping full-text schema index CALL db.index.fulltext.drop('MovieTitlePersonName') ## search a full-text schema index CALL db.index.fulltext.queryNodes('MovieTaglineFTIndex', 'real OR world') YIELD node RETURN node.title, node.tagline ## set parameters :param year => 2000 :params {actorName: 'Tom Cruise', movieName: 'Top Gun'} ## for statement MATCH (p:Person)-[:ACTED_IN]->(m:Movie) WHERE p.name = $actorName AND m.title = $movieName RETURN p, m ## clear :params {} ## view :params ## Analyzing queries - EXPLAIN provides estimates of the graph engine processing that will occur, but does not execute the Cypher statement. - PROFILE provides real profiling information for what has occurred in the graph engine during the query and executes the Cypher statement. 
(run-time performance metrics) ## Monitoring queries :queries ## exercise :params {year:2006, ratingValue:65} match (p:Person)-[r:REVIEWED]->(m:Movie)<-[:ACTED_IN]-(a:Person) where m.released = $year and r.rating = $ratingValue return p.name, m.title, m.released, r.rating, collect(a.name) :auto USING PERIODIC COMMIT LOAD CSV commit every 1000 rows PERIODIC COMMIT has no effect when the query uses eager operators, e.g.: collect(), count(), ORDER BY, DISTINCT LOAD CSV WITH HEADERS FROM 'https://data.neo4j.com/v4.0-intro-neo4j/directors.csv' AS row MATCH (movie:Movie {id:toInteger(row.movieId)}) MATCH (person:Person {id: toInteger(row.personId)}) MERGE (person)-[:DIRECTED]->(movie) ON CREATE SET person:Director LOAD CSV WITH HEADERS FROM 'http://data.neo4j.com/v4.0-intro-neo4j/actors.csv' AS line MERGE (actor:Person {name: line.name}) ON CREATE SET actor.born = toInteger(trim(line.birthYear)), actor.actorId = line.id ON MATCH SET actor.actorId = line.id ## before load CREATE CONSTRAINT UniqueMovieIdConstraint ON (m:Movie) ASSERT m.id IS UNIQUE; ## after load CREATE INDEX MovieTitleIndex FOR (m:Movie) ON (m.title); // Delete all constraints and indexes CALL apoc.schema.assert({},{},true); // Delete all nodes and relationships CALL apoc.periodic.iterate( 'MATCH (n) RETURN n', 'DETACH DELETE n', { batchSize:500 } ) ## test apoc CALL dbms.procedures() YIELD name WHERE name STARTS WITH "apoc" RETURN name ## Graph modelling How does Neo4j support graph data modeling? - allows you to create property graphs. - traversing the graph: traversal means anchoring a query based upon a property value, then traversing the graph to satisfy the query. Nodes and relationships are the key components of a graph. Nodes must have labels to categorize entities. A label is used to categorize a set of nodes. Relationships must have direction and type. A relationship is only traversed once during a query. Nodes and relationships can have properties. Properties are used to provide specific values to a node or relationship.
## Your model must address Nodes: - Uniqueness of nodes: always have a property (or set of properties) that uniquely identifies a node. - Complex data: balance between the number of properties that represent complex data vs. multiple nodes and relationships. super nodes = (a node with lots of fan-in or fan-out) - Reduce property duplication (no repeating property values) - Reduce gather-and-inspect (traversal) ## Best practices for modeling relationships - Using specific relationship types. - Reducing symmetric relationships. - No semantically identical relationships (PARENT_OF and CHILD_OF) - Not all mutual relationships are semantically symmetric (FOLLOWS) - Using types vs. properties. ## Property best practices In the case of property value complexity, it depends on how the property is used. Anchors and traversal paths that use property values need to be parsed at query time. - Property lookups have a cost. - Parsing a complex property adds more cost. - Anchors and properties used for traversal should be as simple as possible. - Identifiers, outputs, and decoration are OK as complex values. ## Hierarchy of accessibility 1. Anchor node label, indexed anchor node properties (cheap) 2. Relationship types (cheap) 3. Non-indexed anchor node properties 4. Downstream node labels 5. Relationship properties, downstream node properties Downstream labels and properties are the most expensive. ## Common graph structures used in modeling: 1. Intermediate nodes - (solve hyperedge; n-ary relationships) - sharing context (share contextual information) - sharing data (deduplicate information) - organizing data (avoid density of nodes) 2. Linked lists (useful whenever the sequence of objects matters) - Interleaved linked list - Head and tail of linked list (root points to head and tail) - No doubly linked lists (redundant symmetrical relationships) 3.
Timeline trees - use time as either an anchor or a navigational aid - topmost node in the timeline is an “all time” node - timeline trees consume a lot of space 4. Multiple structures in a single graph CREATE (:Airport {code: "ABQ"})<-[:CONNECTED_TO {airline: "WN", flightNumber: 500, date: "2019-1-3", departure: 1445, arrival: 1710}]-(:Airport {code: "LAS"})-[:CONNECTED_TO {airline: "WN", flightNumber: 82, date: "2019-1-3", departure: 1715, arrival: 1820}]->(:Airport {code: "LAX"}) LOAD CSV WITH HEADERS FROM 'file:///flights_2019_1k.csv' AS row MERGE (origin:Airport {code: row.Origin}) MERGE (destination:Airport {code: row.Dest}) MERGE (origin)-[connection:CONNECTED_TO { airline: row.UniqueCarrier, flightNumber: row.FlightNum, date: toInteger(row.Year) + '-' + toInteger(row.Month) + '-' + toInteger(row.DayofMonth)}]->(destination) ON CREATE SET connection.departure = toInteger(row.CRSDepTime), connection.arrival = toInteger(row.CRSArrTime)