10 KiB
title | updated | created |
---|---|---|
Syntax_examples | 2022-07-18 09:37:40Z | 2021-05-04 14:58:11Z |
show database schema info
// Inspect the schema. A CALL without YIELD is only valid as a standalone
// statement, so each procedure is run as its own statement.
CALL db.schema.visualization();
CALL db.schema.relTypeProperties();
CALL db.schema.nodeTypeProperties();
CALL db.propertyKeys();
syntax
MATCH (variable:Label {propertyKey: propertyValue, propertyKey2: propertyValue2}) RETURN variable
relationships
()        // a node
()--()    // 2 nodes have some type of relationship
()-[]-()  // 2 nodes have some type of relationship
()-->()   // the first node has a relationship to the second node
()<--()   // the second node has a relationship to the first node
MATCH (node1)-[:REL_TYPE]->(node2) RETURN node1, node2
MATCH (node1)-[:REL_TYPEA | REL_TYPEB]->(node2) RETURN node1, node2
show node with name "Tom Hanks"
// Property match is exact, so the full name is required to find "Tom Hanks".
MATCH (tom {name: "Tom Hanks"}) RETURN tom
return Person nodes (filtered by name), or only the names of all Person nodes
// Person nodes whose name is exactly "Tom"
MATCH (a:Person) WHERE a.name = "Tom" RETURN a;
// Only the name property of every Person node
MATCH (a:Person) RETURN a.name;
with where clause
match (a:Movie) where a.released >= 1990 and a.released < 1999 return a.title;
a list of all distinct property keys whose name matches a regular expression
MATCH (n) WITH keys(n) AS p UNWIND p AS x WITH DISTINCT x WHERE x =~ ".*" RETURN collect(x) AS SET;
delete all nodes and relations
MATCH (n) DETACH DELETE n
create
// Map literals use `key: value`; `=` is only valid in SET clauses.
create (:Person {name: 'jan', age: 32})
match(n:Person {age: 32}) return n
match(n:Person {age: 32}) create (n)-[:RELATIE]->(:Person {name:"klaas"})
MATCH (n:Person) DETACH DELETE n
MATCH (p:Person)-[:ACTED_IN]->(m:Movie)<-[:DIRECTED]-(other:Person) where toLower(p.name) =~ 'gene.*' and other.born IN [1950,1930] and exists((other)-[:DIRECTED]->(m)) return m.title, other.name, other.born as YearBorn
MATCH (p:Person)-[:ACTED_IN]->(m:Movie) where p.name = 'Tom Hanks' with m, datetime().year - m.released as Ago, m.released - p.born as Age where 20 <= Ago <= 33 return m.title, Ago
MATCH (m:Movie) WITH m, size((:Person)-[:DIRECTED]->(m)) AS directors WHERE directors >= 2 OPTIONAL MATCH (p:Person)-[:REVIEWED]->(m) RETURN m.title, p.name
match (a:Person), (m:Movie), (b:Person) where a.name = 'Liam Neeson' and b.name = 'Benjamin Melniker' and m.title = 'Batman Begins' create (a)-[:ACTED_IN {roles: ['Rachel','Rachel Dawes']}]->(m)<-[:PRODUCED]-(b) return a,m,b
MATCH (a:Person),(m:Movie) WHERE a.name = 'Christian Bale' AND m.title = 'Batman Begins' AND NOT exists((a)-[:ACTED_IN]->(m)) CREATE (a)-[rel:ACTED_IN]->(m) SET rel.roles = ['Bruce Wayne','Batman'] RETURN a, rel, m
// Set the roles of the three known Forrest Gump actors.
// The ELSE branch keeps the existing value: without it CASE yields null for
// every other actor, and `SET rel.roles = null` removes the property.
MATCH (p:Person)-[rel:ACTED_IN]->(m:Movie)
where m.title = 'Forrest Gump'
set rel.roles = case p.name
  when 'Tom Hanks' then ['Forrest Gump']
  when 'Robin Wright' then ['Jenny Curran']
  when 'Gary Sinise' then ['Lieutenant Dan Taylor']
  else rel.roles
end
return p,rel,m
MATCH (p:Person)-[rel:HELPED]->(p2:Person) where p.name = 'Tom Hanks' and p2.name = 'Gary Sinise' set rel += {research:'war history'} return p,rel,p2
merge (m:Movie {name:'Forrest Gump'}) on match set m.year = 1994 on match set m.tagline = 'Life is like a box of chocolates…you never know what you’re gonna get.' return m
merge (p:Movie {name:'Forrest Gump'}) on match set p:OlderMovie return p
match (p:Person {name:'Robert Zemeckis'}), (m:Movie {title:'Forrest Gump'}) merge (p)-[r:DIRECTED]->(m) return p,r,m
constrain uniqueness
CREATE CONSTRAINT UniqueMovieTitleConstraint ON (m:Movie) ASSERT m.title IS UNIQUE
constrain uniqueness over two properties
only enterprise edition
CREATE CONSTRAINT UniqueNameBornConstraint ON (p:Person) ASSERT (p.name, p.born) IS NODE KEY
needs enterprise edition of neo4j
create constraint PersonBornExistsConstraint on (p:Person) assert exists(p.born)
existence constraint for node
CREATE CONSTRAINT ExistsMovieTagline ON (m:Movie) ASSERT exists(m.tagline)
DROP CONSTRAINT MovieTitleConstraint
existence constraint for relationship
only enterprise edition of neo4j
CREATE CONSTRAINT ExistsREVIEWEDRating ON ()-[rel:REVIEWED]-() ASSERT exists(rel.rating)
drop constraint
DROP CONSTRAINT ExistsREVIEWEDRating
list constraints: CALL db.constraints() or, better, SHOW CONSTRAINTS
Indexes
Single property index
CREATE INDEX MovieReleased FOR (m:Movie) ON (m.released)
composite index
CREATE INDEX MovieReleasedVideoFormat FOR (m:Movie) ON (m.released, m.videoFormat)
full-text schema index
CALL db.index.fulltext.createNodeIndex( 'MovieTitlePersonName',['Movie', 'Person'], ['title', 'name'])
To use a full-text schema index, you must call the query procedure that uses the index.
CALL db.index.fulltext.queryNodes( 'MovieTitlePersonName', 'Jerry') YIELD node, score RETURN node.title, score
Searching on a particular property
CALL db.index.fulltext.queryNodes( 'MovieTitlePersonName', 'name: Jerry') YIELD node RETURN node
drop index
DROP INDEX MovieReleasedVideoFormat
dropping full-text schema index
CALL db.index.fulltext.drop('MovieTitlePersonName')
search a full-text schema index
CALL db.index.fulltext.queryNodes('MovieTaglineFTIndex', 'real OR world') YIELD node RETURN node.title, node.tagline
set parameters
// `:param` sets a single parameter; `:params {...}` replaces the whole
// parameter set, so run them as separate commands (the second discards `year`).
:param year => 2000
:params {actorName: 'Tom Cruise', movieName: 'Top Gun'}
for statement
MATCH (p:Person)-[:ACTED_IN]->(m:Movie) WHERE p.name = $actorName AND m.title = $movieName RETURN p, m
clear
:params {}
view
:params
Analyzing queries
-
EXPLAIN provides estimates of the graph engine processing that will occur, but does not execute the Cypher statement.
-
PROFILE provides real profiling information for what has occurred in the graph engine during the query and executes the Cypher statement. (run-time performance metrics)
Monitoring queries
:queries
exercise
:params {year:2006, ratingValue:65}
match (p:Person)-[r:REVIEWED]->(m:Movie)<-[:ACTED_IN]-(a:Person) where m.released = $year and r.rating = $ratingValue return p.name, m.title, m.released, r.rating, collect(a.name)
:auto USING PERIODIC COMMIT LOAD CSV commits every 1000 rows by default. Periodic commit does not take effect when the query contains eager operators, e.g. collect(), count(), ORDER BY, DISTINCT
LOAD CSV WITH HEADERS FROM 'https://data.neo4j.com/v4.0-intro-neo4j/directors.csv' AS row MATCH (movie:Movie {id:toInteger(row.movieId)}) MATCH (person:Person {id: toInteger(row.personId)}) MERGE (person)-[:DIRECTED]->(movie) ON CREATE SET person:Director
LOAD CSV WITH HEADERS FROM 'http://data.neo4j.com/v4.0-intro-neo4j/actors.csv' AS line MERGE (actor:Person {name: line.name}) ON CREATE SET actor.born = toInteger(trim(line.birthYear)), actor.actorId = line.id ON MATCH SET actor.actorId = line.id
before load
CREATE CONSTRAINT UniqueMovieIdConstraint ON (m:Movie) ASSERT m.id IS UNIQUE;
after load
// Named index syntax is FOR (pattern) ON (properties).
CREATE INDEX MovieTitleIndex FOR (m:Movie) ON (m.title);
// Delete all constraints and indexes
CALL apoc.schema.assert({},{},true);
// Delete all nodes and relationships
CALL apoc.periodic.iterate(
  'MATCH (n) RETURN n',
  'DETACH DELETE n',
  { batchSize:500 }
)
test apoc
CALL dbms.procedures() YIELD name WHERE name STARTS WITH "apoc" RETURN name
Graph modelling
How does Neo4j support graph data modeling?
- allows you to create property graphs.
- traversing the graph: traversal means anchoring a query based upon a property value, then traversing the graph to satisfy the query
Nodes and relationships are the key components of a graph. Nodes must have labels to categorize entities. A label is used to categorize a set of nodes. Relationships must have direction and type. A relationship is only traversed once during a query. Nodes and relationships can have properties. Properties are used to provide specific values to a node or relationship.
Your model must address Nodes:
- Uniqueness of nodes: always have a property (or set of properties) that uniquely identify a node.
- Complex data: balance between number of properties that represent complex data vs. multiple nodes and relationships.
super nodes = (a node with lots of fan-in or fan-out)
- Reduce property duplication (no repeating property values)
- Reduce gather-and-inspect (traversal)
Best practices for modeling relationships
- Using specific relationship types.
- Reducing symmetric relationships.
- No semantically identical relationships (PARENT_OF and CHILD_OF)
- Not all mutual relationships are semantically symmetric (FOLLOWS)
- Using types vs. properties.
Property best practices
In the case of property value complexity, it depends on how the property is used. Anchors and traversal paths that use property values need to be parsed at query time.
- Property lookups have a cost.
- Parsing a complex property adds more cost.
- Anchors and properties used for traversal should be as simple as possible.
- Identifiers, outputs, and decoration are OK as complex values.
Hierarchy of accessibility
- Anchor node label, indexed anchor node properties (cheap)
- Relationship types (cheap)
- Non-indexed anchor node properties
- Downstream node labels
- Relationship properties, downstream node properties
Downstream labels and properties are most expensive.
Common graph structures used in modeling:
- Intermediate nodes
- (solve hyperedge; n-ary relationships)
- sharing context (share contextual information)
- sharing data (deduplicate information)
- organizing data (avoid density of nodes)
- Linked lists (useful whenever the sequence of objects matters)
- Interleaved linked list
- Head and tail of linked list (root point to head and tail)
- No double linked-lists (redundant symmetrical relationships)
- Timeline trees
- use time as either an anchor or a navigational aid
- topmost node in the timeline is an “all time” node
- timeline trees consume a lot of space
- Multiple structures in a single graph
// Hand-built sample: two flights out of LAS stored as CONNECTED_TO relationships.
// The property key is spelled `departure`, matching the key written by the
// LOAD CSV import of the flights file.
CREATE (:Airport {code: "ABQ"})
  <-[:CONNECTED_TO {airline: "WN", flightNumber: 500, date: "2019-1-3", departure: 1445, arrival: 1710}]-
  (:Airport {code: "LAS"})
  -[:CONNECTED_TO {airline: "WN", flightNumber: 82, date: "2019-1-3", departure: 1715, arrival: 1820}]->
  (:Airport {code: "LAX"})
LOAD CSV WITH HEADERS FROM 'file:///flights_2019_1k.csv' AS row MERGE (origin:Airport {code: row.Origin}) MERGE (destination:Airport {code: row.Dest}) MERGE (origin)-[connection:CONNECTED_TO { airline: row.UniqueCarrier, flightNumber: row.FlightNum, date: toInteger(row.Year) + '-' + toInteger(row.Month) + '-' + toInteger(row.DayofMonth)}]->(destination) ON CREATE SET connection.departure = toInteger(row.CRSDepTime), connection.arrival = toInteger(row.CRSArrTime)