Summaries/Databases/Neo4j/Syntax_examples.md

---
title: Syntax_examples
updated: 2022-07-18 09:37:40Z
created: 2021-05-04 14:58:11Z
---

## show database schema info
CALL db.schema.visualization()
CALL db.schema.relTypeProperties()
CALL db.schema.nodeTypeProperties()
CALL db.propertyKeys()

## syntax
MATCH (variable:Label {propertyKey: propertyValue,  propertyKey2: propertyValue2})
RETURN variable

## relationships
()          // a node
()--()      // 2 nodes have some type of relationship
()-[]-()    // 2 nodes have some type of relationship
()-->()     // the first node has a relationship to the second node
()<--()     // the second node has a relationship to the first node

MATCH (node1)-[:REL_TYPE]->(node2)
RETURN node1, node2

MATCH (node1)-[:REL_TYPEA | REL_TYPEB]->(node2)
RETURN node1, node2


## show node with name "Tom Hanks"
// match on the full name; a node named just "Tom" does not exist in the movie graph
MATCH (tom {name: "Tom Hanks"}) RETURN tom

## return Person nodes (filtered by name, or only their names)
MATCH (a:Person) WHERE a.name = "Tom" RETURN a
MATCH (a:Person) RETURN a.name

## with where clause
match (a:Movie)
where a.released >= 1990 and a.released < 1999
return a.title;

## a list of all property keys that match a regular expression
MATCH (n) WITH keys(n) AS p UNWIND p AS x WITH DISTINCT x WHERE x =~ ".*" RETURN collect(x) AS SET;

## delete all nodes and relations
MATCH (n)
DETACH DELETE n

## create
```cypher
// map literals use "key: value"; "=" inside {...} is a syntax error
create (:Person {name: 'jan', age: 32})
```

match(n:Person {age: 32}) return n

match(n:Person {age: 32})
create (n)-[:RELATIE]->(:Person {name:"klaas"})

MATCH (n:Person)
DETACH DELETE n

MATCH (p:Person)-[:ACTED_IN]->(m:Movie)<-[:DIRECTED]-(other:Person)
where toLower(p.name) =~ 'gene.*' and other.born IN [1950,1930]
    and exists((other)-[:DIRECTED]->(m))
return m.title, other.name, other.born as YearBorn

MATCH (p:Person)-[:ACTED_IN]->(m:Movie)
where p.name = 'Tom Hanks'
with m, datetime().year - m.released as Ago, m.released - p.born as Age
where  20 <= Ago <= 33
return m.title, Ago

MATCH (m:Movie)
WITH m, size((:Person)-[:DIRECTED]->(m)) AS directors
WHERE directors >= 2
OPTIONAL MATCH (p:Person)-[:REVIEWED]->(m)
RETURN  m.title, p.name


match (a:Person), (m:Movie), (b:Person)
where a.name = 'Liam Neeson'
    and b.name = 'Benjamin Melniker'
    and m.title = 'Batman Begins'
    create (a)-[:ACTED_IN {roles: ['Rachel','Rachel Dawes']}]->(m)<-[:PRODUCED]-(b)
    return a,m,b

MATCH (a:Person),(m:Movie)
WHERE a.name = 'Christian Bale' AND
      m.title = 'Batman Begins' AND
      NOT exists((a)-[:ACTED_IN]->(m))
CREATE (a)-[rel:ACTED_IN]->(m)
SET rel.roles = ['Bruce Wayne','Batman']
RETURN a, rel, m

// Set the roles per actor for 'Forrest Gump'.
// The ELSE branch keeps the existing roles of every other actor; without it
// CASE yields null, and setting a property to null removes it.
MATCH (p:Person)-[rel:ACTED_IN]->(m:Movie)
where m.title = 'Forrest Gump'
set rel.roles = case p.name
when 'Tom Hanks' then  ['Forrest Gump']
when 'Robin Wright' then  ['Jenny Curran']
when 'Gary Sinise' then  ['Lieutenant Dan Taylor']
else rel.roles
end
return p,rel,m

MATCH (p:Person)-[rel:HELPED]->(p2:Person)
where p.name = 'Tom Hanks' and p2.name = 'Gary Sinise'
set rel += {research:'war history'}
    return p,rel,p2

// Movie nodes are identified by title (not name); merging on the wrong key
// would create a new node and ON MATCH would never run.
merge (m:Movie {title:'Forrest Gump'})
on match set m.year = 1994,
             m.tagline = 'Life is like a box of chocolates…you never know what you’re gonna get.'
return m

// Add the OlderMovie label when the movie already exists.
// Movie nodes are identified by title (not name); merging on the wrong key
// would create a new node and ON MATCH would never run.
merge (p:Movie {title:'Forrest Gump'})
on match set p:OlderMovie
return p

match (p:Person {name:'Robert Zemeckis'}), (m:Movie {title:'Forrest Gump'})
merge (p)-[r:DIRECTED]->(m)
return p,r,m


## constrain uniqueness
CREATE CONSTRAINT UniqueMovieTitleConstraint
    ON (m:Movie)
    ASSERT m.title IS UNIQUE

## constrain uniqueness over two properties
## only enterprise edition
CREATE CONSTRAINT UniqueNameBornConstraint
       ON (p:Person)
       ASSERT (p.name, p.born) IS NODE KEY

## needs enterprise edition of neo4j
create constraint PersonBornExistsConstraint on (p:Person)
assert exists(p.born)


## existence constraint (possible for nodes)
CREATE CONSTRAINT ExistsMovieTagline
    ON (m:Movie)
    ASSERT exists(m.tagline)

// the name must match the constraint as it was created (UniqueMovieTitleConstraint)
DROP CONSTRAINT UniqueMovieTitleConstraint

## existence constraint for relationship
## only enterprise edition of neo4j
CREATE CONSTRAINT ExistsREVIEWEDRating
    ON ()-[rel:REVIEWED]-()
    ASSERT exists(rel.rating)

## drop constraint
DROP CONSTRAINT ExistsREVIEWEDRating

CALL db.constraints()   // better: SHOW CONSTRAINTS

## Indexes
## Single property index
CREATE INDEX MovieReleased FOR (m:Movie) ON (m.released)

## composite index
CREATE INDEX MovieReleasedVideoFormat
    FOR (m:Movie)
    ON (m.released, m.videoFormat)

## full-text schema index
CALL db.index.fulltext.createNodeIndex(
      'MovieTitlePersonName',['Movie', 'Person'], ['title', 'name'])
### To use a full-text schema index, you must call the query procedure that uses the index.
CALL db.index.fulltext.queryNodes(
        'MovieTitlePersonName', 'Jerry')
    YIELD node, score
    RETURN node.title, score

### Searching on a particular property
CALL db.index.fulltext.queryNodes(
     'MovieTitlePersonName', 'name: Jerry') YIELD node
RETURN node

## drop index
DROP INDEX MovieReleasedVideoFormat

## dropping full-text schema index
CALL db.index.fulltext.drop('MovieTitlePersonName')

## search a full-text schema index
CALL db.index.fulltext.queryNodes('MovieTaglineFTIndex', 'real OR world')
    YIELD node
    RETURN node.title, node.tagline

## set parameters
:param year => 2000
:params {actorName: 'Tom Cruise', movieName: 'Top Gun'}
## for statement
MATCH (p:Person)-[:ACTED_IN]->(m:Movie)
WHERE p.name = $actorName AND m.title = $movieName
RETURN p, m
## clear
:params {}
## view
:params

## Analyzing queries
- EXPLAIN provides estimates of the graph engine processing that will occur, but does not execute the Cypher statement.

- PROFILE provides real profiling information for what has occurred in the graph engine during the query and executes the Cypher statement. (run-time performance metrics)

## Monitoring queries
:queries


## exercise
:params {year:2006, ratingValue:65}

match (p:Person)-[r:REVIEWED]->(m:Movie)<-[:ACTED_IN]-(a:Person)
where m.released = $year and r.rating = $ratingValue
return p.name, m.title, m.released, r.rating, collect(a.name)


:auto USING PERIODIC COMMIT LOAD CSV
commits every 1000 rows by default
Eager operators prevent periodic commit from taking effect, so avoid them while loading, e.g.:
    collect()
    count()
    ORDER BY
    DISTINCT

LOAD CSV WITH HEADERS FROM
'https://data.neo4j.com/v4.0-intro-neo4j/directors.csv' AS row
MATCH (movie:Movie {id:toInteger(row.movieId)})
MATCH (person:Person {id: toInteger(row.personId)})
MERGE (person)-[:DIRECTED]->(movie)
ON CREATE SET person:Director

LOAD CSV WITH HEADERS
FROM 'http://data.neo4j.com/v4.0-intro-neo4j/actors.csv'
AS line
MERGE (actor:Person {name: line.name})
  ON CREATE SET actor.born = toInteger(trim(line.birthYear)), actor.actorId = line.id
  ON MATCH SET actor.actorId = line.id


## before load
CREATE CONSTRAINT UniqueMovieIdConstraint ON (m:Movie) ASSERT m.id IS UNIQUE;
## after load
// index syntax is FOR (pattern) ON (properties)
CREATE INDEX MovieTitleIndex FOR (m:Movie) ON (m.title);


// Delete all constraints and indexes
CALL apoc.schema.assert({},{},true);
// Delete all nodes and relationships
CALL apoc.periodic.iterate(
  'MATCH (n) RETURN n',
  'DETACH DELETE n',
  { batchSize:500 }
)


## test apoc
CALL dbms.procedures()
YIELD name WHERE name STARTS WITH "apoc"
RETURN name


## Graph modelling
How does Neo4j support graph data modeling?
- allows you to create property graphs.
- traversing the graph: traversal means anchoring a query based upon a property value, then traversing the graph to satisfy the query


Nodes and relationships are the key components of a graph.
Nodes must have labels to categorize entities.
A label is used to categorize a set of nodes.
Relationships must have direction and type.
A relationship is only traversed once during a query.
Nodes and relationships can have properties.
Properties are used to provide specific values to a node or relationship.

## Your model must address Nodes:
- Uniqueness of nodes: always have a property (or set of properties) that uniquely identify a node.
- Complex data: balance between number of properties that represent complex data vs. multiple nodes and relationships.

super nodes = (a node with lots of fan-in or fan-out)
- Reduce property duplication (no repeating property values)
- Reduce gather-and-inspect (traversal)

## Best practices for modeling relationships
- Using specific relationship types.
- Reducing symmetric relationships.
    - No semantically identical relationships (PARENT_OF and CHILD_OF)
    - Not all mutual relationships are semantically symmetric (FOLLOWS)
- Using types vs. properties.

## Property best practices
In the case of property value complexity, it depends on how the property is used. Anchors and traversal paths that use property values need to be parsed at query time.

- Property lookups have a cost.
- Parsing a complex property adds more cost.
- Anchors and properties used for traversal should be as simple as possible.
- Identifiers, outputs, and decoration are OK as complex values.

## Hierarchy of accessibility
1. Anchor node label, indexed anchor node properties (cheap)
2. Relationship types (cheap)
3. Non-indexed anchor node properties
4. Downstream node labels
5. Relationship properties, downstream node properties

Downstream labels and properties are most expensive.

## Common graph structures used in modeling:
1. Intermediate nodes
    - (solve hyperedge; n-ary relationships)
    - sharing context (share contextual information)
    - sharing data (deduplicate information)
    - organizing data (avoid density of nodes)
2. Linked lists (useful whenever the sequence of objects matters)
    - Interleaved linked list
    - Head and tail of linked list (root point to head and tail)
    - No double linked-lists (redundant symmetrical relationships)
3. Timeline trees
    - use time as either an anchor or a navigational aid
    - topmost node in the timeline is an “all time” node
    - timeline trees consume a lot of space
4. Multiple structures in a single graph


// Sample data: two flights leaving LAS (to ABQ and to LAX).
// The property key is spelled "departure" so it matches the key set by the CSV load.
CREATE (:Airport {code: "ABQ"})
  <-[:CONNECTED_TO {airline: "WN", flightNumber: 500, date: "2019-1-3", departure: 1445, arrival: 1710}]-
  (:Airport {code: "LAS"})
  -[:CONNECTED_TO {airline: "WN", flightNumber: 82, date: "2019-1-3", departure: 1715, arrival: 1820}]->
  (:Airport {code: "LAX"})


LOAD CSV WITH HEADERS FROM  'file:///flights_2019_1k.csv' AS row
MERGE (origin:Airport {code: row.Origin})
MERGE (destination:Airport {code: row.Dest})
MERGE (origin)-[connection:CONNECTED_TO {
  airline: row.UniqueCarrier,
  flightNumber: row.FlightNum,
  date: toInteger(row.Year) + '-' + toInteger(row.Month) + '-' + toInteger(row.DayofMonth)}]->(destination)
ON CREATE SET connection.departure = toInteger(row.CRSDepTime), connection.arrival = toInteger(row.CRSArrTime)