elk
This commit is contained in:
parent
f187d49780
commit
51d68caa88
|
@ -0,0 +1,148 @@
|
||||||
|
# Some Python utility code for elasticsearch.
|
||||||
|
# uses the requests library (low level) rather than the Python elasticsearch wrapper
|
||||||
|
|
||||||
|
# https://www.pg4e.com/code/elastictool.py
|
||||||
|
|
||||||
|
# (If needed)
|
||||||
|
# https://www.pg4e.com/code/hidden-dist.py
|
||||||
|
# copy hidden-dist.py to hidden.py
|
||||||
|
# edit hidden.py and put in your credentials
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import hidden
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
# The requests below may be sent with verify=False; silence the warning that
# urllib3 emits for each unverified HTTPS request.
warnings.filterwarnings("ignore", message="Unverified HTTPS request")

# Connection settings (host, port, scheme, user, pass, optional prefix).
secrets = hidden.elastic()


# from lesson in course
# NOTE(review): the "*" in this URL looks like a masked password - replace it
# with real credentials before expecting anything other than an auth error.
queryurl = "http://pg4e_86f9:*@es.py4e.com:9210/prefx/testindex/_search?pretty"

# The query type is spelled "match_all" (with an underscore), the same
# spelling the interactive match_all command further down uses.
body = json.dumps({"query": {"match_all": {}}})  # match everything

hdict = {"Content-type": "application/json; charset=UTF-8"}

response = requests.post(queryurl, headers=hdict, data=body)
text = response.text

status = response.status_code
# Raises a ValueError if the server did not answer with JSON.
js = json.loads(text)
|
||||||
|
|
||||||
|
# Status codes:
|
||||||
|
# 200: OK
|
||||||
|
# 404: not found
|
||||||
|
# 500: error at server side
|
||||||
|
|
||||||
|
|
||||||
|
# end from lesson in course
|
||||||
|
|
||||||
|
# Assemble the base URL of this user's index:
#   scheme://user:pass@host:port[/prefix]/user
# Anything other than "https" falls back to plain http.
url_parts = ["https://" if secrets["scheme"] == "https" else "http://"]
url_parts += [
    secrets["user"],
    ":",
    secrets["pass"],
    "@",
    secrets["host"],
    ":",
    str(secrets["port"]),
]
# The path prefix is optional; a missing or empty value is skipped.
if secrets.get("prefix"):
    url_parts += ["/", secrets["prefix"]]
# The index is named after the user.
url_parts += ["/", secrets["user"]]
url = "".join(url_parts)
|
||||||
|
|
||||||
|
# Interactive prompt: read one command per iteration, run it against the
# index at `url`, and print the HTTP status and the response text.
# Supported commands: quit, get <id>, search <string>, match_all, delete.
while True:
    print()
    try:
        cmd = input("Enter command: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input (Ctrl-D) or Ctrl-C at the prompt ends the session.
        # Any other exception is a real error and is no longer swallowed.
        print()
        break

    if cmd.startswith("quit"):
        break

    pieces = cmd.split()

    # https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-delete-index.html
    if len(pieces) == 1 and pieces[0] == "delete":
        # Only ever echo the URL with the password masked.
        prurl = url.replace(secrets["pass"], "*****")
        print(prurl)
        response = requests.delete(url)
        text = response.text
        status = response.status_code
        print("Status:", status)
        print(text)
        continue

    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-all-query.html
    if len(pieces) == 1 and pieces[0] == "match_all":
        queryurl = url + "/_search"
        prurl = queryurl.replace(secrets["pass"], "*****")
        print(prurl)

        body = json.dumps({"query": {"match_all": {}}})

        hdict = {"Content-type": "application/json; charset=UTF-8"}
        response = requests.post(
            queryurl, verify=False, headers=hdict, data=body
        )
        text = response.text
        status = response.status_code
        print(status)
        print(text)
        continue

    # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html
    if len(pieces) == 2 and pieces[0] == "get":
        queryurl = url + "/_doc/" + pieces[1] + "?pretty"
        prurl = queryurl.replace(secrets["pass"], "*****")
        print(prurl)

        response = requests.get(queryurl, verify=False)
        text = response.text
        status = response.status_code
        print(status)
        print(text)
        continue

    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
    if len(pieces) >= 2 and pieces[0] == "search":
        queryurl = url + "/_search?pretty"
        prurl = queryurl.replace(secrets["pass"], "*****")
        print(prurl)

        # Everything after the command word is the query, so multi-word
        # searches work; a single-word search is sent exactly as before.
        search = cmd.split(None, 1)[1]
        body = json.dumps({"query": {"query_string": {"query": search}}})

        # {"query": {"query_string": { "query": search, "default_field": "content" }}}
        print(body)

        hdict = {"Content-type": "application/json; charset=UTF-8"}
        response = requests.post(
            queryurl, verify=False, headers=hdict, data=body
        )
        text = response.text
        status = response.status_code
        if status == 200:
            print(status)
            # Re-serialise the response so it is printed with a 2-space indent.
            print(json.dumps(json.loads(text), indent=2))
        else:
            print(text)
            print()
            print("Error, status=", status)
        continue

    # Nothing matched: show the list of supported commands.
    print()
    print("Invalid command, please try:")
    print("")
    print(" quit")
    print(" get id")
    print(" search string")
    print(" match_all")
    print(" delete")
|
|
@ -0,0 +1,82 @@
|
||||||
|
# https://www.pg4e.com/code/elastictweet.py
|
||||||
|
|
||||||
|
# Example from:
|
||||||
|
# https://elasticsearch-py.readthedocs.io/en/master/
|
||||||
|
|
||||||
|
# pip install 'elasticsearch<7.14.0'
|
||||||
|
|
||||||
|
# (If needed)
|
||||||
|
# https://www.pg4e.com/code/hidden-dist.py
|
||||||
|
# copy hidden-dist.py to hidden.py
|
||||||
|
# edit hidden.py and put in your credentials
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from elasticsearch import Elasticsearch
|
||||||
|
from elasticsearch import RequestsHttpConnection
|
||||||
|
|
||||||
|
import hidden
|
||||||
|
|
||||||
|
secrets = hidden.elastic()

# Collect the connection settings first, then hand them to the client.
es_hosts = [secrets["host"]]
es_options = {
    "http_auth": (secrets["user"], secrets["pass"]),
    "url_prefix": secrets["prefix"],
    "scheme": secrets["scheme"],
    "port": secrets["port"],
    "connection_class": RequestsHttpConnection,
}
es = Elasticsearch(es_hosts, **es_options)

# The index is named after the user.
indexname = secrets["user"]


def show(heading, payload):
    """Print a heading line followed by the payload on its own line."""
    print(heading)
    print(payload)


# Start fresh
# https://elasticsearch-py.readthedocs.io/en/master/api.html#indices
# HTTP 400/404 answers from the delete call are passed via ignore= so they
# are not treated as errors.
res = es.indices.delete(index=indexname, ignore=[400, 404])
show("Dropped index", res)

res = es.indices.create(index=indexname)
show("Created the index...", res)

doc = {
    "author": "kimchy",
    "type": "tweet",
    "text": "Elasticsearch: cool. bonsai cool.",
    "timestamp": datetime.now(),
}

# Note - you can't change the key type after you start indexing documents
res = es.index(index=indexname, id="abc", body=doc)
show("Added document...", res["result"])

res = es.get(index=indexname, id="abc")
show("Retrieved document...", res)

# Tell it to recompute the index - normally it would take up to 30 seconds
# Refresh can be costly - we do it here for demo purposes
# https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-refresh.html
res = es.indices.refresh(index=indexname)
show("Index refreshed", res)

# Read the documents with a search term
# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html
must_clause = {"match": {"text": "bonsai"}}
filter_clause = {"match": {"type": "tweet"}}
x = {"query": {"bool": {"must": must_clause, "filter": filter_clause}}}

res = es.search(index=indexname, body=x)
show("Search results...", res)
print()

hits = res["hits"]["hits"]
print("Got %d Hits:" % len(hits))
for hit in hits:
    # Each hit carries the stored document under "_source".
    print("{timestamp} {author}: {text}".format(**hit["_source"]))
|
|
@ -0,0 +1,58 @@
|
||||||
|
# Keep this file separate
|
||||||
|
|
||||||
|
# https://www.pg4e.com/code/hidden-dist.py
|
||||||
|
|
||||||
|
# psql -h pg.pg4e.com -p 5432 -U pg4e_be9e729093 pg4e_be9e729093
|
||||||
|
|
||||||
|
# %load_ext sql
|
||||||
|
# %config SqlMagic.autocommit=False
|
||||||
|
# %sql postgresql://pg4e_be9e729093:pg4e_p_d5fab7440699124@pg.pg4e.com:5432/pg4e_be9e729093
|
||||||
|
# %sql SELECT 1 as "Test"
|
||||||
|
|
||||||
|
def secrets():
    """Return the PostgreSQL connection settings as a dict.

    Keys: "host", "port", "database", "user" and "pass".
    """
    # NOTE(review): these look like real credentials - confirm they are
    # placeholders, or rotate them and keep this file out of version control.
    settings = dict(
        host="pg.pg4e.com",
        port=5432,
        database="pg4e_be9e729093",
        user="pg4e_be9e729093",
    )
    # "pass" is a Python keyword, so it cannot be given as a keyword argument.
    settings["pass"] = "pg4e_p_d5fab7440699124"
    return settings
|
||||||
|
|
||||||
|
def elastic():
    """Return the Elasticsearch connection settings as a dict.

    Keys: "host", "prefix", "port", "scheme", "user" and "pass".
    """
    # NOTE(review): these look like real credentials - confirm they are
    # placeholders, or rotate them and keep this file out of version control.
    settings = dict(
        host="www.pg4e.com",
        prefix="elasticsearch",
        port=443,
        scheme="https",
        user="pg4e_86f9be92a2",
    )
    # "pass" is a Python keyword, so it cannot be given as a keyword argument.
    settings["pass"] = "2008_9d454b1f"
    return settings
|
||||||
|
|
||||||
|
def readonly():
|
||||||
|
return {"host": "pg.pg4e.com",
|
||||||
|
"port": 5432,
|
||||||
|
"database": "readonly",
|
||||||
|
"user": "readonly",
|
||||||
|
"pass": "readonly_password"}
|
||||||
|
|
||||||
|
# Return a psycopg2 connection string
|
||||||
|
|
||||||
|
# import hidden
|
||||||
|
# secrets = hidden.readonly()
|
||||||
|
# sql_string = hidden.psycopg2(hidden.readonly())
|
||||||
|
|
||||||
|
# 'dbname=pg4e_data user=pg4e_data_read password=pg4e_p_d5fab7440699124 host=pg.pg4e.com port=5432'
|
||||||
|
|
||||||
|
def psycopg2(secrets):
    """Return a psycopg2 (libpq keyword/value) connection string.

    Args:
        secrets: mapping with the keys "database", "user", "pass", "host"
            and "port", e.g. the dict returned by readonly().

    Returns:
        A string of the form
        "dbname=... user=... password=... host=... port=...".
        Values that are empty or contain whitespace, single quotes or
        backslashes are single-quoted and escaped, so they cannot break the
        string apart; plain values are emitted unchanged.

    Raises:
        KeyError: if one of the required keys is missing.
    """

    def quoted(value):
        # libpq rule: empty values and values containing spaces must be
        # wrapped in single quotes, with ' and \ escaped by a backslash.
        text = str(value)
        if text and not any(ch.isspace() or ch in "'\\" for ch in text):
            return text
        return "'" + text.replace("\\", "\\\\").replace("'", "\\'") + "'"

    # (libpq keyword, key in the secrets mapping), in output order.
    fields = (
        ("dbname", "database"),
        ("user", "user"),
        ("password", "pass"),
        ("host", "host"),
        ("port", "port"),
    )
    return " ".join(f"{keyword}={quoted(secrets[key])}" for keyword, key in fields)
|
||||||
|
|
||||||
|
# Return an SQLAlchemy string
|
||||||
|
|
||||||
|
# import hidden
|
||||||
|
# secrets = hidden.readonly()
|
||||||
|
# sql_string = hidden.alchemy(hidden.readonly())
|
||||||
|
|
||||||
|
# postgresql://pg4e_data_read:pg4e_p_d5fab7440699124@pg.pg4e.com:5432/pg4e_data
|
||||||
|
|
||||||
|
def alchemy(secrets):
    """Return an SQLAlchemy PostgreSQL URL.

    Args:
        secrets: mapping with the keys "user", "pass", "host", "port" and
            "database", e.g. the dict returned by readonly().

    Returns:
        A string of the form "postgresql://user:pass@host:port/database".
        The user name and password are percent-encoded so that characters
        such as "@", ":" or "/" cannot be mistaken for URL delimiters;
        values made only of letters, digits, "_", ".", "-" and "~" are
        emitted unchanged.

    Raises:
        KeyError: if one of the required keys is missing.
    """
    # Imported locally so the function carries its own dependency.
    from urllib.parse import quote

    # safe="" also encodes "/", which quote() leaves alone by default.
    user = quote(secrets["user"], safe="")
    password = quote(secrets["pass"], safe="")
    return (
        "postgresql://" + user + ":" + password + "@" + secrets["host"]
        + ":" + str(secrets["port"]) + "/" + secrets["database"]
    )
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
# Application: ELK Stack
|
||||||
|
|
||||||
|
- Elasticsearch - distributed NoSQL database
|
||||||
|
- Logstash - ingests streams of activity data
|
||||||
|
- Kibana - Visualisation / Dashboard
|
||||||
|
|
||||||
|
# Fundamental concepts
|
||||||
|
[Source: architecture](https://codersite.dev/hot-warm-architecture-elasticsearch/)
|
||||||
|
|
||||||
|
The act of storing data in Elasticsearch is called **indexing**.
|
||||||
|
|
||||||
|
An index is a collection of documents and each document is a collection of fields, which are the **key-value pairs** that contain your data. Every index has some properties like mappings, settings, and aliases.
|
||||||
|
|
||||||
|
In Elasticsearch, a document belongs to a type, and those types live inside an index. We can draw a parallel to a traditional relational database:
|
||||||
|
|
||||||
|
Relational DB ⇒ Databases ⇒ Tables ⇒ Rows ⇒ Columns
|
||||||
|
Elasticsearch ⇒ Indices ⇒ Types ⇒ Documents ⇒ Fields
|
||||||
|
|
||||||
|
In Elasticsearch, the term **document** has a specific meaning. It refers to the **top-level**, or root object that is serialized into JSON and stored in Elasticsearch under a unique ID.
|
||||||
|
|
||||||
|
Elasticsearch lets you insert documents without a predefined schema (in RDBMS you need to define tables in advance).
|
||||||
|
|
||||||
|
## Inverted index
|
||||||
|
|
||||||
|
Relational databases add an index, such as a B-tree index, to specific columns in order to improve the speed of data retrieval. Elasticsearch uses a structure called an **inverted index** for exactly the same purpose.
|
||||||
|
|
||||||
|
By default, **every field in a document is indexed** (has an inverted index) and thus is searchable – **FullText search**. A field without an inverted index is not searchable.
|
||||||
|
|
||||||
|
An inverted index consists of a list of all the unique words that appear in any document, and for each word, a list of the documents in which it appears.
|
||||||
|
|
||||||
|
# Summary
|
||||||
|
|
||||||
|
- Elasticsearch gives us Google-like features
|
||||||
|
- Scalable ingest / data size / search performance
|
||||||
|
- Accessible through a "REST API"
|
||||||
|
- Can be used as a full-text "search engine"
|
||||||
|
- Can be used as a scalable NoSQL database
|
|
@ -24,6 +24,14 @@ GET /items/_search
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GET /_search
|
||||||
|
{
|
||||||
|
"query": {
|
||||||
|
"match_all": {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
GET /shakespeare/_search
|
GET /shakespeare/_search
|
||||||
|
|
||||||
GET /shakespeare/_search
|
GET /shakespeare/_search
|
||||||
|
|
Loading…
Reference in New Issue