elk
This commit is contained in:
parent
f187d49780
commit
51d68caa88
|
@ -0,0 +1,148 @@
|
|||
# Some Python utility code for elasticsearch.
|
||||
# uses the requests library (low level) rather than the Python elasticsearch wrapper
|
||||
|
||||
# https://www.pg4e.com/code/elastictool.py
|
||||
|
||||
# (If needed)
|
||||
# https://www.pg4e.com/code/hidden-dist.py
|
||||
# copy hidden-dist.py to hidden.py
|
||||
# edit hidden.py and put in your credentials
|
||||
|
||||
import requests
|
||||
import json
|
||||
import hidden
|
||||
|
||||
import warnings
|
||||
|
||||
# Suppress the "Unverified HTTPS request" warning; the interactive commands
# further down call the server with verify=False.
warnings.filterwarnings("ignore", message="Unverified HTTPS request")

# Connection settings (scheme, host, port, prefix, user, pass) from hidden.py
secrets = hidden.elastic()

# from lesson in course
# NOTE(review): this snippet sends a live request at start-up to a hard-coded
# URL whose password is the placeholder "*", and json.loads() below raises if
# the reply is not JSON - confirm it is still wanted in the tool.
queryurl = "http://pg4e_86f9:*@es.py4e.com:9210/prefx/testindex/_search?pretty"

# The Query DSL name is "match_all" (underscore), as used by the match_all
# command later in this file; "match all" is not a known query type.
body = json.dumps({"query": {"match_all": {}}})  # match everything

hdict = {"Content-type": "application/json; charset=UTF-8"}

response = requests.post(queryurl, headers=hdict, data=body)
text = response.text

status = response.status_code
js = json.loads(text)

# Status codes:
# 200: OK
# 404: not found
# 500: error at server side

# end from lesson in course
|
||||
|
||||
# Build the base URL for the user's index:
#   scheme://user:pass@host:port[/prefix]/user
# Anything other than "https" falls back to plain http.
scheme_part = "https://" if secrets["scheme"] == "https" else "http://"
url = "".join(
    (
        scheme_part,
        secrets["user"],
        ":",
        secrets["pass"],
        "@",
        secrets["host"],
        ":",
        str(secrets["port"]),
    )
)

# The prefix is optional; skip it when missing or empty
if secrets.get("prefix"):
    url += "/" + secrets["prefix"]

# The index name is the account's user name
url += "/" + secrets["user"]
|
||||
|
||||
# Interactive command loop: read one command per line, run the matching
# Elasticsearch REST call against the index URL and print the reply.
# The password is masked before any URL is echoed to the terminal.
while True:
    print()
    try:
        cmd = input("Enter command: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or Ctrl-C at the prompt ends the session quietly;
        # any other exception is a real error and is allowed to surface.
        print()
        break

    if cmd.startswith("quit"):
        break

    pieces = cmd.split()

    # https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-delete-index.html
    if len(pieces) == 1 and pieces[0] == "delete":
        prurl = url.replace(secrets["pass"], "*****")
        print(prurl)
        # verify=False for consistency with every other request in this loop
        response = requests.delete(url, verify=False)
        text = response.text
        status = response.status_code
        print("Status:", status)
        print(text)
        continue

    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-all-query.html
    if len(pieces) == 1 and pieces[0] == "match_all":
        queryurl = url + "/_search"
        prurl = queryurl.replace(secrets["pass"], "*****")
        print(prurl)

        body = json.dumps({"query": {"match_all": {}}})

        hdict = {"Content-type": "application/json; charset=UTF-8"}
        response = requests.post(
            queryurl, verify=False, headers=hdict, data=body
        )
        text = response.text
        status = response.status_code
        print(status)
        print(text)
        continue

    # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html
    if len(pieces) == 2 and pieces[0] == "get":
        queryurl = url + "/_doc/" + pieces[1] + "?pretty"
        prurl = queryurl.replace(secrets["pass"], "*****")
        print(prurl)

        response = requests.get(queryurl, verify=False)
        text = response.text
        status = response.status_code
        print(status)
        print(text)
        continue

    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
    if len(pieces) == 2 and pieces[0] == "search":
        queryurl = url + "/_search?pretty"
        prurl = queryurl.replace(secrets["pass"], "*****")
        print(prurl)

        body = json.dumps({"query": {"query_string": {"query": pieces[1]}}})

        # {"query": {"query_string": { "query": search, "default_field": "content" }}}
        print(body)

        hdict = {"Content-type": "application/json; charset=UTF-8"}
        response = requests.post(
            queryurl, verify=False, headers=hdict, data=body
        )
        text = response.text
        status = response.status_code
        if status == 200:
            print(status)
            # Re-serialize so the reply is printed with a 2-space indent
            print(json.dumps(json.loads(text), indent=2))
        else:
            print(text)
            print()
            print("Error, status=", status)
        continue

    # Nothing matched: show the list of supported commands
    print()
    print("Invalid command, please try:")
    print("")
    print(" quit")
    print(" get id")
    print(" search string")
    print(" match_all")
    print(" delete")
|
|
@ -0,0 +1,82 @@
|
|||
# https://www.pg4e.com/code/elastictweet.py
|
||||
|
||||
# Example from:
|
||||
# https://elasticsearch-py.readthedocs.io/en/master/
|
||||
|
||||
# pip install 'elasticsearch<7.14.0'
|
||||
|
||||
# (If needed)
|
||||
# https://www.pg4e.com/code/hidden-dist.py
|
||||
# copy hidden-dist.py to hidden.py
|
||||
# edit hidden.py and put in your credentials
|
||||
|
||||
from datetime import datetime
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch import RequestsHttpConnection
|
||||
|
||||
import hidden
|
||||
|
||||
# Load the Elasticsearch connection settings from hidden.py
secrets = hidden.elastic()

# Positional host list and the (user, password) pair for HTTP auth
es_hosts = [secrets["host"]]
es_credentials = (secrets["user"], secrets["pass"])

es = Elasticsearch(
    es_hosts,
    http_auth=es_credentials,
    url_prefix=secrets["prefix"],
    scheme=secrets["scheme"],
    port=secrets["port"],
    connection_class=RequestsHttpConnection,
)

# The index name is the account's user name
indexname = secrets["user"]
|
||||
|
||||
# Start fresh: drop the index (400/404 replies are ignored), then recreate it
# https://elasticsearch-py.readthedocs.io/en/master/api.html#indices
dropped = es.indices.delete(index=indexname, ignore=[400, 404])
print("Dropped index")
print(dropped)

created = es.indices.create(index=indexname)
print("Created the index...")
print(created)

tweet = {
    "author": "kimchy",
    "type": "tweet",
    "text": "Elasticsearch: cool. bonsai cool.",
    "timestamp": datetime.now(),
}
tweet_id = "abc"

# Note - you can't change the key type after you start indexing documents
indexed = es.index(index=indexname, id=tweet_id, body=tweet)
print("Added document...")
print(indexed["result"])

# Fetch the same document back by its id
fetched = es.get(index=indexname, id=tweet_id)
print("Retrieved document...")
print(fetched)

# Tell it to recompute the index - normally it would take up to 30 seconds
# Refresh can be costly - we do it here for demo purposes
# https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-refresh.html
refreshed = es.indices.refresh(index=indexname)
print("Index refreshed")
print(refreshed)
|
||||
|
||||
# Read the documents with a search term: "text" must match "bonsai",
# filtered to documents whose "type" matches "tweet"
# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html
must_clause = {"match": {"text": "bonsai"}}
filter_clause = {"match": {"type": "tweet"}}
search_body = {"query": {"bool": {"must": must_clause, "filter": filter_clause}}}

results = es.search(index=indexname, body=search_body)
print("Search results...")
print(results)
print()

# Print one line per hit from the stored document source
hits = results["hits"]["hits"]
print("Got %d Hits:" % len(hits))
for hit in hits:
    s = hit["_source"]
    print(f"{s['timestamp']} {s['author']}: {s['text']}")
|
|
@ -0,0 +1,58 @@
|
|||
# Keep this file separate
|
||||
|
||||
# https://www.pg4e.com/code/hidden-dist.py
|
||||
|
||||
# psql -h pg.pg4e.com -p 5432 -U pg4e_be9e729093 pg4e_be9e729093
|
||||
|
||||
# %load_ext sql
|
||||
# %config SqlMagic.autocommit=False
|
||||
# %sql postgresql://pg4e_be9e729093:pg4e_p_d5fab7440699124@pg.pg4e.com:5432/pg4e_be9e729093
|
||||
# %sql SELECT 1 as "Test"
|
||||
|
||||
def secrets():
    """Return the PostgreSQL connection settings as a dict.

    Keys: host, port, database, user, pass.
    """
    # NOTE(review): credentials are stored here in plain text - confirm this
    # file is meant to be under version control.
    settings = {
        "host": "pg.pg4e.com",
        "port": 5432,
        "database": "pg4e_be9e729093",
        "user": "pg4e_be9e729093",
        "pass": "pg4e_p_d5fab7440699124",
    }
    return settings
|
||||
|
||||
def elastic():
    """Return the Elasticsearch connection settings as a dict.

    Keys: host, prefix, port, scheme, user, pass.
    """
    # NOTE(review): credentials are stored here in plain text - confirm this
    # file is meant to be under version control.
    settings = {
        "host": "www.pg4e.com",
        "prefix": "elasticsearch",
        "port": 443,
        "scheme": "https",
        "user": "pg4e_86f9be92a2",
        "pass": "2008_9d454b1f",
    }
    return settings
|
||||
|
||||
def readonly():
    """Return the connection settings for the "readonly" database as a dict.

    Keys: host, port, database, user, pass.
    """
    settings = {
        "host": "pg.pg4e.com",
        "port": 5432,
        "database": "readonly",
        "user": "readonly",
        "pass": "readonly_password",
    }
    return settings
|
||||
|
||||
# Return a psycopg2 connection string
|
||||
|
||||
# import hidden
|
||||
# secrets = hidden.readonly()
|
||||
# sql_string = hidden.psycopg2(hidden.readonly())
|
||||
|
||||
# 'dbname=pg4e_data user=pg4e_data_read password=pg4e_p_d5fab7440699124 host=pg.pg4e.com port=5432'
|
||||
|
||||
def psycopg2(secrets):
    """Return a keyword/value connection string for psycopg2.

    Args:
        secrets: dict with the keys database, user, pass, host and port.

    Returns:
        A string of the form
        "dbname=... user=... password=... host=... port=...".
        Values that are empty or contain whitespace, a single quote or a
        backslash are wrapped in single quotes with quotes and backslashes
        backslash-escaped; all other values are emitted unchanged.

    Raises:
        KeyError: if one of the required keys is missing.
    """

    def _quote(value):
        # Quote only when needed so plain values produce the same string
        # as before.
        text = str(value)
        needs_quoting = text == "" or any(
            ch.isspace() or ch in "'\\" for ch in text
        )
        if not needs_quoting:
            return text
        # Escape backslashes first so the escapes added for quotes survive
        return "'" + text.replace("\\", "\\\\").replace("'", "\\'") + "'"

    fields = (
        ("dbname", secrets["database"]),
        ("user", secrets["user"]),
        ("password", secrets["pass"]),
        ("host", secrets["host"]),
        ("port", secrets["port"]),
    )
    return " ".join(key + "=" + _quote(value) for key, value in fields)
|
||||
|
||||
# Return an SQLAlchemy string
|
||||
|
||||
# import hidden
|
||||
# secrets = hidden.readonly()
|
||||
# sql_string = hidden.alchemy(hidden.readonly())
|
||||
|
||||
# postgresql://pg4e_data_read:pg4e_p_d5fab7440699124@pg.pg4e.com:5432/pg4e_data
|
||||
|
||||
def alchemy(secrets):
    """Return an SQLAlchemy PostgreSQL URL built from *secrets*.

    Args:
        secrets: dict with the keys user, pass, host, port and database.
            user and pass must be the raw (not already percent-encoded)
            values.

    Returns:
        A string of the form "postgresql://user:pass@host:port/database".
        The user and password are URL-escaped so characters such as "@",
        "/" or ":" cannot be mistaken for URL delimiters; values made only
        of letters, digits, "_", ".", "-" and "~" are emitted unchanged.

    Raises:
        KeyError: if one of the required keys is missing.
    """
    # Local import keeps this block self-contained
    from urllib.parse import quote_plus

    user = quote_plus(secrets['user'])
    password = quote_plus(secrets['pass'])
    return ('postgresql://' + user + ':' + password + '@' + secrets['host'] +
            ':' + str(secrets['port']) + '/' + secrets['database'])
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
# Application: ELK Stack
|
||||
|
||||
- Elasticsearch - distributed NoSQL database
|
||||
- Logstash - ingests streams of activity data
|
||||
- Kibana - Visualisation / Dashboard
|
||||
|
||||
# Fundamental concepts
|
||||
[Source: architecture](https://codersite.dev/hot-warm-architecture-elasticsearch/)
|
||||
|
||||
The act of storing data in Elasticsearch is called **indexing**.
|
||||
|
||||
An index is a collection of documents and each document is a collection of fields, which are the **key-value pairs** that contain your data. Every index has some properties like mappings, settings, and aliases.
|
||||
|
||||
In Elasticsearch, a document belongs to a type, and those types live inside an index. We can draw a parallel to a traditional relational database:
|
||||
|
||||
Relational DB ⇒ Databases ⇒ Tables ⇒ Rows ⇒ Columns
|
||||
Elasticsearch ⇒ Indices ⇒ Types ⇒ Documents ⇒ Fields
|
||||
|
||||
In Elasticsearch, the term **document** has a specific meaning. It refers to the **top-level**, or root object that is serialized into JSON and stored in Elasticsearch under a unique ID.
|
||||
|
||||
Elasticsearch lets you insert documents without a predefined schema (in RDBMS you need to define tables in advance).
|
||||
|
||||
## Inverted index
|
||||
|
||||
Relational databases add an index, such as a B-tree index, to specific columns in order to improve the speed of data retrieval. Elasticsearch uses a structure called an **inverted index** for exactly the same purpose.
|
||||
|
||||
By default, **every field in a document is indexed** (has an inverted index) and thus is searchable – **full-text search**. A field without an inverted index is not searchable.
|
||||
|
||||
An inverted index consists of a list of all the unique words that appear in any document, and for each word, a list of the documents in which it appears.
|
||||
|
||||
# Summary
|
||||
|
||||
- Elasticsearch gives us Google-like features
|
||||
- Scalable ingest / data size / search performance
|
||||
- Accessible through a "REST API"
|
||||
- Can be used as a full-text "search engine"
|
||||
- Can be used as a scalable NoSQL database
|
|
@ -24,6 +24,14 @@ GET /items/_search
|
|||
}
|
||||
}
|
||||
|
||||
GET /_search
|
||||
{
|
||||
"query": {
|
||||
"match_all": {}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
GET /shakespeare/_search
|
||||
|
||||
GET /shakespeare/_search
|
||||
|
|
Loading…
Reference in New Issue