elk

2024-01-17 21:20:22 +01:00 · 2024-01-17 21:20:22 +01:00 · 51d68caa88
parent f187d49780
commit 51d68caa88
7 changed files with 333 additions and 0 deletions
--- a/Courses/PostgreSQL-for-Everybody-Specialization/Database
+++ b/Courses/PostgreSQL-for-Everybody-Specialization/Database
@ -0,0 +1,148 @@
+# Some Python utility code for elasticsearch.
+# uses the requests library (low level) rather than the Python elasticsearch wrapper
+
+# https://www.pg4e.com/code/elastictool.py
+
+# (If needed)
+# https://www.pg4e.com/code/hidden-dist.py
+# copy hidden-dist.py to hidden.py
+# edit hidden.py and put in your credentials
+
+import requests
+import json
+import hidden
+
+import warnings
+
+warnings.filterwarnings("ignore", message="Unverified HTTPS request")
+
+secrets = hidden.elastic()
+
+
+# from lesson in course
+# Stand-alone example: POST a match_all query to a hard-coded test index and
+# keep the raw text, the HTTP status and the decoded JSON reply.
+# NOTE(review): the "*" in the URL looks like a masked password placeholder
+# and "prefx" may be a typo -- confirm against the lesson before relying on it.
+queryurl = "http://pg4e_86f9:*@es.py4e.com:9210/prefx/testindex/_search?pretty"
+
+# The query DSL keyword is "match_all" (with an underscore); "match all" is
+# not a known query type and makes the server reject the request.
+body = json.dumps({"query": {"match_all": {}}})   # match everything
+
+hdict = {"Content-type": "application/json; charset=UTF-8"}
+
+response = requests.post(queryurl, headers=hdict, data=body)
+text = response.text
+
+status = response.status_code
+js = json.loads(text)
+
+# Status codes:
+# 200: OK
+# 404: not found
+# 500: error at server side
+
+
+# end from lesson in course
+
+# Build the base URL of the user's index:
+#   scheme://user:pass@host:port[/prefix]/user
+# Anything other than "https" falls back to plain "http".
+base = "https://" if secrets["scheme"] == "https" else "http://"
+parts = [
+    base,
+    secrets["user"],
+    ":",
+    secrets["pass"],
+    "@",
+    secrets["host"],
+    ":",
+    str(secrets["port"]),
+]
+# The prefix is optional; it is only added when present and non-empty.
+if secrets.get("prefix"):
+    parts += ["/", secrets["prefix"]]
+# The index is named after the user.
+parts += ["/", secrets["user"]]
+url = "".join(parts)
+
+# Interactive command loop: read a command, send the matching request to the
+# index URL built above, and print the response.  Unknown commands print a
+# short help text.
+while True:
+    print()
+    try:
+        cmd = input("Enter command: ").strip()
+    except (EOFError, KeyboardInterrupt):
+        # End of input (Ctrl-D) or Ctrl-C ends the session cleanly.  Anything
+        # else is unexpected and is allowed to propagate instead of being
+        # hidden by a bare "except:".
+        print()
+        break
+
+    if cmd.startswith("quit"):
+        break
+
+    pieces = cmd.split()
+
+    # https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-delete-index.html
+    if len(pieces) == 1 and pieces[0] == "delete":
+        # Mask the password before echoing the URL.
+        prurl = url.replace(secrets["pass"], "*****")
+        print(prurl)
+        # verify=False matches the other requests in this tool, so "delete"
+        # works against the same servers that the queries do.
+        response = requests.delete(url, verify=False)
+        text = response.text
+        status = response.status_code
+        print("Status:", status)
+        print(text)
+        continue
+
+    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-all-query.html
+    if len(pieces) == 1 and pieces[0] == "match_all":
+        queryurl = url + "/_search"
+        prurl = queryurl.replace(secrets["pass"], "*****")
+        print(prurl)
+
+        body = json.dumps({"query": {"match_all": {}}})
+
+        hdict = {"Content-type": "application/json; charset=UTF-8"}
+        response = requests.post(
+            queryurl, verify=False, headers=hdict, data=body
+        )
+        text = response.text
+        status = response.status_code
+        print(status)
+        print(text)
+        continue
+
+    # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html
+    if len(pieces) == 2 and pieces[0] == "get":
+        queryurl = url + "/_doc/" + pieces[1] + "?pretty"
+        prurl = queryurl.replace(secrets["pass"], "*****")
+        print(prurl)
+
+        response = requests.get(queryurl, verify=False)
+        text = response.text
+        status = response.status_code
+        print(status)
+        print(text)
+        continue
+
+    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
+    # Everything after "search" is the query, so multi-word searches work
+    # instead of being rejected as an invalid command.
+    if len(pieces) >= 2 and pieces[0] == "search":
+        queryurl = url + "/_search?pretty"
+        prurl = queryurl.replace(secrets["pass"], "*****")
+        print(prurl)
+
+        search = " ".join(pieces[1:])
+        body = json.dumps({"query": {"query_string": {"query": search}}})
+
+        # {"query": {"query_string": { "query": search, "default_field": "content" }}}
+        print(body)
+
+        hdict = {"Content-type": "application/json; charset=UTF-8"}
+        response = requests.post(
+            queryurl, verify=False, headers=hdict, data=body
+        )
+        text = response.text
+        status = response.status_code
+        if status == 200:
+            # Re-indent the JSON reply for readability.
+            print(status)
+            print(json.dumps(json.loads(text), indent=2))
+        else:
+            print(text)
+            print()
+            print("Error, status=", status)
+        continue
+
+    # Nothing matched: show the list of supported commands.
+    print()
+    print("Invalid command, please try:")
+    print("")
+    print("  quit")
+    print("  get id")
+    print("  search string")
+    print("  match_all")
+    print("  delete")
--- a/Courses/PostgreSQL-for-Everybody-Specialization/Database
+++ b/Courses/PostgreSQL-for-Everybody-Specialization/Database
@ -0,0 +1,82 @@
+# https://www.pg4e.com/code/elastictweet.py
+
+# Example from:
+# https://elasticsearch-py.readthedocs.io/en/master/
+
+# pip install 'elasticsearch<7.14.0'
+
+# (If needed)
+# https://www.pg4e.com/code/hidden-dist.py
+# copy hidden-dist.py to hidden.py
+# edit hidden.py and put in your credentials
+
+from datetime import datetime
+from elasticsearch import Elasticsearch
+from elasticsearch import RequestsHttpConnection
+
+import hidden
+
+# Load the Elasticsearch connection settings kept in hidden.py.
+secrets = hidden.elastic()
+
+# Gather the host list and the keyword options first, then build the client.
+hosts = [secrets["host"]]
+client_options = {
+    "http_auth": (secrets["user"], secrets["pass"]),
+    "url_prefix": secrets["prefix"],
+    "scheme": secrets["scheme"],
+    "port": secrets["port"],
+    "connection_class": RequestsHttpConnection,
+}
+es = Elasticsearch(hosts, **client_options)
+
+# The user name doubles as the name of the index used below.
+indexname = secrets["user"]
+
+# Start fresh
+# https://elasticsearch-py.readthedocs.io/en/master/api.html#indices
+# Drop the index (400/404 replies are ignored), then create it again.
+dropped = es.indices.delete(index=indexname, ignore=[400, 404])
+print("Dropped index", dropped, sep="\n")
+
+created = es.indices.create(index=indexname)
+print("Created the index...", created, sep="\n")
+
+# The sample document that gets stored under the id "abc".
+doc = dict(
+    author="kimchy",
+    type="tweet",
+    text="Elasticsearch: cool. bonsai cool.",
+    timestamp=datetime.now(),
+)
+
+# Note - you can't change the key type after you start indexing documents
+indexed = es.index(index=indexname, id="abc", body=doc)
+print("Added document...")
+print(indexed["result"])
+
+fetched = es.get(index=indexname, id="abc")
+print("Retrieved document...", fetched, sep="\n")
+
+# Tell it to recompute the index - normally it would take up to 30 seconds
+# Refresh can be costly - we do it here for demo purposes
+# https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-refresh.html
+refreshed = es.indices.refresh(index=indexname)
+print("Index refreshed", refreshed, sep="\n")
+
+# Read the documents with a search term
+# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html
+# Bool query: the text must match "bonsai", filtered to documents of type "tweet".
+must_clause = {"match": {"text": "bonsai"}}
+filter_clause = {"match": {"type": "tweet"}}
+search_body = {"query": {"bool": {"must": must_clause, "filter": filter_clause}}}
+
+found = es.search(index=indexname, body=search_body)
+print("Search results...")
+print(found)
+print()
+
+# Summarize the hits: a count line followed by one line per document.
+hits = found["hits"]["hits"]
+print(f"Got {len(hits)} Hits:")
+for hit in hits:
+    source = hit["_source"]
+    print("{} {}: {}".format(source["timestamp"], source["author"], source["text"]))
--- a/Courses/PostgreSQL-for-Everybody-Specialization/Database
+++ b/Courses/PostgreSQL-for-Everybody-Specialization/Database
@ -0,0 +1,58 @@
+# Keep this file separate
+
+# https://www.pg4e.com/code/hidden-dist.py
+
+# psql -h pg.pg4e.com -p 5432 -U pg4e_be9e729093 pg4e_be9e729093
+
+# %load_ext sql
+# %config SqlMagic.autocommit=False
+# %sql postgresql://pg4e_be9e729093:pg4e_p_d5fab7440699124@pg.pg4e.com:5432/pg4e_be9e729093
+# %sql SELECT 1 as "Test"
+
+def secrets():
+    """Return the PostgreSQL connection settings as a dict.
+
+    Keys: "host", "port", "database", "user" and "pass".
+    """
+    settings = {
+        "host": "pg.pg4e.com",
+        "port": 5432,
+        "database": "pg4e_be9e729093",
+        "user": "pg4e_be9e729093",
+        "pass": "pg4e_p_d5fab7440699124",
+    }
+    return settings
+
+def elastic():
+    """Return the Elasticsearch connection settings as a dict.
+
+    Keys: "host", "prefix", "port", "scheme", "user" and "pass".
+    """
+    settings = {
+        "host": "www.pg4e.com",
+        "prefix": "elasticsearch",
+        "port": 443,
+        "scheme": "https",
+        "user": "pg4e_86f9be92a2",
+        "pass": "2008_9d454b1f",
+    }
+    return settings
+
+def readonly():
+    """Return the settings of the "readonly" PostgreSQL account as a dict.
+
+    Keys: "host", "port", "database", "user" and "pass".
+    """
+    settings = {
+        "host": "pg.pg4e.com",
+        "port": 5432,
+        "database": "readonly",
+        "user": "readonly",
+        "pass": "readonly_password",
+    }
+    return settings
+
+# Return a psycopg2 connection string
+
+# import hidden
+# secrets = hidden.readonly()
+# sql_string = hidden.psycopg2(hidden.readonly())
+
+# 'dbname=pg4e_data user=pg4e_data_read password=pg4e_p_d5fab7440699124 host=pg.pg4e.com port=5432'
+
+def psycopg2(secrets):
+    """Build a keyword/value connection string from a settings dict.
+
+    secrets must provide string values for "database", "user", "pass" and
+    "host", plus a "port" (converted with str()).  The result has the form
+    'dbname=... user=... password=... host=... port=...'.
+    """
+    fields = (
+        ('dbname', secrets['database']),
+        ('user', secrets['user']),
+        ('password', secrets['pass']),
+        ('host', secrets['host']),
+        ('port', str(secrets['port'])),
+    )
+    return ' '.join(name + '=' + value for name, value in fields)
+
+# Return an SQLAlchemy string
+
+# import hidden
+# secrets = hidden.readonly()
+# sql_string = hidden.alchemy(hidden.readonly())
+
+# postgresql://pg4e_data_read:pg4e_p_d5fab7440699124@pg.pg4e.com:5432/pg4e_data
+
+def alchemy(secrets):
+    """Build a 'postgresql://user:pass@host:port/database' URL from a settings dict.
+
+    secrets must provide string values for "user", "pass", "host" and
+    "database", plus a "port" (converted with str()).
+    """
+    credentials = secrets['user'] + ':' + secrets['pass']
+    location = secrets['host'] + ':' + str(secrets['port'])
+    return 'postgresql://' + credentials + '@' + location + '/' + secrets['database']
+
--- a/Databases/ElasticSearch/General.md
+++ b/Databases/ElasticSearch/General.md
@ -0,0 +1,37 @@
+# Application: ELK Stack
+
+- Elasticsearch - distributed NoSQL database
+- Logstash - ingests streams of activity data
+- Kibana - Visualisation / Dashboard
+
+# Fundamental concepts
+[Source: architecture](https://codersite.dev/hot-warm-architecture-elasticsearch/)
+
+The act of storing data in Elasticsearch is called **indexing**.
+
+An index is a collection of documents and each document is a collection of fields, which are the **key-value pairs** that contain your data. Every index has some properties like mappings, settings, and aliases.
+
+In Elasticsearch, a document belongs to a type, and those types live inside an index. We can draw a parallel to a traditional relational database:
+
+Relational DB ⇒ Databases ⇒ Tables ⇒ Rows ⇒ Columns
+Elasticsearch ⇒ Indices ⇒ Types ⇒ Documents ⇒ Fields
+
+In Elasticsearch, the term **document** has a specific meaning. It refers to the **top-level**, or root object that is serialized into JSON and stored in Elasticsearch under a unique ID.
+
+Elasticsearch lets you insert documents without a predefined schema (in RDBMS you need to define tables in advance).
+
+## Inverted index
+
+Relational databases add an index, such as a B-tree index, to specific columns in order to improve the speed of data retrieval. Elasticsearch uses a structure called an **inverted index** for exactly the same purpose.
+
+By default, **every field in a document is indexed** (has an inverted index) and thus is searchable – **FullText search**. A field without an inverted index is not searchable.
+
+An inverted index consists of a list of all the unique words that appear in any document, and for each word, a list of the documents in which it appears.
+
+# Summary
+
+- Elasticsearch gives us Google-like features
+    - Scalable ingest / data size / search performance
+    - Accessible through a "REST API"
+- Can be used as a full-text "search engine"
+- Can be used as a scalable NoSQL database
--- a/Databases/ElasticSearch/examples.md
+++ b/Databases/ElasticSearch/examples.md
@ -24,6 +24,14 @@ GET /items/_search
  }
 }

+GET /_search
+{
+  "query": {
+    "match_all": {}
+  }
+}
+
+
 GET /shakespeare/_search

 GET /shakespeare/_search
--- a/Databases/Hive/Select.md
+++ b/Databases/Hive/Select.md
--- a/Databases/Hive/Timestamp.md
+++ b/Databases/Hive/Timestamp.md