OpenSearch

Elasticsearch / OpenSearch

OpenSearch is a fork of Elasticsearch, created after version 7.10.

Set up a testing environment

Opensearch

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Single-node OpenSearch plus OpenSearch Dashboards for local testing.
# Export OPENSEARCH_INITIAL_ADMIN_PASSWORD in the shell (or an .env file)
# before running `docker compose up`.
version: '3'
services:
  opensearch-node1: # This is also the hostname of the container within the Docker network (i.e. https://opensearch-node1/)
    image: opensearchproject/opensearch:latest # Specifying the latest available image - modify if you want a specific version
    container_name: opensearch-node1
    environment:
      - discovery.type=single-node
      # Required by OpenSearch 2.12 and later: the security plugin refuses to
      # start without an initial admin password. Taken from the host
      # environment so that no secret is committed with this file.
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
    ulimits:
      memlock:
        soft: -1 # Set memlock to unlimited (no soft or hard limit)
        hard: -1
      nofile:
        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536
        hard: 65536
    volumes:
      - opensearch-data1:/usr/share/opensearch/data # Creates volume called opensearch-data1 and mounts it to the container
    ports:
      # Quoted so the HOST:CONTAINER mapping is always read as a string.
      - "9200:9200" # REST API
      - "9600:9600" # Performance Analyzer
    networks:
      - opensearch-net # All of the containers will join the same Docker bridge network
  opensearch-dashboards:
    image: opensearchproject/opensearch-dashboards:latest # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes
    container_name: opensearch-dashboards
    ports:
      - "5601:5601" # Map host port 5601 to container port 5601
    expose:
      - "5601" # Expose port 5601 for web access to OpenSearch Dashboards
    environment:
      OPENSEARCH_HOSTS: '["https://opensearch-node1:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query
    networks:
      - opensearch-net

volumes:
  opensearch-data1:

networks:
  opensearch-net:

Elasticsearch

Reference: https://gist.github.com/tomcant/ebce0df19cdde66cb7c7b5939fdba2ad

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Single-node Elasticsearch 7.10.2 plus Kibana for local testing.
version: "3.7"

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
    ports:
      # Bind fixed host ports: a bare container port is published on a random
      # host port, which breaks requests sent to http://127.0.0.1:9200.
      - "9200:9200"
      - "9300:9300"
    environment:
      - discovery.type=single-node
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data

  kibana:
    image: docker.elastic.co/kibana/kibana:7.10.2
    ports:
      - "5601:5601"
    environment:
      # Kibana 7.x reads ELASTICSEARCH_HOSTS; the older ELASTICSEARCH_URL
      # (elasticsearch.url) setting was removed in 7.0.
      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
    depends_on:
      - elasticsearch

volumes:
  elasticsearch_data:
    driver: local

Sample dump of data for searching

For example, take a look at https://github.com/elastic/examples/tree/master/Exploring%20Public%20Datasets/nyc_traffic_accidents

The data is in CSV format; it is loaded into Elasticsearch through a Filebeat pipeline.

Index

Create

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
# create an index
PUT http://127.0.0.1:9200/shopping

# create an index with mapping
# The mapping is sent in the create-index request itself: PUT /<index>/_mapping
# only updates an index that already exists and cannot create one.
PUT http://127.0.0.1:9200/user
{
    "mappings": {
        "properties": {
            "name": {
                "type": "text",
                "index": true       // matching depends on word segmentation (the analyzer)
            },
            "sex": {
                "type": "keyword",
                "index": true       // can only be matched as a whole keyword
            },
            "tel": {
                "type": "keyword",
                "index": false      // querying this field results in an exception
            }
        }
    }
}

Delete

1
# delete the shopping index
DELETE /shopping

Display

1
2
# show tables: list all indices (?v adds the column headers)
GET http://127.0.0.1:9200/_cat/indices?v

Backup and restore

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# list the snapshot repositories, then the snapshots stored in my_repository
GET _snapshot
GET _snapshot/my_repository/*?verbose=false

# back up hourly: snapshot lifecycle policy "hourly-snapshots"
# - schedule is a cron expression that fires at minute 0 of every hour
# - the snapshot name is built with date math ({now/d})
# - retention: snapshots expire after 1 day; keep at least 1 and at most 24
PUT _slm/policy/hourly-snapshots
{
  "name": "<hourly-snapshot-{now/d}>",
  "schedule": "0 0 * * * ?",
  "repository": "my_repository",
  "config": {
    "indices": "*",
    "include_global_state": true
  },
  "retention": {
    "expire_after": "1d",
    "min_count": 1,
    "max_count": 24
  }
}

# restore the listed indices from one snapshot
POST _snapshot/my_repository/my_snapshot_2099.05.06/_restore
{
  "indices": "my-index,logs-my_app-default"
}

# check the cluster health
GET _cluster/health

Data

Create

1
2
3
4
5
6
7
# create a document in the shopping index (no id is given in the URL)
# Port 9200 is the REST API port used by every other request in these notes.
POST http://127.0.0.1:9200/shopping/_doc
{
    "title": "android phone",
    "category": "android",
    "images": "http://www.image.com/x.jpg",
    "price": 3999
}

Retrieve

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# get a single document by id
GET http://127.0.0.1:9200/shopping/_doc/1001

# search all (no request body)
GET http://127.0.0.1:9200/shopping/_search

# search all (explicit match_all query)
GET http://127.0.0.1:9200/shopping/_search
{
    "query":{
        "match_all":{ }
    }
}

# search by field through the q= URL parameter
GET http://127.0.0.1:9200/shopping/_search?q=category:android

# where: the same field condition written as a match query in the body
GET http://127.0.0.1:9200/shopping/_search
{
    "query":{
        "match":{
            "category": "android"
        }
    }
}

select field

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
# select field: return only the listed fields instead of the whole document
POST my-index-000001/_search
{
  "query": {
    "match": {
      "user.id": "kimchy"
    }
  },
  "fields": [
    "user.id",
    "http.response.*",         // wildcard pattern for the field name
    {
      "field": "@timestamp",
      "format": "epoch_millis" // object form: request a specific output format
    }
  ],
  "_source": false
}

and+or

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# and: every clause in "must" has to match
# Each clause in the array is its own JSON object, wrapped in { }.
POST http://127.0.0.1:9200/shopping/_search
{
    "query": {
        "bool": {
            "must": [
                {
                    "match": {
                        "category": "android"
                    }
                },
                {
                    "match": {
                        "price": 1999
                    }
                }
            ]
        }
    }
}

# or: at least one clause in "should" has to match
POST http://127.0.0.1:9200/shopping/_search
{
    "query": {
        "bool": {
            "should": [
                {
                    "match": {
                        "category": "android"
                    }
                },
                {
                    "match": {
                        "category": "iphone"
                    }
                }
            ]
        }
    }
}

# range query: (category is android OR iphone) AND price > 5000
# When "should" sits next to "filter" its clauses become optional by default,
# so minimum_should_match is set to 1 to keep the OR condition mandatory.
GET http://127.0.0.1:9200/shopping/_search
{
    "query": {
        "bool": {
            "should": [
                {
                    "match": {
                        "category": "android"
                    }
                },
                {
                    "match": {
                        "category": "iphone"
                    }
                }
            ],
            "minimum_should_match": 1,
            "filter": {
                "range": {
                    "price": {
                        "gt": 5000
                    }
                }
            }
        }
    }
}

# full text: phrase match, with the matching text highlighted in the response
GET http://127.0.0.1:9200/shopping/_search
{
    "query": {
        "match_phrase": {
            "category": "android phone"
        }
    },
    "highlight": {
        "fields":{
            "category": {}
        }
    }
}

order by + limit

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# order by + limit: sort by price descending and return 2 hits starting at offset 0
# ("from" is the offset, "size" is the limit)
GET http://127.0.0.1:9200/shopping/_search
{
    "query":{
        "match_all":{ }
    },
    "from": 0,
    "size": 2,
    "sort":{
        "price": { "order": "desc" }
    }
}

group by + count

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# group by: bucket the documents by price with a terms aggregation
GET http://127.0.0.1:9200/shopping/_search
{
    "aggs":{
        "price_group":{
            "terms":{
                "field": "price"
            }
        }
    },
    "size": 0  // size 0 stops the raw documents from being returned
}

# average: avg aggregation over the price field
GET http://127.0.0.1:9200/shopping/_search
{
    "aggs":{
        "price_avg":{
            "avg":{
                "field": "price"
            }
        }
    },
    "size": 0
}

Update

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
# full update: replace document 1001, changing the price from 3999 to 4999
# Port 9200 is the REST API port used by every other request in these notes.
PUT http://127.0.0.1:9200/shopping/_doc/1001
{
    "title": "android phone",
    "category": "android",
    "images": "http://www.image.com/x.jpg",
    "price": 4999
}

# partial update using POST: only the fields listed under "doc" are sent
POST http://127.0.0.1:9200/shopping/_update/1001
{
    "doc": {
        "title": "Google Nexus 7"
    }
}

Delete

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# delete a single document by id
DELETE /my-index-000001/_doc/1

# delete every document that matches a query
POST /my-index-000001/_delete_by_query
{
  "query": {
    "match": {
      "user.id": "elkbee"
    }
  }
}

Search relevance

Default boost is 2.2. Scoring uses BM25: in the absence of an advanced optimization, the free parameters are usually chosen as $k_1 \in [1.2, 2.0]$ and $b = 0.75$.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
# relevance: three optional title matches; "Spark" carries twice the boost of
# the other two. explain=true asks for the score explanation of every hit.
# Each clause in the "should" array is its own JSON object, wrapped in { }.
GET /testscore/_search?explain=true
{
    "query": {
        "bool": {
            "should": [
                {
                    "match": {
                        "title": { "query": "Hadoop", "boost": 1 }
                    }
                },
                {
                    "match": {
                        "title": { "query": "Hive", "boost": 1 }
                    }
                },
                {
                    "match": {
                        "title": { "query": "Spark", "boost": 2 }
                    }
                }
            ]
        }
    }
}

Java client

Library

1
2
3
4
5
<!-- Maven dependency: OpenSearch high-level REST client, version 2.4.0 -->
<dependency>
  <groupId>org.opensearch.client</groupId>
  <artifactId>opensearch-rest-high-level-client</artifactId>
  <version>2.4.0</version>
</dependency>
  • Elasticsearch high level rest client
  • Opensearch
  • Opensearch high level rest client
1
2
3
4
5
<!-- Maven dependency: OpenSearch high-level REST client, version 2.4.0 -->
<dependency>
  <groupId>org.opensearch.client</groupId>
  <artifactId>opensearch-rest-high-level-client</artifactId>
  <version>2.4.0</version>
</dependency>

In short, the high-level REST client API is handwritten, whereas the Java client is code-generated.

Send query to elasticsearch using Jest

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
/**
 * Builds a Jest client for the node at http://localhost:9200.
 *
 * The HTTP client is configured as multi-threaded, with at most 2 connections
 * per route and 10 connections in total.
 *
 * @return the client produced by the configured factory
 */
public JestClient jestClient() {
    HttpClientConfig clientConfig = new HttpClientConfig.Builder("http://localhost:9200")
        .multiThreaded(true)
        .defaultMaxTotalConnectionPerRoute(2)
        .maxTotalConnection(10)
        .build();

    JestClientFactory clientFactory = new JestClientFactory();
    clientFactory.setHttpClientConfig(clientConfig);
    return clientFactory.getObject();
}

// NOTE(review): `search` and the `jestClient` instance are not defined in this
// snippet - presumably the query JSON string and a client obtained from
// jestClient(); confirm against the surrounding code.
// Run the search and map every hit's source to an Employee.
List<SearchResult.Hit<Employee, Void>> searchResults = 
  jestClient.execute(new Search.Builder(search).build())
    .getHits(Employee.class);
// Print the id and score of each hit.
searchResults.forEach(hit -> {
    System.out.println(String.format("Document %s has score %s", hit.id, hit.score));
});

Using elasticsearch library to generate Query

The Elasticsearch library can be used to build a query and translate it into a JSON request string.