eolas/neuron/d0ed26d0-cdc8-4643-8c09-445408195f9b/.neuron/output/OpenSearch.html
2024-10-20 19:00:04 +01:00

130 lines
No EOL
18 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html><html><head><meta content="text/html; charset=utf-8" http-equiv="Content-Type" /><meta content="width=device-width, initial-scale=1" name="viewport" /><!--replace-start-0--><!--replace-start-5--><!--replace-start-8--><title>OpenSearch - My Zettelkasten</title><!--replace-end-8--><!--replace-end-5--><!--replace-end-0--><link href="https://cdn.jsdelivr.net/npm/fomantic-ui@2.8.7/dist/semantic.min.css" rel="stylesheet" /><link href="https://fonts.googleapis.com/css?family=Merriweather|Libre+Franklin|Roboto+Mono&amp;display=swap" rel="stylesheet" /><!--replace-start-1--><!--replace-start-4--><!--replace-start-7--><link href="https://raw.githubusercontent.com/srid/neuron/master/assets/neuron.svg" rel="icon" /><meta content="OpenSearch is the AWS implementation of the open source Elasticsearch search engine. It was formerly known as AWS Elasticsearch." name="description" /><meta content="OpenSearch" property="og:title" /><meta content="My Zettelkasten" property="og:site_name" /><meta content="article" property="og:type" /><meta content="OpenSearch" property="neuron:zettel-id" /><meta content="OpenSearch" property="neuron:zettel-slug" /><meta content="AWS" property="neuron:zettel-tag" /><script type="application/ld+json">[]</script><style type="text/css">body{background-color:#eeeeee !important;font-family:"Libre Franklin", serif !important}body .ui.container{font-family:"Libre Franklin", serif !important}body h1, h2, h3, h4, h5, h6, .ui.header, .headerFont{font-family:"Merriweather", sans-serif !important}body code, pre, tt, .monoFont{font-family:"Roboto Mono","SFMono-Regular","Menlo","Monaco","Consolas","Liberation Mono","Courier New", monospace !important}body div.z-index p.info{color:#808080}body div.z-index ul{list-style-type:square;padding-left:1.5em}body div.z-index .uplinks{margin-left:0.29999em}body .zettel-content h1#title-h1{background-color:rgba(33,133,208,0.1)}body nav.bottomPane{background-color:rgba(33,133,208,2.0e-2)}body 
div#footnotes{border-top-color:#2185d0}body p{line-height:150%}body img{max-width:100%}body .deemphasized{font-size:0.94999em}body .deemphasized:hover{opacity:1}body .deemphasized:not(:hover){opacity:0.69999}body .deemphasized:not(:hover) a{color:#808080 !important}body div.container.universe{padding-top:1em}body div.zettel-view ul{padding-left:1.5em;list-style-type:square}body div.zettel-view .pandoc .highlight{background-color:#ffff00}body div.zettel-view .pandoc .ui.disabled.fitted.checkbox{margin-right:0.29999em;vertical-align:middle}body div.zettel-view .zettel-content .metadata{margin-top:1em}body div.zettel-view .zettel-content .metadata div.date{text-align:center;color:#808080}body div.zettel-view .zettel-content h1{padding-top:0.2em;padding-bottom:0.2em;text-align:center}body div.zettel-view .zettel-content h2{border-bottom:solid 1px #4682b4;margin-bottom:0.5em}body div.zettel-view .zettel-content h3{margin:0px 0px 0.4em 0px}body div.zettel-view .zettel-content h4{opacity:0.8}body div.zettel-view .zettel-content div#footnotes{margin-top:4em;border-top-style:groove;border-top-width:2px;font-size:0.9em}body div.zettel-view .zettel-content div#footnotes ol > li > p:only-of-type{display:inline;margin-right:0.5em}body div.zettel-view .zettel-content aside.footnote-inline{width:30%;padding-left:15px;margin-left:15px;float:right;background-color:#d3d3d3}body div.zettel-view .zettel-content .overflows{overflow:auto}body div.zettel-view .zettel-content code{margin:auto auto auto auto;font-size:100%}body div.zettel-view .zettel-content p code, li code, ol code{padding:0.2em 0.2em 0.2em 0.2em;background-color:#f5f2f0}body div.zettel-view .zettel-content pre{overflow:auto}body div.zettel-view .zettel-content dl dt{font-weight:bold}body div.zettel-view .zettel-content blockquote{background-color:#f9f9f9;border-left:solid 10px #cccccc;margin:1.5em 0px 1.5em 0px;padding:0.5em 10px 0.5em 10px}body div.zettel-view .zettel-content.raw{background-color:#dddddd}body 
.ui.label.zettel-tag{color:#000000}body .ui.label.zettel-tag a{color:#000000}body nav.bottomPane ul.backlinks > li{padding-bottom:0.4em;list-style-type:disc}body nav.bottomPane ul.context-list > li{list-style-type:lower-roman}body .footer-version img{-webkit-filter:grayscale(100%);-moz-filter:grayscale(100%);-ms-filter:grayscale(100%);-o-filter:grayscale(100%);filter:grayscale(100%)}body .footer-version img:hover{-webkit-filter:grayscale(0%);-moz-filter:grayscale(0%);-ms-filter:grayscale(0%);-o-filter:grayscale(0%);filter:grayscale(0%)}body .footer-version, .footer-version a, .footer-version a:visited{color:#808080}body .footer-version a{font-weight:bold}body .footer-version{margin-top:1em !important;font-size:0.69999em}@media only screen and (max-width: 768px){body div#zettel-container{margin-left:0.4em !important;margin-right:0.4em !important}}body span.zettel-link-container span.zettel-link a{color:#2185d0;font-weight:bold;text-decoration:none}body span.zettel-link-container span.zettel-link a:hover{background-color:rgba(33,133,208,0.1)}body span.zettel-link-container span.extra{color:auto}body span.zettel-link-container.errors{border:solid 1px #ff0000}body span.zettel-link-container.errors span.zettel-link a:hover{text-decoration:none !important;cursor:not-allowed}body [data-tooltip]:after{font-size:0.69999em}body div.tag-tree div.node{font-weight:bold}body div.tag-tree div.node a.inactive{color:#555555}body .tree.flipped{-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}body .tree{overflow:auto}body .tree ul.root{padding-top:0px;margin-top:0px}body .tree ul{position:relative;padding:1em 0px 0px 0px;white-space:nowrap;margin:0px auto 0px auto;text-align:center}body .tree ul::after{content:"";display:table;clear:both}body .tree ul:last-child{padding-bottom:0.1em}body .tree 
li{display:inline-block;vertical-align:top;text-align:center;list-style-type:none;position:relative;padding:1em 0.5em 0em 0.5em}body .tree li::before{content:"";position:absolute;top:0px;right:50%;border-top:solid 2px #cccccc;width:50%;height:1.19999em}body .tree li::after{content:"";position:absolute;top:0px;right:50%;border-top:solid 2px #cccccc;width:50%;height:1.19999em}body .tree li::after{right:auto;left:50%;border-left:solid 2px #cccccc}body .tree li:only-child{padding-top:0em}body .tree li:only-child::after{display:none}body .tree li:only-child::before{display:none}body .tree li:first-child::before{border-style:none;border-width:0px}body .tree li:first-child::after{border-radius:5px 0px 0px 0px}body .tree li:last-child::after{border-style:none;border-width:0px}body .tree li:last-child::before{border-right:solid 2px #cccccc;border-radius:0px 5px 0px 0px}body .tree ul ul::before{content:"";position:absolute;top:0px;left:50%;border-left:solid 2px #cccccc;width:0px;height:1.19999em}body .tree li div.forest-link{border:solid 2px #cccccc;padding:0.2em 0.29999em 0.2em 0.29999em;text-decoration:none;display:inline-block;border-radius:5px 5px 5px 5px;color:#333333;position:relative;top:2px}body .tree.flipped li div.forest-link{-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}</style><script
async=""
id="MathJax-script"
src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"
></script>
<!-- Prism syntax-highlighting theme -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/themes/prism.min.css" />
<!-- Open connections to the Google Fonts origins ahead of the stylesheet request below -->
<link rel="preconnect" href="https://fonts.googleapis.com" />
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
<!-- IBM Plex Mono / Sans Condensed / Sans / Serif, weights 100-700, upright and italic -->
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;1,100;1,200;1,300;1,400;1,500;1,600;1,700&family=IBM+Plex+Sans+Condensed:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;1,100;1,200;1,300;1,400;1,500;1,600;1,700&family=IBM+Plex+Sans:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;1,100;1,200;1,300;1,400;1,500;1,600;1,700&family=IBM+Plex+Serif:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;1,100;1,200;1,300;1,400;1,500;1,600;1,700&display=swap" />
<!-- Prism core, then the autoloader plugin (order matters: the plugin is loaded after the core) -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/components/prism-core.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.23.0/plugins/autoloader/prism-autoloader.min.js"></script>
<style>
  /*
   * Typography overrides applied after the generated theme stylesheet that
   * appears earlier in <head>.
   *
   * Every font-family below is declared !important, so it replaces the
   * earlier font stack in full. A generic family is listed last in each
   * declaration so that text keeps an appropriate typeface (sans-serif or
   * monospace) if the IBM Plex web fonts cannot be loaded.
   */

  /* Body copy and lists */
  body .ui.container,
  body ul {
    font-family: "IBM Plex Sans", sans-serif !important;
  }

  /* Thinner, italicised block quotes */
  body blockquote {
    border-left-width: 3px !important;
    font-style: italic;
  }

  /* Headings */
  .headerFont,
  .ui.header,
  body h1,
  h2,
  h3,
  h4,
  h5,
  h6 {
    font-family: "IBM Plex Sans Condensed", sans-serif !important;
  }

  /* Paragraph leading */
  body p {
    line-height: 1.4;
  }

  /* Inline code and code blocks */
  .monoFont,
  body code,
  pre,
  tt {
    font-family: "IBM Plex Mono", monospace !important;
    font-size: 12px !important;
    line-height: 1.4 !important;
  }
</style>
<!--replace-end-7--><!--replace-end-4--><!--replace-end-1--></head><body><div class="ui fluid container universe"><!--replace-start-2--><!--replace-start-3--><!--replace-start-6--><div class="ui text container" id="zettel-container" style="position: relative"><div class="zettel-view"><article class="ui raised attached segment zettel-content"><div class="pandoc"><h1 id="title-h1">OpenSearch</h1><h2 id="background">Background</h2><p>OpenSearch is the AWS implementation of the open source Elasticsearch search engine. It was formerly known as “AWS Elasticsearch”.</p><p>It has many features but the core usage is to create searchable indices of a given content domain such as, for example, a website or content management system. This enables the quick search and retrieval of documents without using expensive database queries.</p><h2 id="key-concepts">Key concepts</h2><p>We will introduce the main concepts with the example of an internal intranet for which we want to create a searchable index. The intranet comprises hundreds of pages. Each page has the following metadata, conforming to the following example.</p><pre><code class="json language-json">{
&quot;title&quot;: &quot;Internal News&quot;,
&quot;author&quot;: &quot;Jane Doe&quot;,
&quot;published_date&quot;: &quot;2023-11-01T00:00:00Z&quot;,
&quot;tags&quot;: [&quot;news&quot;, &quot;internal&quot;],
&quot;categories&quot;: [&quot;communication&quot;],
&quot;content&quot;: &quot;Today&#39;s internal news and updates are...&quot;
}</code></pre><h3 id="create-domain">Create domain</h3><p>The OpenSearch domain is a managed environment which hosts OpenSearch <strong>clusters</strong>. It can contain one or more clusters.</p><p>The domain provides network <strong>endpoints</strong> you use to communicate and send requests. Typical requests:</p><ul><li>ingest data</li><li>index data</li><li>run search query and return matches</li></ul><h4 id="clusters-and-nodes">Clusters and nodes</h4><p>A cluster is the highest level of organisation within an OpenSearch domain that contains your indexed data. It processes all the search queries and handles tasks like indexing, searching, and managing documents.</p><p><img src="/static/opensearch-architecture.drawio.svg" /></p><p>A cluster comprises <strong>nodes</strong>. Nodes are individual servers that hold part of the cluster's data. Each node participates in the indexing and searching of the cluster's data.</p><ul><li>This distributed architecture helps in balancing the load and ensuring high availability.</li><li>Data can be replicated across nodes, making the system resilient against data loss.</li><li>You can add more nodes to the cluster to handle increased data and traffic, making the system adaptable to changing needs.</li></ul><h3 id="define-index-and-mappings">Define index and mappings</h3><p>Assuming the domain has been created, the next step is to create an index for the data, say <code>intranet_pages</code>. An index is basically a collection where the data is stored, similar to a database. Each entry is a <strong>document</strong> in this collection.</p><p>Our index will store each webpage as a document.</p><p>We specify the data that we want to store using an index mapping. For instance we may not want to store all the metadata for each page, preferring only to index a subset of the properties. In this example we will store all the data. We would achieve this with the following:</p><pre><code class="json language-json">{
&quot;mappings&quot;: {
&quot;properties&quot;: {
&quot;title&quot;: { &quot;type&quot;: &quot;text&quot; },
&quot;author&quot;: { &quot;type&quot;: &quot;keyword&quot; },
&quot;published_date&quot;: { &quot;type&quot;: &quot;date&quot; },
&quot;tags&quot;: { &quot;type&quot;: &quot;keyword&quot; },
&quot;content&quot;: { &quot;type&quot;: &quot;text&quot; }
}
}
}</code></pre><p>The mapping will be utilised in the following scenarios:</p><ul><li>storing data:<ul><li>when a new document is added to the <code>intranet_pages</code> index, it will adhere to the defined mappings</li><li>it will have to have the properties specified in the mapping in order to be added</li></ul></li><li>searching data:<ul><li>when executing searches, OpenSearch utilizes the mappings to understand the type of data in each field and optimizes search queries accordingly</li></ul></li><li>assessing relevance:<ul><li>proper mappings allow OpenSearch to accurately score and rank search results based on relevance.</li></ul></li></ul><h3 id="ingest-data-bulk-import-andor-scraper">Ingest data (bulk import and/or scraper)</h3><p>In order to create the index that we previously defined with a mapping it is necessary to implement some sort of mechanism for collating the metadata that matches the mapping. This would be a crawler or scraper that might be implemented with a lambda. This is a key part of the ingestion process.</p><p>Alternatively the data could be bulk imported in a format that maps to the index.</p><h3 id="querying-and-searching">Querying and searching</h3><p>Having established the crawler (and some kind of search interface), we can now run queries against the OpenSearch domain.</p><p>A basic example of the structure of a query would be as follows:</p><pre><code class="sh language-sh">GET /intranet_pages/_search</code></pre><pre><code class="json language-json">{
&quot;query&quot;: {
&quot;match&quot;: {
&quot;content&quot;: &quot;project&quot;
}
}
}</code></pre><p>Here we search against the <code>content</code> field to find pages that contain the word “project”.</p><p>Example of the data returned:</p><pre><code class="json language-json">{
&quot;hits&quot;: {
&quot;total&quot;: { &quot;value&quot;: 100 },
&quot;hits&quot;: [
{
&quot;_index&quot;: &quot;intranet_pages&quot;,
&quot;_id&quot;: &quot;1&quot;,
&quot;_score&quot;: 1.2,
&quot;_source&quot;: {
&quot;title&quot;: &quot;Project ABC Launch&quot;,
&quot;author&quot;: &quot;John Doe&quot;,
&quot;published_date&quot;: &quot;2023-01-01T00:00:00Z&quot;,
&quot;tags&quot;: [&quot;project&quot;, &quot;launch&quot;],
&quot;content&quot;: &quot;Details about the launch of Project ABC...&quot;
}
}
// Additional results here
]
}
}</code></pre><h2 id="search-patterns">Search patterns</h2><p>Below are further examples of commonly used search patterns.</p><h3 id="multiple-conditions">Multiple conditions</h3><p>Find documents that are authored by Jane Doe that contain the word “meeting”. <code>must</code> stands for boolean AND:</p><pre><code class="json language-json">{
&quot;query&quot;: {
&quot;bool&quot;: {
&quot;must&quot;: [
{ &quot;match&quot;: { &quot;content&quot;: &quot;meeting&quot; } },
{ &quot;match&quot;: { &quot;author&quot;: &quot;Jane Doe&quot; } }
]
}
}
}</code></pre><p>Find documents that contain either the word “meeting” or the word “project” in their content. <code>should</code> stands for boolean OR:</p><pre><code class="json language-json">{
&quot;query&quot;: {
&quot;bool&quot;: {
&quot;should&quot;: [
{ &quot;match&quot;: { &quot;content&quot;: &quot;meeting&quot; } },
{ &quot;match&quot;: { &quot;content&quot;: &quot;project&quot; } }
],
&quot;minimum_should_match&quot;: 1
}
}
}</code></pre><p><code>minimum_should_match</code> specifies the minimum number of <code>should</code> conditions that a document must match.</p><h3 id="query-by-date-ranges">Query by date ranges</h3><p>Find pages published on or after a certain date (<code>gte</code> means “greater than or equal to”):</p><pre><code class="json language-json">{
&quot;query&quot;: {
&quot;range&quot;: {
&quot;published_date&quot;: {
&quot;gte&quot;: &quot;2023-01-01T00:00:00Z&quot;
}
}
}
}</code></pre><pre><code class="json language-json">{
&quot;query&quot;: {
&quot;bool&quot;: {
&quot;should&quot;: [
{ &quot;match&quot;: { &quot;fileId&quot;: &quot;val&quot; } },
{ &quot;match&quot;: { &quot;programmeId&quot;: &quot;val&quot; } },
{ &quot;match&quot;: { &quot;guid&quot;: &quot;val&quot; } }
],
&quot;minimum_should_match&quot;: 1
}
}
}</code></pre></div></article><nav class="ui attached segment deemphasized bottomPane" id="neuron-tags-pane"><div><span class="ui basic label zettel-tag" title="Tag">AWS</span></div></nav><nav class="ui bottom attached icon compact inverted menu blue" id="neuron-nav-bar"><!--replace-start-9--><!--replace-end-9--><a class="right item" href="impulse.html" title="Open Impulse"><i class="wave square icon"></i></a></nav></div></div><!--replace-end-6--><!--replace-end-3--><!--replace-end-2--><div class="ui center aligned container footer-version"><div class="ui tiny image"><a href="https://neuron.zettel.page"><img alt="logo" src="https://raw.githubusercontent.com/srid/neuron/master/assets/neuron.svg" title="Generated by Neuron 1.9.35.3" /></a></div></div></div></body></html>