s3:mdssvc: add Elasticsearch backend
authorRalph Boehme <slow@samba.org>
Mon, 5 Aug 2019 14:25:01 +0000 (16:25 +0200)
committerRalph Boehme <slow@samba.org>
Wed, 9 Oct 2019 14:35:29 +0000 (14:35 +0000)
Signed-off-by: Ralph Boehme <slow@samba.org>
Reviewed-by: Noel Power <noel.power@suse.com>
21 files changed:
docs-xml/smbdotconf/misc/elasticsearchaddress.xml [new file with mode: 0644]
docs-xml/smbdotconf/misc/elasticsearchindex.xml [new file with mode: 0644]
docs-xml/smbdotconf/misc/elasticsearchmappings.xml [new file with mode: 0644]
docs-xml/smbdotconf/misc/elasticsearchmaxresults.xml [new file with mode: 0644]
docs-xml/smbdotconf/misc/elasticsearchport.xml [new file with mode: 0644]
docs-xml/smbdotconf/misc/elasticsearchusetls.xml [new file with mode: 0644]
docs-xml/smbdotconf/misc/spotlight_backend.xml
lib/param/loadparm.h
lib/param/param_table.c
source3/rpc_server/mdssvc/elasticsearch_mappings.json [new file with mode: 0644]
source3/rpc_server/mdssvc/es_lexer.l [new file with mode: 0644]
source3/rpc_server/mdssvc/es_mapping.c [new file with mode: 0644]
source3/rpc_server/mdssvc/es_mapping.h [new file with mode: 0644]
source3/rpc_server/mdssvc/es_parser.y [new file with mode: 0644]
source3/rpc_server/mdssvc/es_parser_test.c [new file with mode: 0644]
source3/rpc_server/mdssvc/mdssvc.c
source3/rpc_server/mdssvc/mdssvc_es.c [new file with mode: 0644]
source3/rpc_server/mdssvc/mdssvc_es.h [new file with mode: 0644]
source3/rpc_server/wscript_build
source3/wscript
source3/wscript_build

diff --git a/docs-xml/smbdotconf/misc/elasticsearchaddress.xml b/docs-xml/smbdotconf/misc/elasticsearchaddress.xml
new file mode 100644 (file)
index 0000000..6112546
--- /dev/null
@@ -0,0 +1,14 @@
+<samba:parameter name="elasticsearch:address"
+                 context="S"
+                 type="string"
+                 xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+  <description>
+    <para>
+      Specifies the name of the Elasticsearch server to use for Spotlight
+      queries when using the Elasticsearch backend.
+    </para>
+  </description>
+
+  <value type="default">localhost</value>
+  <value type="example">needle.haystack.samba.org</value>
+</samba:parameter>
diff --git a/docs-xml/smbdotconf/misc/elasticsearchindex.xml b/docs-xml/smbdotconf/misc/elasticsearchindex.xml
new file mode 100644 (file)
index 0000000..7f394b2
--- /dev/null
@@ -0,0 +1,16 @@
+<samba:parameter name="elasticsearch:index"
+                 context="S"
+                 type="string"
+                 xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+  <description>
+    <para>
+      Specifies the name of the Elasticsearch index to use for Spotlight queries
+      when using the Elasticsearch backend. The default value of "_all" is a
+      special Elasticsearch value that performs the search operation on all
+      indices.
+    </para>
+  </description>
+
+  <value type="default">_all</value>
+  <value type="example">spotlight</value>
+</samba:parameter>
diff --git a/docs-xml/smbdotconf/misc/elasticsearchmappings.xml b/docs-xml/smbdotconf/misc/elasticsearchmappings.xml
new file mode 100644 (file)
index 0000000..d2502a6
--- /dev/null
@@ -0,0 +1,14 @@
+<samba:parameter name="elasticsearch:mappings"
+                 context="G"
+                 type="string"
+                 xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+  <description>
+    <para>
+      Path to a file specifying metadata attribute mappings in JSON format. Use
+      by the Elasticsearch backend of the Spotlight RPC service.
+    </para>
+  </description>
+
+  <value type="default">&pathconfig.SAMBA_DATADIR;/elasticsearch_mappings.json</value>
+  <value type="example">/usr/share/foo/mymappings.json</value>
+</samba:parameter>
diff --git a/docs-xml/smbdotconf/misc/elasticsearchmaxresults.xml b/docs-xml/smbdotconf/misc/elasticsearchmaxresults.xml
new file mode 100644 (file)
index 0000000..1086b89
--- /dev/null
@@ -0,0 +1,15 @@
+<samba:parameter name="elasticsearch:max results"
+                 context="S"
+                 type="integer"
+                 xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+  <description>
+    <para>
+      Path to a file specifying metadata attribute mappings in JSON format. Used
+      by the Elasticsearch backend of the Spotlight RPC service. A value of 0
+      means no limit.
+    </para>
+  </description>
+
+  <value type="default">100</value>
+  <value type="example">10</value>
+</samba:parameter>
diff --git a/docs-xml/smbdotconf/misc/elasticsearchport.xml b/docs-xml/smbdotconf/misc/elasticsearchport.xml
new file mode 100644 (file)
index 0000000..ea87daa
--- /dev/null
@@ -0,0 +1,14 @@
+<samba:parameter name="elasticsearch:port"
+                 context="S"
+                 type="integer"
+                 xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+  <description>
+    <para>
+      Specifies the TCP port of the Elasticsearch server to use for Spotlight
+      queries when using the Elasticsearch backend.
+    </para>
+  </description>
+
+  <value type="default">9200</value>
+  <value type="example">9201</value>
+</samba:parameter>
diff --git a/docs-xml/smbdotconf/misc/elasticsearchusetls.xml b/docs-xml/smbdotconf/misc/elasticsearchusetls.xml
new file mode 100644 (file)
index 0000000..e1aa8a3
--- /dev/null
@@ -0,0 +1,14 @@
+<samba:parameter name="elasticsearch:use tls"
+                 context="S"
+                 type="boolean"
+                 xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+  <description>
+    <para>
+      Specifies whether to use HTTPS when talking to the Elasticsearch server
+      used for Spotlight queries when using the Elasticsearch backend.
+    </para>
+  </description>
+
+  <value type="default">no</value>
+  <value type="example">yes</value>
+</samba:parameter>
index 6d224b81e5f5ac54cde2670cade5beea36c4527f..0643fc16cbd045a6ad44046356b006c98cdc3106 100644 (file)
        Gnome Tracker.
        </para></listitem>
 
+       <listitem><para><constant>elasticsearch</constant> -
+       a backend that uses JSON and REST over HTTP(s) to query an
+       Elasticsearch server.
+       </para></listitem>
        </itemizedlist>
        </para>
 </description>
index 5c5b1cd7cd47371b20dcd50b64bd639f4f4b5594..d0ce3d312e14eb7687295a859378bad8ada0aaae 100644 (file)
@@ -252,6 +252,7 @@ enum mangled_names_options {MANGLED_NAMES_NO, MANGLED_NAMES_YES, MANGLED_NAMES_I
 enum spotlight_backend_options {
        SPOTLIGHT_BACKEND_NOINDEX,
        SPOTLIGHT_BACKEND_TRACKER,
+       SPOTLIGHT_BACKEND_ES,
 };
 
 /*
index 82dc5cd6cda14911ac6b9376b6140d4709746d85..2fd3361f99615b062d0d6945eec166dd43df2c77 100644 (file)
@@ -354,6 +354,7 @@ static const struct enum_list enum_ntlm_auth[] = {
 static const struct enum_list enum_spotlight_backend[] = {
        {SPOTLIGHT_BACKEND_NOINDEX, "noindex"},
        {SPOTLIGHT_BACKEND_TRACKER, "tracker"},
+       {SPOTLIGHT_BACKEND_ES, "elasticsearch"},
        {-1, NULL}
 };
 
diff --git a/source3/rpc_server/mdssvc/elasticsearch_mappings.json b/source3/rpc_server/mdssvc/elasticsearch_mappings.json
new file mode 100644 (file)
index 0000000..9f68a64
--- /dev/null
@@ -0,0 +1,142 @@
+{
+    "attribute_mappings": {
+        "*": {
+            "type": "fts",
+            "attribute": ""
+        },
+        "kMDItemTextContent": {
+            "type": "str",
+            "attribute": "content"
+        },
+        "_kMDItemGroupId": {
+            "type": "type",
+            "attribute": "file.content_type"
+        },
+        "kMDItemContentType": {
+            "type": "type",
+            "attribute": "file.content_type"
+        },
+        "kMDItemContentTypeTree": {
+            "type": "type",
+            "attribute": "file.content_type"
+        },
+        "kMDItemFSContentChangeDate": {
+            "type": "date",
+            "attribute": "file.last_modified"
+        },
+        "kMDItemFSCreationDate": {
+            "type": "date",
+            "attribute": "file.created"
+        },
+        "kMDItemFSName": {
+            "type": "str",
+            "attribute": "file.filename"
+        },
+        "kMDItemFSOwnerGroupID": {
+            "type": "str",
+            "attribute": "attributes.owner"
+        },
+        "kMDItemFSOwnerUserID": {
+            "type": "str",
+            "attribute": "attributes.group"
+        },
+        "kMDItemFSSize": {
+            "type": "num",
+            "attribute": "file.filesize"
+        },
+        "kMDItemPath": {
+            "type": "str",
+            "attribute": "path.real"
+        },
+        "kMDItemAttributeChangeDate": {
+            "type": "date",
+            "attribute": "file.last_modified"
+        },
+        "kMDItemAuthors": {
+            "type": "str",
+            "attribute": "meta.author"
+        },
+        "kMDItemContentCreationDate": {
+            "type": "date",
+            "attribute": "file.created"
+        },
+        "kMDItemContentModificationDate": {
+            "type": "date",
+            "attribute": "file.last_modified"
+        },
+        "kMDItemCreator": {
+            "type": "str",
+            "attribute": "meta.raw.creator"
+        },
+        "kMDItemDescription": {
+            "type": "str",
+            "attribute": "meta.raw.description"
+        },
+        "kMDItemDisplayName": {
+            "type": "str",
+            "attribute": "file.filename"
+        },
+        "kMDItemDurationSeconds": {
+            "type": "num",
+            "attribute": "meta.raw.xmpDM:duration"
+        },
+        "kMDItemNumberOfPages": {
+            "type": "num",
+            "attribute": "meta.raw.xmpTPg:NPages"
+        },
+        "kMDItemTitle": {
+            "type": "str",
+            "attribute": "meta.title"
+        },
+        "kMDItemAlbum": {
+            "type": "str",
+            "attribute": "meta.raw.xmpDM:album"
+        },
+        "kMDItemBitsPerSample": {
+            "type": "num",
+            "attribute": "meta.raw.tiff:BitsPerSample"
+        },
+        "kMDItemPixelHeight": {
+            "type": "num",
+            "attribute": "meta.raw.Image Height"
+        },
+        "kMDItemPixelWidth": {
+            "type": "num",
+            "attribute": "meta.raw.Image Width"
+        },
+        "kMDItemResolutionHeightDPI": {
+            "type": "num",
+            "attribute": "meta.raw.Y Resolution"
+        },
+        "kMDItemResolutionWidthDPI": {
+            "type": "num",
+            "attribute": "meta.raw.X Resolution"
+        }
+    },
+    "mime_mappings": {
+        "1": "message/rfc822",
+        "2": "text/x-vcard",
+        "6": "text/x-vcard",
+        "7": "video/*",
+        "8": "application/octet-stream",
+        "9": "text/directory",
+        "10": "audio/*",
+        "11": "application/pdf",
+        "12": "application/vnd.oasis.opendocument.presentation",
+        "13": "image/*",
+        "public.content": "message/rfc822 application/pdf application/vnd.oasis.opendocument.presentation image/* text/*",
+        "public.jpeg": "image/jpeg",
+        "public.tiff": "image/tiff",
+        "com.compuserve.gif": "image/gif",
+        "public.png": "image/png",
+        "com.microsoft.bmp": "image/bmp",
+        "public.mp3": "audio/mpeg",
+        "public.mpeg-4-audio": "audio/x-aac",
+        "public.text": "text/*",
+        "public.plain-text": "text/plain",
+        "public.rtf": "text/rtf",
+        "public.html": "text/html",
+        "public.xml": "text/xml",
+        "public.archive": "application/zip application/x-bzip application/x-bzip2 application/x-tar application/x-7z-compressed"
+    }
+}
diff --git a/source3/rpc_server/mdssvc/es_lexer.l b/source3/rpc_server/mdssvc/es_lexer.l
new file mode 100644 (file)
index 0000000..4be4225
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+   Unix SMB/CIFS implementation.
+   Main metadata server / Spotlight routines / Elasticsearch backend
+
+   Copyright (C) Ralph Boehme                  2019
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+%{
+#include "includes.h"
+#include "rpc_server/mdssvc/es_parser.tab.h"
+
+#define YY_NO_INPUT
+#define mdsyylalloc SMB_MALLOC
+#define mdsyylrealloc SMB_REALLOC
+
+static char *strip_quote(const char *phrase);
+%}
+
+%option nounput noyyalloc noyyrealloc prefix="mdsyyl"
+
+ASC     [a-zA-Z0-9_\*\:\-\.]
+U       [\x80-\xbf]
+U2      [\xc2-\xdf]
+U3      [\xe0-\xef]
+U4      [\xf0-\xf4]
+SPECIAL [\!\#\$\%\&\'\(\)\+\,\.\/\;\<\=\>\?\@\[\]\^\`\{\}\|\~\\]
+ESCHAR  [\"\*]
+BLANK   [ \t\n]
+
+UANY    {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
+UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
+UPHRASE {UANY}|{SPECIAL}|{BLANK}|\\{ESCHAR}
+
+%%
+InRange           return FUNC_INRANGE;
+\$time\.iso       return DATE_ISO;
+false             {mdsyyllval.bval = false; return BOOLEAN;}
+true              {mdsyyllval.bval = true; return BOOLEAN;}
+\"                return QUOTE;
+\(                return OBRACE;
+\)                return CBRACE;
+\&\&              return AND;
+\|\|              return OR;
+\=\=              return EQUAL;
+\!\=              return UNEQUAL;
+\=                return EQUAL;
+\<                return LT;
+\>                return GT;
+\,                return COMMA;
+{UANY}+           {mdsyyllval.sval = talloc_strdup(talloc_tos(), yytext); return WORD;}
+\"{UPHRASE}+\"    {mdsyyllval.sval = strip_quote(yytext); return PHRASE;}
+{BLANK}           /* ignore */
+%%
+
+static char *strip_quote(const char *phrase)
+{
+       size_t phrase_len = 0;
+       char *stripped_phrase = NULL;
+
+       if (phrase == NULL) {
+               return NULL;
+       }
+
+       phrase_len = strlen(phrase);
+       if (phrase_len < 2 ||
+           phrase[0] != '\"' ||
+           phrase[phrase_len - 1] != '\"')
+       {
+               return talloc_strdup(talloc_tos(), phrase);
+       }
+
+       phrase++;
+
+       stripped_phrase = talloc_strndup(talloc_tos(), phrase, phrase_len - 2);
+       if (stripped_phrase == NULL) {
+               return NULL;
+       }
+       return stripped_phrase;
+}
diff --git a/source3/rpc_server/mdssvc/es_mapping.c b/source3/rpc_server/mdssvc/es_mapping.c
new file mode 100644 (file)
index 0000000..5c71e50
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+   Unix SMB/CIFS implementation.
+   Main metadata server / Spotlight routines / Elasticsearch backend
+
+   Copyright (C) Ralph Boehme                  2019
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "es_mapping.h"
+
+/*
+ * Escaping of special characters in Lucene query syntax across HTTP and JSON
+ * ==========================================================================
+ *
+ * These characters in Lucene queries need escaping [1]:
+ *
+ *   + - & | ! ( ) { } [ ] ^ " ~ * ? : \ /
+ *
+ * Additionally JSON requires escaping of:
+ *
+ *   " \
+ *
+ * Characters already escaped by the mdssvc client:
+ *
+ *   * " \
+ *
+ * The following table contains the resulting escaped strings, beginning with the
+ * search term, the corresponding Spotlight query and the final string that gets
+ * sent to the target Elasticsearch server.
+ *
+ * string | mdfind | http
+ * -------+--------+------
+ * x!x     x!x      x\\!x
+ * x&x     x&x      x\\&x
+ * x+x     x+x      x\\+x
+ * x-x     x-x      x\\-x
+ * x.x     x.x      x\\.x
+ * x<x     x<x      x\\<x
+ * x>x     x>x      x\\>x
+ * x=x     x=x      x\\=x
+ * x?x     x?x      x\\?x
+ * x[x     x[x      x\\[x
+ * x]x     x]x      x\\]x
+ * x^x     x^x      x\\^x
+ * x{x     x{x      x\\{x
+ * x}x     x}x      x\\}x
+ * x|x     x|x      x\\|x
+ * x x     x x      x\\ x
+ * x*x     x\*x     x\\*x
+ * x\x     x\\x     x\\\\x
+ * x"x     x\"x     x\\\"x
+ *
+ * Special cases:
+ * x y    It's not possible to search for terms including spaces, Spotlight
+ *        will search for x OR y.
+ * x(x    Search for terms including ( and ) doesn not work with Spotlight.
+ *
+ * [1] <http://lucene.apache.org/core/8_2_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters>
+ */
+
+static char *escape_str(TALLOC_CTX *mem_ctx,
+                       const char *in,
+                       const char *escape_list,
+                       const char *escape_exceptions)
+{
+       char *out = NULL;
+       size_t in_len;
+       size_t new_len;
+       size_t in_pos;
+       size_t out_pos = 0;
+
+       if (in == NULL) {
+               return NULL;
+       }
+       in_len = strlen(in);
+
+       if (escape_list == NULL) {
+               escape_list = "";
+       }
+       if (escape_exceptions == NULL) {
+               escape_exceptions = "";
+       }
+
+       /*
+        * Allocate enough space for the worst case: every char needs to be
+        * escaped and requires an additional char.
+        */
+       new_len = (in_len * 2) + 1;
+       if (new_len <= in_len) {
+               return NULL;
+       }
+
+       out = talloc_zero_array(mem_ctx, char, new_len);
+       if (out == NULL) {
+               return NULL;
+       }
+
+       for (in_pos = 0, out_pos = 0; in_pos < in_len; in_pos++, out_pos++) {
+               if (strchr(escape_list, in[in_pos]) != NULL &&
+                   strchr(escape_exceptions, in[in_pos]) == NULL)
+               {
+                       out[out_pos++] = '\\';
+               }
+               out[out_pos] = in[in_pos];
+       }
+
+       return out;
+}
+
+char *es_escape_str(TALLOC_CTX *mem_ctx,
+                   const char *in,
+                   const char *exceptions)
+{
+       const char *lucene_escape_list = "+-&|!(){}[]^\"~*?:\\/ ";
+       const char *json_escape_list = "\\\"";
+       char *lucene_escaped = NULL;
+       char *full_escaped = NULL;
+
+       lucene_escaped =  escape_str(mem_ctx,
+                                    in,
+                                    lucene_escape_list,
+                                    exceptions);
+       if (lucene_escaped == NULL) {
+               return NULL;
+       }
+
+       full_escaped = escape_str(mem_ctx,
+                                 lucene_escaped,
+                                 json_escape_list,
+                                 NULL);
+       TALLOC_FREE(lucene_escaped);
+       return full_escaped;
+}
+
+struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx,
+                                  json_t *kmd_map,
+                                  const char *sl_attr)
+{
+       struct es_attr_map *es_map = NULL;
+       const char *typestr = NULL;
+       enum ssm_type type;
+       char *es_attr = NULL;
+       size_t i;
+       int cmp;
+       int ret;
+
+       static struct {
+               const char *typestr;
+               enum ssm_type typeval;
+       } ssmt_type_map[] = {
+               {"bool", ssmt_bool},
+               {"num", ssmt_num},
+               {"str", ssmt_str},
+               {"fts", ssmt_fts},
+               {"date", ssmt_date},
+               {"type", ssmt_type},
+       };
+
+       if (sl_attr == NULL) {
+               return NULL;
+       }
+
+       ret = json_unpack(kmd_map,
+                         "{s: {s: s}}",
+                         sl_attr,
+                         "type",
+                         &typestr);
+       if (ret != 0) {
+               DBG_ERR("No JSON type mapping for [%s]\n", sl_attr);
+               return NULL;
+       }
+
+       ret = json_unpack(kmd_map,
+                         "{s: {s: s}}",
+                         sl_attr,
+                         "attribute",
+                         &es_attr);
+       if (ret != 0) {
+               DBG_ERR("No JSON attribute mapping for [%s]\n", sl_attr);
+               return NULL;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(ssmt_type_map); i++) {
+               cmp = strcmp(typestr, ssmt_type_map[i].typestr);
+               if (cmp == 0) {
+                       type = ssmt_type_map[i].typeval;
+                       break;
+               }
+       }
+       if (i == ARRAY_SIZE(ssmt_type_map)) {
+               return NULL;
+       }
+
+       es_map = talloc_zero(mem_ctx, struct es_attr_map);
+       if (es_map == NULL) {
+               return NULL;
+       }
+       es_map->type = type;
+
+       es_map->name = es_escape_str(es_map, es_attr, NULL);
+       if (es_map->name == NULL) {
+               TALLOC_FREE(es_map);
+               return false;
+       }
+
+       return es_map;
+}
+
+const char *es_map_sl_type(json_t *mime_map,
+                          const char *sl_type)
+{
+       const char *mime_type = NULL;
+       int ret;
+
+       if (sl_type == NULL) {
+               return NULL;
+       }
+
+       ret = json_unpack(mime_map,
+                         "{s: s}",
+                         sl_type,
+                         &mime_type);
+       if (ret != 0) {
+               return NULL;
+       }
+
+       return mime_type;
+}
diff --git a/source3/rpc_server/mdssvc/es_mapping.h b/source3/rpc_server/mdssvc/es_mapping.h
new file mode 100644 (file)
index 0000000..29511b5
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+  Unix SMB/CIFS implementation.
+  Main metadata server / Spotlight routines / Elasticsearch backend
+
+  Copyright (c) Ralph Boehme                   2019
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _ES_MAPPING_H_
+#define _ES_MAPPING_H_
+
+#include <jansson.h>
+
+enum ssm_type {
+       ssmt_bool,   /* a boolean value */
+       ssmt_num,    /* a numeric value */
+       ssmt_str,    /* a string value */
+       ssmt_fts,    /* a string value */
+       ssmt_date,   /* date values */
+       ssmt_type    /* kMDItemContentType, requires special mapping */
+};
+
+struct es_attr_map {
+       enum ssm_type type;
+       const char *name;
+};
+
+char *es_escape_str(TALLOC_CTX *mem_ctx,
+                   const char *in,
+                   const char *exceptions);
+struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx,
+                                  json_t *kmd_map,
+                                  const char *sl_attr);
+const char *es_map_sl_type(json_t *mime_map,
+                          const char *sl_type);
+
+#endif
diff --git a/source3/rpc_server/mdssvc/es_parser.y b/source3/rpc_server/mdssvc/es_parser.y
new file mode 100644 (file)
index 0000000..0514183
--- /dev/null
@@ -0,0 +1,625 @@
+/*
+   Unix SMB/CIFS implementation.
+   Main metadata server / Spotlight routines / Elasticsearch backend
+
+   Copyright (C) Ralph Boehme                  2019
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+%{
+       #include "includes.h"
+       #include "rpc_server/mdssvc/mdssvc.h"
+       #include "rpc_server/mdssvc/mdssvc_es.h"
+       #include "rpc_server/mdssvc/es_parser.tab.h"
+       #include "rpc_server/mdssvc/es_mapping.h"
+       #include <jansson.h>
+
+       /*
+        * allow building with -O3 -Wp,-D_FORTIFY_SOURCE=2
+        *
+        * /tmp/samba-testbase/.../mdssvc/es_parser.y: In function
+        * â€˜mdsyylparse’:
+        * es_parser.tab.c:1124:6: error: assuming pointer wraparound
+        * does not occur when comparing P +- C1 with P +- C2
+        * [-Werror=strict-overflow]
+        *
+        * The generated code in es_parser.tab.c looks like this:
+        *
+        *   if (yyss + yystacksize - 1 <= yyssp)
+        */
+       #pragma GCC diagnostic ignored "-Wstrict-overflow"
+
+       #define YYMALLOC SMB_MALLOC
+       #define YYREALLOC SMB_REALLOC
+
+       struct yy_buffer_state;
+       typedef struct yy_buffer_state *YY_BUFFER_STATE;
+       int mdsyyllex(void);
+       void mdsyylerror(char const *);
+       void *mdsyylterminate(void);
+       YY_BUFFER_STATE mdsyyl_scan_string(const char *str);
+       void mdsyyl_delete_buffer(YY_BUFFER_STATE buffer);
+
+       /* forward declarations */
+       static char *isodate_to_sldate(const char *s);
+       static char *map_expr(const struct es_attr_map *attr,
+                             char op,
+                             const char *val1,
+                             const char *val2);
+
+       /* global vars, eg needed by the lexer */
+       struct es_parser_state {
+               TALLOC_CTX *frame;
+               json_t *kmd_map;
+               json_t *mime_map;
+               YY_BUFFER_STATE s;
+               const char *result;
+       } *global_es_parser_state;
+%}
+
+%code provides {
+       #include <stdbool.h>
+       #include <jansson.h>
+       #include "rpc_server/mdssvc/mdssvc.h"
+
+       /* 2001-01-01T00:00:00Z - Unix Epoch = SP_RAW_TIME_OFFSET */
+       #define SP_RAW_TIME_OFFSET 978307200
+
+       int mdsyylwrap(void);
+       bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx,
+                                      json_t *mappings,
+                                      const char *path_scope,
+                                      const char *query_string,
+                                      char **_es_query);
+}
+
+%union {
+       bool bval;
+       const char *sval;
+       struct es_attr_map *attr_map;
+}
+
+%name-prefix "mdsyyl"
+%expect 1
+%error-verbose
+
+%type <sval> match expr line function value isodate
+%type <attr_map> attribute
+
+%token <sval> WORD PHRASE
+%token <bval> BOOLEAN
+%token FUNC_INRANGE
+%token DATE_ISO
+%token OBRACE CBRACE EQUAL UNEQUAL GT LT COMMA QUOTE
+%left OR
+%left AND
+%%
+
+input:
+/* empty */
+| input line
+;
+
+line:
+expr {
+       global_es_parser_state->result = $1;
+}
+;
+
+expr:
+OBRACE expr CBRACE {
+       if ($2 == NULL) YYABORT;
+       $$ = talloc_asprintf(talloc_tos(), "(%s)", $2);
+       if ($$ == NULL) YYABORT;
+}
+| expr AND expr {
+       $$ = talloc_asprintf(talloc_tos(), "(%s) AND (%s)", $1, $3);
+       if ($$ == NULL) YYABORT;
+}
+| expr OR expr {
+       $$ = talloc_asprintf(talloc_tos(), "%s OR %s", $1, $3);
+       if ($$ == NULL) YYABORT;
+}
+| match {
+       $$ = $1;
+}
+| BOOLEAN {
+       /*
+        * We can't properly handle these in expressions, fortunately this
+        * is probably only ever used by OS X as sole element in an
+        * expression ie "False" (when Finder window selected our share
+        * but no search string entered yet). Packet traces showed that OS
+        * X Spotlight server then returns a failure (ie -1) which is what
+        * we do here too by calling YYABORT.
+        */
+       YYABORT;
+};
+
+match:
+attribute EQUAL value {
+       $$ = map_expr($1, '=', $3, NULL);
+       if ($$ == NULL) YYABORT;
+}
+| attribute UNEQUAL value {
+       $$ = map_expr($1, '!', $3, NULL);
+       if ($$ == NULL) YYABORT;
+}
+| attribute LT value {
+       $$ = map_expr($1, '<', $3, NULL);
+       if ($$ == NULL) YYABORT;
+}
+| attribute GT value {
+       $$ = map_expr($1, '>', $3, NULL);
+       if ($$ == NULL) YYABORT;
+}
+| function {
+       $$ = $1;
+}
+| match WORD {
+       $$ = $1;
+};
+
+function:
+FUNC_INRANGE OBRACE attribute COMMA WORD COMMA WORD CBRACE {
+       $$ = map_expr($3, '~', $5, $7);
+       if ($$ == NULL) YYABORT;
+};
+
+attribute:
+WORD {
+       $$ = es_map_sl_attr(global_es_parser_state->frame,
+                           global_es_parser_state->kmd_map,
+                           $1);
+       if ($$ == NULL) YYABORT;
+};
+
+value:
+PHRASE {
+       $$ = $1;
+}
+| isodate {
+       $$ = $1;
+};
+
+isodate:
+DATE_ISO OBRACE WORD CBRACE {
+       $$ = isodate_to_sldate($3);
+       if ($$ == NULL) YYABORT;
+};
+
+%%
+
+/*
+ * Spotlight has two date formats:
+ * - seconds since 2001-01-01 00:00:00Z
+ * - as string "$time.iso(%Y-%m-%dT%H:%M:%SZ)"
+ * This function converts the latter to the former as string, so the parser
+ * can work on a uniform format.
+ */
+static char *isodate_to_sldate(const char *isodate)
+{
+       struct es_parser_state *s = global_es_parser_state;
+       struct tm tm;
+       const char *p = NULL;
+       char *tstr = NULL;
+       time_t t;
+
+       p = strptime(isodate, "%Y-%m-%dT%H:%M:%SZ", &tm);
+       if (p == NULL) {
+               DBG_ERR("strptime [%s] failed\n", isodate);
+               return NULL;
+       }
+
+       t = timegm(&tm);
+       t -= SP_RAW_TIME_OFFSET;
+
+       tstr = talloc_asprintf(s->frame, "%jd", (intmax_t)t);
+       if (tstr == NULL) {
+               return NULL;
+       }
+
+       return tstr;
+}
+
+static char *map_type(const struct es_attr_map *attr,
+                     char op,
+                     const char *val)
+{
+       struct es_parser_state *s = global_es_parser_state;
+       const char *mime_type_list = NULL;
+       char *esc_mime_type_list = NULL;
+       const char *not = NULL;
+       const char *end = NULL;
+       char *es = NULL;
+
+       mime_type_list = es_map_sl_type(s->mime_map, val);
+       if (mime_type_list == NULL) {
+               DBG_ERR("Mapping type [%s] failed\n", val);
+               return NULL;
+       }
+
+       esc_mime_type_list = es_escape_str(s->frame,
+                                          mime_type_list,
+                                          "* ");
+       if (esc_mime_type_list == NULL) {
+               return NULL;
+       }
+
+       switch (op) {
+       case '=':
+               not = "";
+               end = "";
+               break;
+       case '!':
+               not = "(NOT ";
+               end = ")";
+               break;
+       default:
+               DBG_ERR("Mapping type [%s] unexpected op [%c]\n", val, op);
+               return NULL;
+       }
+       es = talloc_asprintf(s->frame,
+                            "%s%s:(%s)%s",
+                            not,
+                            attr->name,
+                            esc_mime_type_list,
+                            end);
+       if (es == NULL) {
+               return NULL;
+       }
+
+       return es;
+}
+
+static char *map_num(const struct es_attr_map *attr,
+                    char op,
+                    const char *val1,
+                    const char *val2)
+{
+       struct es_parser_state *s = global_es_parser_state;
+       char *es = NULL;
+
+       switch (op) {
+       case '>':
+               es = talloc_asprintf(s->frame,
+                                    "%s:{%s TO *}",
+                                    attr->name,
+                                    val1);
+               break;
+       case '<':
+               es = talloc_asprintf(s->frame,
+                                    "%s:{* TO %s}",
+                                    attr->name,
+                                    val1);
+               break;
+       case '~':
+               es = talloc_asprintf(s->frame,
+                                    "%s:[%s TO %s]",
+                                    attr->name,
+                                    val1,
+                                    val2);
+               break;
+       case '=':
+               es = talloc_asprintf(s->frame,
+                                    "%s:%s",
+                                    attr->name,
+                                    val1);
+               break;
+       case '!':
+               es = talloc_asprintf(s->frame,
+                                    "(NOT %s:%s)",
+                                    attr->name,
+                                    val1);
+               break;
+       default:
+               DBG_ERR("Mapping num unexpected op [%c]\n", op);
+               return NULL;
+       }
+       if (es == NULL) {
+               return NULL;
+       }
+
+       return es;
+}
+
+static char *map_fts(const struct es_attr_map *attr,
+                    char op,
+                    const char *val)
+{
+       struct es_parser_state *s = global_es_parser_state;
+       const char *not = NULL;
+       const char *end = NULL;
+       char *esval = NULL;
+       char *es = NULL;
+
+       esval = es_escape_str(s->frame, val, "*\\\"");
+       if (esval == NULL) {
+               yyerror("es_escape_str failed");
+               return NULL;
+       }
+
+       switch (op) {
+       case '=':
+               not = "";
+               end = "";
+               break;
+       case '!':
+               not = "(NOT ";
+               end = ")";
+               break;
+       default:
+               DBG_ERR("Mapping fts [%s] unexpected op [%c]\n", val, op);
+               return NULL;
+       }
+       es = talloc_asprintf(s->frame,
+                            "%s%s%s",
+                            not,
+                            esval,
+                            end);
+       if (es == NULL) {
+               return NULL;
+       }
+       return es;
+}
+
+static char *map_str(const struct es_attr_map *attr,
+                    char op,
+                    const char *val)
+{
+       struct es_parser_state *s = global_es_parser_state;
+       char *esval = NULL;
+       char *es = NULL;
+       const char *not = NULL;
+       const char *end = NULL;
+
+       esval = es_escape_str(s->frame, val, "*\\\"");
+       if (esval == NULL) {
+               yyerror("es_escape_str failed");
+               return NULL;
+       }
+
+       switch (op) {
+       case '=':
+               not = "";
+               end = "";
+               break;
+       case '!':
+               not = "(NOT ";
+               end = ")";
+               break;
+       default:
+               DBG_ERR("Mapping string [%s] unexpected op [%c]\n", val, op);
+               return NULL;
+       }
+
+       es = talloc_asprintf(s->frame,
+                            "%s%s:%s%s",
+                            not,
+                            attr->name,
+                            esval,
+                            end);
+       if (es == NULL) {
+               return NULL;
+       }
+       return es;
+}
+
+/*
+ * Convert Spotlight date seconds since 2001-01-01 00:00:00Z
+ * to a date string in the format %Y-%m-%dT%H:%M:%SZ.
+ */
+static char *map_sldate_to_esdate(TALLOC_CTX *mem_ctx,
+                                 const char *sldate)
+{
+       struct tm *tm = NULL;
+       char *esdate = NULL;
+       char buf[21];
+       size_t len;
+       time_t t;
+       int error;
+
+       t = (time_t)smb_strtoull(sldate, NULL, 10, &error, SMB_STR_STANDARD);
+       if (error != 0) {
+               DBG_ERR("smb_strtoull [%s] failed\n", sldate);
+               return NULL;
+       }
+       t += SP_RAW_TIME_OFFSET;
+
+       tm = gmtime(&t);
+       if (tm == NULL) {
+               DBG_ERR("localtime [%s] failed\n", sldate);
+               return NULL;
+       }
+
+       len = strftime(buf, sizeof(buf),
+                      "%Y-%m-%dT%H:%M:%SZ", tm);
+       if (len != 20) {
+               DBG_ERR("strftime [%s] failed\n", sldate);
+               return NULL;
+       }
+
+       esdate = es_escape_str(mem_ctx, buf, NULL);
+       if (esdate == NULL) {
+               yyerror("es_escape_str failed");
+               return NULL;
+       }
+       return esdate;
+}
+
+static char *map_date(const struct es_attr_map *attr,
+                     char op,
+                     const char *sldate1,
+                     const char *sldate2)
+{
+       struct es_parser_state *s = global_es_parser_state;
+       char *esdate1 = NULL;
+       char *esdate2 = NULL;
+       char *es = NULL;
+
+       if (op == '~' && sldate2 == NULL) {
+               DBG_ERR("Date range query, but second date is NULL\n");
+               return NULL;
+       }
+
+       esdate1 = map_sldate_to_esdate(s->frame, sldate1);
+       if (esdate1 == NULL) {
+               DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate1);
+               return NULL;
+       }
+       if (sldate2 != NULL) {
+               esdate2 = map_sldate_to_esdate(s->frame, sldate2);
+               if (esdate2 == NULL) {
+                       DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate2);
+                       return NULL;
+               }
+       }
+
+       switch (op) {
+       case '>':
+               es = talloc_asprintf(s->frame,
+                                    "%s:{%s TO *}",
+                                    attr->name,
+                                    esdate1);
+               break;
+       case '<':
+               es = talloc_asprintf(s->frame,
+                                    "%s:{* TO %s}",
+                                    attr->name,
+                                    esdate1);
+               break;
+       case '~':
+               es = talloc_asprintf(s->frame,
+                                    "%s:[%s TO %s]",
+                                    attr->name,
+                                    esdate1,
+                                    esdate2);
+               break;
+       case '=':
+               es = talloc_asprintf(s->frame,
+                                    "%s:%s",
+                                    attr->name,
+                                    esdate1);
+               break;
+       case '!':
+               es = talloc_asprintf(s->frame,
+                                    "(NOT %s:%s)",
+                                    attr->name,
+                                    esdate1);
+               break;
+       }
+       if (es == NULL) {
+               return NULL;
+       }
+       return es;
+}
+
+static char *map_expr(const struct es_attr_map *attr,
+                     char op,
+                     const char *val1,
+                     const char *val2)
+{
+       char *es = NULL;
+
+       switch (attr->type) {
+       case ssmt_type:
+               es = map_type(attr, op, val1);
+               break;
+       case ssmt_num:
+               es = map_num(attr, op, val1, val2);
+               break;
+       case ssmt_fts:
+               es = map_fts(attr, op, val1);
+               break;
+       case ssmt_str:
+               es = map_str(attr, op, val1);
+               break;
+       case ssmt_date:
+               es = map_date(attr, op, val1, val2);
+               break;
+       default:
+               break;
+       }
+       if (es == NULL) {
+               DBG_ERR("Mapping [%s %c %s (%s)] failed\n",
+                       attr->name, op, val1, val2 ? val2 : "");
+               return NULL;
+       }
+
+       return es;
+}
+
+void mdsyylerror(const char *str)
+{
+       DBG_ERR("Parser failed: %s\n", str);
+}
+
+int mdsyylwrap(void)
+{
+       return 1;
+}
+
+/**
+ * Map a Spotlight RAW query string to a ES query string
+ **/
+bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx,
+                              json_t *mappings,
+                              const char *path_scope,
+                              const char *query_string,
+                              char **_es_query)
+{
+       struct es_parser_state s = {
+               .frame = talloc_stackframe(),
+       };
+       int result;
+       char *es_query = NULL;
+
+       s.kmd_map = json_object_get(mappings, "attribute_mappings");
+       if (s.kmd_map == NULL) {
+               DBG_ERR("Failed to load attribute_mappings from JSON\n");
+               return false;
+       }
+       s.mime_map = json_object_get(mappings, "mime_mappings");
+       if (s.mime_map == NULL) {
+               DBG_ERR("Failed to load mime_mappings from JSON\n");
+               return false;
+       }
+
+       s.s = mdsyyl_scan_string(query_string);
+       if (s.s == NULL) {
+               DBG_WARNING("Failed to parse [%s]\n", query_string);
+               TALLOC_FREE(s.frame);
+               return false;
+       }
+       global_es_parser_state = &s;
+       result = mdsyylparse();
+       global_es_parser_state = NULL;
+       mdsyyl_delete_buffer(s.s);
+
+       if (result != 0) {
+               TALLOC_FREE(s.frame);
+               return false;
+       }
+
+       es_query = talloc_asprintf(mem_ctx,
+                                  "(%s) AND path.real.fulltext:\\\"%s\\\"",
+                                  s.result, path_scope);
+       TALLOC_FREE(s.frame);
+       if (es_query == NULL) {
+               return false;
+       }
+
+       *_es_query = es_query;
+       return true;
+}
diff --git a/source3/rpc_server/mdssvc/es_parser_test.c b/source3/rpc_server/mdssvc/es_parser_test.c
new file mode 100644 (file)
index 0000000..5751606
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+   Unix SMB/CIFS implementation.
+   Main metadata server / Spotlight routines / ES backend
+
+   Copyright (C) Ralph Boehme                  2019
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "rpc_server/mdssvc/mdssvc.h"
+#include "rpc_server/mdssvc/mdssvc_es.h"
+#include "rpc_server/mdssvc/es_parser.tab.h"
+#include "rpc_server/mdssvc/es_mapping.h"
+
+/*
+ * Examples:
+ *
+ * $ ./spotlight2es '_kMDItemGroupId=="11"'
+ * ...
+ * $ ./spotlight2es '*=="test*"||kMDItemTextContent=="test*"'
+ * ...
+ */
+
+int main(int argc, char **argv)
+{
+       TALLOC_CTX *mem_ctx = NULL;
+       json_t *mappings = NULL;
+       json_error_t json_error;
+       char *default_path = NULL;
+       char *path = NULL;
+       const char *query_string = NULL;
+       const char *path_scope = NULL;
+       char *es_query = NULL;
+       bool ok;
+
+       if (argc != 2) {
+               printf("usage: %s QUERY\n", argv[0]);
+               return 1;
+       }
+       query_string = argv[1];
+       path_scope = "/foo/bar";
+
+       lp_load_global(get_dyn_CONFIGFILE());
+
+       mem_ctx = talloc_init("es_parser_test");
+       if (mem_ctx == NULL) {
+               return 1;
+       }
+
+       default_path = talloc_asprintf(mem_ctx,
+               "%s/mdssvc/elasticsearch_mappings.json",
+               get_dyn_SAMBA_DATADIR());
+       if (default_path == NULL) {
+               TALLOC_FREE(mem_ctx);
+               return 1;
+       }
+
+       path = lp_parm_talloc_string(mem_ctx,
+                                    GLOBAL_SECTION_SNUM,
+                                    "elasticsearch",
+                                    "mappings",
+                                    default_path);
+       TALLOC_FREE(default_path);
+       if (path == NULL) {
+               TALLOC_FREE(mem_ctx);
+               return 1;
+       }
+
+       mappings = json_load_file(path, 0, &json_error);
+       if (mappings == NULL) {
+               DBG_ERR("Opening mapping file [%s] failed: %s\n",
+                       path, strerror(errno));
+               TALLOC_FREE(mem_ctx);
+               return 1;
+       }
+
+       ok = map_spotlight_to_es_query(mem_ctx,
+                                          mappings,
+                                          path_scope,
+                                          query_string,
+                                          &es_query);
+       printf("%s\n", ok ? es_query : "*mapping failed*");
+
+       json_decref(mappings);
+       talloc_free(mem_ctx);
+       return ok ? 0 : 1;
+}
index 2418332930132a68e7234b5cccd427e60c6a6b90..fce3335d60214e7fdcfbd7c741120e4c0a7b3dd2 100644 (file)
@@ -31,6 +31,9 @@
 #ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER
 #include "mdssvc_tracker.h"
 #endif
+#ifdef HAVE_SPOTLIGHT_BACKEND_ES
+#include "mdssvc_es.h"
+#endif
 
 #undef DBGC_CLASS
 #define DBGC_CLASS DBGC_RPC_SRV
@@ -1422,6 +1425,15 @@ static struct mdssvc_ctx *mdssvc_init(struct tevent_context *ev)
                return NULL;
        }
 
+#ifdef HAVE_SPOTLIGHT_BACKEND_ES
+       ok = mdsscv_backend_es.init(mdssvc_ctx);
+       if (!ok) {
+               DBG_ERR("backend init failed\n");
+               TALLOC_FREE(mdssvc_ctx);
+               return NULL;
+       }
+#endif
+
 #ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER
        ok = mdsscv_backend_tracker.init(mdssvc_ctx);
        if (!ok) {
@@ -1457,6 +1469,14 @@ bool mds_shutdown(void)
        if (!ok) {
                goto fail;
        }
+
+#ifdef HAVE_SPOTLIGHT_BACKEND_ES
+       ok = mdsscv_backend_es.shutdown(mdssvc_ctx);
+       if (!ok) {
+               goto fail;
+       }
+#endif
+
 #ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER
        ok = mdsscv_backend_tracker.shutdown(mdssvc_ctx);
        if (!ok) {
@@ -1528,6 +1548,13 @@ struct mds_ctx *mds_init_ctx(TALLOC_CTX *mem_ctx,
        case SPOTLIGHT_BACKEND_NOINDEX:
                mds_ctx->backend = &mdsscv_backend_noindex;
                break;
+
+#ifdef HAVE_SPOTLIGHT_BACKEND_ES
+       case SPOTLIGHT_BACKEND_ES:
+               mds_ctx->backend = &mdsscv_backend_es;
+               break;
+#endif
+
 #ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER
        case SPOTLIGHT_BACKEND_TRACKER:
                mds_ctx->backend = &mdsscv_backend_tracker;
diff --git a/source3/rpc_server/mdssvc/mdssvc_es.c b/source3/rpc_server/mdssvc/mdssvc_es.c
new file mode 100644 (file)
index 0000000..3c54abf
--- /dev/null
@@ -0,0 +1,835 @@
+/*
+   Unix SMB/CIFS implementation.
+   Main metadata server / Spotlight routines / ES backend
+
+   Copyright (C) Ralph Boehme                  2019
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "system/filesys.h"
+#include "lib/util/time_basic.h"
+#include "lib/tls/tls.h"
+#include "lib/util/tevent_ntstatus.h"
+#include "libcli/http/http.h"
+#include "lib/util/tevent_unix.h"
+#include "credentials.h"
+#include "mdssvc.h"
+#include "mdssvc_es.h"
+#include "rpc_server/mdssvc/es_parser.tab.h"
+
+#include <jansson.h>
+
+#undef DBGC_CLASS
+#define DBGC_CLASS DBGC_RPC_SRV
+
+#define MDSSVC_ELASTIC_QUERY_TEMPLATE  \
+       "{"                             \
+       "    \"from\": %zu,"            \
+       "    \"size\": %zu,"            \
+       "    \"_source\": [%s],"        \
+       "    \"query\": {"              \
+        "        \"query_string\": {"  \
+       "            \"query\": \"%s\"" \
+       "        }"                     \
+       "    }"                         \
+       "}"
+
+#define MDSSVC_ELASTIC_SOURCES \
+       "\"path.real\""
+
+static bool mdssvc_es_init(struct mdssvc_ctx *mdssvc_ctx)
+{
+       struct mdssvc_es_ctx *mdssvc_es_ctx = NULL;
+       json_error_t json_error;
+       char *default_path = NULL;
+       char *path = NULL;
+
+       mdssvc_es_ctx = talloc_zero(mdssvc_ctx, struct mdssvc_es_ctx);
+       if (mdssvc_es_ctx == NULL) {
+               return false;
+       }
+       mdssvc_es_ctx->mdssvc_ctx = mdssvc_ctx;
+
+       mdssvc_es_ctx->creds = cli_credentials_init_anon(mdssvc_es_ctx);
+       if (mdssvc_es_ctx->creds == NULL) {
+               TALLOC_FREE(mdssvc_es_ctx);
+               return false;
+       }
+
+       default_path = talloc_asprintf(
+               mdssvc_es_ctx,
+               "%s/mdssvc/elasticsearch_mappings.json",
+               get_dyn_SAMBA_DATADIR());
+       if (default_path == NULL) {
+               TALLOC_FREE(mdssvc_es_ctx);
+               return false;
+       }
+
+       path = lp_parm_talloc_string(mdssvc_es_ctx,
+                                    GLOBAL_SECTION_SNUM,
+                                    "elasticsearch",
+                                    "mappings",
+                                    default_path);
+       TALLOC_FREE(default_path);
+       if (path == NULL) {
+               TALLOC_FREE(mdssvc_es_ctx);
+               return false;
+       }
+
+       mdssvc_es_ctx->mappings = json_load_file(path, 0, &json_error);
+       if (mdssvc_es_ctx->mappings == NULL) {
+               DBG_ERR("Opening mapping file [%s] failed: %s\n",
+                       path, json_error.text);
+               TALLOC_FREE(path);
+               TALLOC_FREE(mdssvc_es_ctx);
+               return false;
+       }
+       TALLOC_FREE(path);
+
+       mdssvc_ctx->backend_private = mdssvc_es_ctx;
+       return true;
+}
+
+static bool mdssvc_es_shutdown(struct mdssvc_ctx *mdssvc_ctx)
+{
+       return true;
+}
+
+static struct tevent_req *mds_es_connect_send(
+                               TALLOC_CTX *mem_ctx,
+                               struct tevent_context *ev,
+                               struct mds_es_ctx *mds_es_ctx);
+static int mds_es_connect_recv(struct tevent_req *req);
+static void mds_es_connected(struct tevent_req *subreq);
+static bool mds_es_next_search_trigger(struct mds_es_ctx *mds_es_ctx);
+
+static bool mds_es_connect(struct mds_ctx *mds_ctx)
+{
+       struct mdssvc_es_ctx *mdssvc_es_ctx = talloc_get_type_abort(
+               mds_ctx->mdssvc_ctx->backend_private, struct mdssvc_es_ctx);
+       struct mds_es_ctx *mds_es_ctx = NULL;
+       struct tevent_req *subreq = NULL;
+
+       mds_es_ctx = talloc_zero(mds_ctx, struct mds_es_ctx);
+       if (mds_es_ctx == NULL) {
+               return false;
+       }
+       *mds_es_ctx = (struct mds_es_ctx) {
+               .mdssvc_es_ctx = mdssvc_es_ctx,
+               .mds_ctx = mds_ctx,
+       };
+
+       mds_ctx->backend_private = mds_es_ctx;
+
+       subreq = mds_es_connect_send(
+                       mds_es_ctx,
+                       mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
+                       mds_es_ctx);
+       if (subreq == NULL) {
+               TALLOC_FREE(mds_es_ctx);
+               return false;
+       }
+       tevent_req_set_callback(subreq, mds_es_connected, mds_es_ctx);
+       return true;
+}
+
+static void mds_es_connected(struct tevent_req *subreq)
+{
+       struct mds_es_ctx *mds_es_ctx = tevent_req_callback_data(
+               subreq, struct mds_es_ctx);
+       int ret;
+       bool ok;
+
+       ret = mds_es_connect_recv(subreq);
+       TALLOC_FREE(subreq);
+       if (ret != 0) {
+               DBG_ERR("HTTP connect failed\n");
+               return;
+       }
+
+       ok = mds_es_next_search_trigger(mds_es_ctx);
+       if (!ok) {
+               DBG_ERR("mds_es_next_search_trigger failed\n");
+       }
+       return;
+}
+
+struct mds_es_connect_state {
+       struct tevent_context *ev;
+       struct mds_es_ctx *mds_es_ctx;
+       struct tevent_queue_entry *qe;
+       const char *server_addr;
+       uint16_t server_port;
+       struct tstream_tls_params *tls_params;
+};
+
+static void mds_es_http_connect_done(struct tevent_req *subreq);
+static void mds_es_http_waited(struct tevent_req *subreq);
+
+static struct tevent_req *mds_es_connect_send(
+                               TALLOC_CTX *mem_ctx,
+                               struct tevent_context *ev,
+                               struct mds_es_ctx *mds_es_ctx)
+{
+       struct tevent_req *req = NULL;
+       struct tevent_req *subreq = NULL;
+       struct mds_es_connect_state *state = NULL;
+       bool use_tls;
+       NTSTATUS status;
+
+       req = tevent_req_create(mem_ctx, &state, struct mds_es_connect_state);
+       if (req == NULL) {
+               return NULL;
+       }
+       *state = (struct mds_es_connect_state) {
+               .ev = ev,
+               .mds_es_ctx = mds_es_ctx,
+       };
+
+       state->server_addr = lp_parm_talloc_string(
+               state,
+               mds_es_ctx->mds_ctx->snum,
+               "elasticsearch",
+               "address",
+               "localhost");
+       state->server_port = lp_parm_int(
+               mds_es_ctx->mds_ctx->snum,
+               "elasticsearch",
+               "port",
+               9200);
+
+       use_tls = lp_parm_bool(
+               mds_es_ctx->mds_ctx->snum,
+               "elasticsearch",
+               "use tls",
+               false);
+
+       DBG_DEBUG("Connecting to HTTP%s [%s] port [%"PRIu16"]\n",
+                 use_tls ? "S" : "", state->server_addr, state->server_port);
+
+       if (use_tls) {
+               const char *ca_file = lp__tls_cafile();
+               const char *crl_file = lp__tls_crlfile();
+               const char *tls_priority = lp_tls_priority();
+               enum tls_verify_peer_state verify_peer = lp_tls_verify_peer();
+
+               status = tstream_tls_params_client(state,
+                                                  ca_file,
+                                                  crl_file,
+                                                  tls_priority,
+                                                  verify_peer,
+                                                  state->server_addr,
+                                                  &state->tls_params);
+               if (!NT_STATUS_IS_OK(status)) {
+                       DBG_ERR("Failed tstream_tls_params_client - %s\n",
+                               nt_errstr(status));
+                       tevent_req_nterror(req, status);
+                       return tevent_req_post(req, ev);
+               }
+       }
+
+       subreq = http_connect_send(state,
+                                  state->ev,
+                                  state->server_addr,
+                                  state->server_port,
+                                  mds_es_ctx->mdssvc_es_ctx->creds,
+                                  state->tls_params);
+       if (tevent_req_nomem(subreq, req)) {
+               return tevent_req_post(req, ev);
+       }
+       tevent_req_set_callback(subreq, mds_es_http_connect_done, req);
+       return req;
+}
+
+static void mds_es_http_connect_done(struct tevent_req *subreq)
+{
+       struct tevent_req *req = tevent_req_callback_data(
+               subreq, struct tevent_req);
+       struct mds_es_connect_state *state = tevent_req_data(
+               req, struct mds_es_connect_state);
+       int error;
+
+       error = http_connect_recv(subreq,
+                                 state->mds_es_ctx,
+                                 &state->mds_es_ctx->http_conn);
+       TALLOC_FREE(subreq);
+       if (error != 0) {
+               DBG_ERR("HTTP connect failed, retrying...\n");
+
+               subreq = tevent_wakeup_send(
+                       state->mds_es_ctx,
+                       state->mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
+                       tevent_timeval_current_ofs(10, 0));
+               if (tevent_req_nomem(subreq, req)) {
+                       return;
+               }
+               tevent_req_set_callback(subreq,
+                                       mds_es_http_waited,
+                                       req);
+               return;
+       }
+
+       DBG_DEBUG("Connected to HTTP%s [%s] port [%"PRIu16"]\n",
+                 state->tls_params ? "S" : "",
+                 state->server_addr, state->server_port);
+
+       tevent_req_done(req);
+       return;
+}
+
+static void mds_es_http_waited(struct tevent_req *subreq)
+{
+       struct tevent_req *req = tevent_req_callback_data(
+               subreq, struct tevent_req);
+       struct mds_es_connect_state *state = tevent_req_data(
+               req, struct mds_es_connect_state);
+       bool ok;
+
+       ok = tevent_wakeup_recv(subreq);
+       TALLOC_FREE(subreq);
+       if (!ok) {
+               tevent_req_error(req, ETIMEDOUT);
+               return;
+       }
+
+       subreq = mds_es_connect_send(
+                       state->mds_es_ctx,
+                       state->mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
+                       state->mds_es_ctx);
+       if (tevent_req_nomem(subreq, req)) {
+               return;
+       }
+       tevent_req_set_callback(subreq, mds_es_connected, state->mds_es_ctx);
+}
+
+static int mds_es_connect_recv(struct tevent_req *req)
+{
+       return tevent_req_simple_recv_unix(req);
+}
+
+static void mds_es_reconnect_on_error(struct sl_es_search *s)
+{
+       struct mds_es_ctx *mds_es_ctx = s->mds_es_ctx;
+       struct tevent_req *subreq = NULL;
+
+       if (s->slq != NULL) {
+               s->slq->state = SLQ_STATE_ERROR;
+       }
+
+       DBG_WARNING("Reconnecting HTTP...\n");
+       TALLOC_FREE(mds_es_ctx->http_conn);
+
+       subreq = mds_es_connect_send(
+                       mds_es_ctx,
+                       mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
+                       mds_es_ctx);
+       if (subreq == NULL) {
+               DBG_ERR("mds_es_connect_send failed\n");
+               return;
+       }
+       tevent_req_set_callback(subreq, mds_es_connected, mds_es_ctx);
+}
+
+static int search_destructor(struct sl_es_search *s)
+{
+       DLIST_REMOVE(s->mds_es_ctx->searches, s);
+       return 0;
+}
+
+static struct tevent_req *mds_es_search_send(TALLOC_CTX *mem_ctx,
+                                            struct tevent_context *ev,
+                                            struct sl_es_search *s);
+static int mds_es_search_recv(struct tevent_req *req);
+static void mds_es_search_done(struct tevent_req *subreq);
+
+static bool mds_es_search(struct sl_query *slq)
+{
+       struct mds_es_ctx *mds_es_ctx = talloc_get_type_abort(
+               slq->mds_ctx->backend_private, struct mds_es_ctx);
+       struct sl_es_search *s = NULL;
+       bool ok;
+
+       s = talloc_zero(slq, struct sl_es_search);
+       if (s == NULL) {
+               return false;
+       }
+       *s = (struct sl_es_search) {
+               .ev = mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
+               .mds_es_ctx = mds_es_ctx,
+               .slq = slq,
+               .size = MAX_SL_RESULTS,
+       };
+
+       /* 0 would mean no limit */
+       s->max = lp_parm_ulonglong(s->slq->mds_ctx->snum,
+                                  "elasticsearch",
+                                  "max results",
+                                  MAX_SL_RESULTS);
+
+       DBG_DEBUG("Spotlight query: '%s'\n", slq->query_string);
+
+       ok = map_spotlight_to_es_query(
+               s,
+               mds_es_ctx->mdssvc_es_ctx->mappings,
+               slq->path_scope,
+               slq->query_string,
+               &s->es_query);
+       if (!ok) {
+               TALLOC_FREE(s);
+               return false;
+       }
+       DBG_DEBUG("Elasticsearch query: '%s'\n", s->es_query);
+
+       slq->backend_private = s;
+       slq->state = SLQ_STATE_RUNNING;
+       DLIST_ADD_END(mds_es_ctx->searches, s);
+       talloc_set_destructor(s, search_destructor);
+
+       return mds_es_next_search_trigger(mds_es_ctx);
+}
+
+static bool mds_es_next_search_trigger(struct mds_es_ctx *mds_es_ctx)
+{
+       struct tevent_req *subreq = NULL;
+       struct sl_es_search *s = mds_es_ctx->searches;
+
+       if (mds_es_ctx->http_conn == NULL) {
+               DBG_DEBUG("Waiting for HTTP connection...\n");
+               return true;
+       }
+       if (s == NULL) {
+               DBG_DEBUG("No pending searches, idling...\n");
+               return true;
+       }
+       if (s->pending) {
+               DBG_DEBUG("Search pending [%p]\n", s);
+               return true;
+       }
+
+       subreq = mds_es_search_send(s, s->ev, s);
+       if (subreq == NULL) {
+               return false;
+       }
+       tevent_req_set_callback(subreq, mds_es_search_done, s);
+       return true;
+}
+
+static void mds_es_search_done(struct tevent_req *subreq)
+{
+       struct sl_es_search *s = tevent_req_callback_data(
+               subreq, struct sl_es_search);
+       struct mds_es_ctx *mds_es_ctx = s->mds_es_ctx;
+       struct sl_query *slq = s->slq;
+       int ret;
+       bool ok;
+
+       DBG_DEBUG("Search done for search [%p]\n", s);
+
+       DLIST_REMOVE(mds_es_ctx->searches, s);
+
+       ret = mds_es_search_recv(subreq);
+       TALLOC_FREE(subreq);
+       if (ret != 0) {
+               mds_es_reconnect_on_error(s);
+               return;
+       }
+
+       if (slq == NULL) {
+               /*
+                * Closed by the user. This is the only place where we free "s"
+                * explicitly because the talloc parent slq is already gone.
+                * Everywhere else we rely on the destructor of slq to free s"."
+                */
+               TALLOC_FREE(s);
+               goto trigger;
+       }
+
+       SLQ_DEBUG(10, slq, "search done");
+
+       if (s->total == 0 || s->from >= s->max) {
+               slq->state = SLQ_STATE_DONE;
+               goto trigger;
+       }
+
+       if (slq->query_results->num_results >= MAX_SL_RESULTS) {
+               slq->state = SLQ_STATE_FULL;
+               goto trigger;
+       }
+
+       /*
+        * Reschedule this query as there are more results waiting in the
+        * Elasticsearch server and the client result queue has room as
+        * well. But put it at the end of the list of active queries as a simple
+        * heuristic that should ensure all client queries are dispatched to the
+        * server.
+        */
+       DLIST_ADD_END(mds_es_ctx->searches, s);
+
+trigger:
+       ok = mds_es_next_search_trigger(mds_es_ctx);
+       if (!ok) {
+               DBG_ERR("mds_es_next_search_trigger failed\n");
+       }
+}
+
+static void mds_es_search_http_send_done(struct tevent_req *subreq);
+static void mds_es_search_http_read_done(struct tevent_req *subreq);
+
+struct mds_es_search_state {
+       struct tevent_context *ev;
+       struct sl_es_search *s;
+       struct tevent_queue_entry *qe;
+       struct http_request http_request;
+       struct http_request *http_response;
+};
+
+static int mds_es_search_pending_destructor(struct sl_es_search *s)
+{
+       /*
+        * s is a child of slq which may get freed when a user closes a
+        * query. To maintain the HTTP request/response sequence on the HTTP
+        * channel, we keep processing pending requests and free s when we
+        * receive the HTTP response for pending requests.
+        */
+       DBG_DEBUG("Preserving pending search [%p]\n", s);
+       s->slq = NULL;
+       return -1;
+}
+
+static void mds_es_search_set_pending(struct sl_es_search *s)
+{
+       DBG_DEBUG("Set pending [%p]\n", s);
+       SLQ_DEBUG(10, s->slq, "pending");
+
+       s->pending = true;
+       talloc_set_destructor(s, mds_es_search_pending_destructor);
+}
+
+static void mds_es_search_unset_pending(struct sl_es_search *s)
+{
+       DBG_DEBUG("Unset pending [%p]\n", s);
+       if (s->slq != NULL) {
+               SLQ_DEBUG(10, s->slq, "unset pending");
+       }
+
+       s->pending = false;
+       talloc_set_destructor(s, NULL);
+}
+
+static struct tevent_req *mds_es_search_send(TALLOC_CTX *mem_ctx,
+                                             struct tevent_context *ev,
+                                             struct sl_es_search *s)
+{
+       struct tevent_req *req = NULL;
+       struct tevent_req *subreq = NULL;
+       struct mds_es_search_state *state = NULL;
+       const char *index = NULL;
+       char *elastic_query = NULL;
+       char *uri = NULL;
+       size_t elastic_query_len;
+       char *elastic_query_len_str = NULL;
+       char *hostname = NULL;
+       bool pretty = false;
+
+       req = tevent_req_create(mem_ctx, &state, struct mds_es_search_state);
+       if (req == NULL) {
+               return NULL;
+       }
+       *state = (struct mds_es_search_state) {
+               .ev = ev,
+               .s = s,
+       };
+
+       if (!tevent_req_set_endtime(req, ev, timeval_current_ofs(60, 0))) {
+               return tevent_req_post(req, s->ev);
+       }
+
+       index = lp_parm_const_string(s->slq->mds_ctx->snum,
+                                    "elasticsearch",
+                                    "index",
+                                    "_all");
+       if (tevent_req_nomem(index, req)) {
+               return tevent_req_post(req, ev);
+       }
+
+       if (DEBUGLVL(10)) {
+               pretty = true;
+       }
+
+       uri = talloc_asprintf(state,
+                             "/%s/_search%s",
+                             index,
+                             pretty ? "?pretty" : "");
+       if (tevent_req_nomem(uri, req)) {
+               return tevent_req_post(req, ev);
+       }
+
+       elastic_query = talloc_asprintf(state,
+                                       MDSSVC_ELASTIC_QUERY_TEMPLATE,
+                                       s->from,
+                                       s->size,
+                                       MDSSVC_ELASTIC_SOURCES,
+                                       s->es_query);
+       if (tevent_req_nomem(elastic_query, req)) {
+               return tevent_req_post(req, ev);
+       }
+       DBG_DEBUG("Elastic query: '%s'\n", elastic_query);
+
+       elastic_query_len = strlen(elastic_query);
+
+       state->http_request = (struct http_request) {
+               .type = HTTP_REQ_POST,
+               .uri = uri,
+               .body = data_blob_const(elastic_query, elastic_query_len),
+               .major = '1',
+               .minor = '1',
+       };
+
+       elastic_query_len_str = talloc_asprintf(state, "%zu", elastic_query_len);
+       if (tevent_req_nomem(elastic_query_len_str, req)) {
+               return tevent_req_post(req, ev);
+       }
+
+       hostname = get_myname(state);
+       if (tevent_req_nomem(hostname, req)) {
+               return tevent_req_post(req, ev);
+       }
+
+       http_add_header(state, &state->http_request.headers,
+                       "Content-Type", "application/json");
+       http_add_header(state, &state->http_request.headers,
+                       "Accept", "application/json");
+       http_add_header(state, &state->http_request.headers,
+                       "User-Agent", "Samba/mdssvc");
+       http_add_header(state, &state->http_request.headers,
+                       "Host", hostname);
+       http_add_header(state, &state->http_request.headers,
+                       "Content-Length", elastic_query_len_str);
+
+       subreq = http_send_request_send(state,
+                                       ev,
+                                       s->mds_es_ctx->http_conn,
+                                       &state->http_request);
+       if (tevent_req_nomem(subreq, req)) {
+               return tevent_req_post(req, ev);
+       }
+       mds_es_search_set_pending(s);
+       tevent_req_set_callback(subreq, mds_es_search_http_send_done, req);
+       return req;
+}
+
+static void mds_es_search_http_send_done(struct tevent_req *subreq)
+{
+       struct tevent_req *req = tevent_req_callback_data(
+               subreq, struct tevent_req);
+       struct mds_es_search_state *state = tevent_req_data(
+               req, struct mds_es_search_state);
+       NTSTATUS status;
+
+       DBG_DEBUG("Sent out search [%p]\n", state->s);
+
+       status = http_send_request_recv(subreq);
+       TALLOC_FREE(subreq);
+       if (!NT_STATUS_IS_OK(status)) {
+               tevent_req_error(req, map_errno_from_nt_status(status));
+               return;
+       }
+
+       if (state->s->mds_es_ctx->mds_ctx == NULL) {
+               mds_es_search_unset_pending(state->s);
+               tevent_req_error(req, ECANCELED);
+               return;
+       }
+
+       subreq = http_read_response_send(state,
+                                        state->ev,
+                                        state->s->mds_es_ctx->http_conn,
+                                        MAX_SL_RESULTS * 8192);
+       if (tevent_req_nomem(subreq, req)) {
+               return;
+       }
+       tevent_req_set_callback(subreq, mds_es_search_http_read_done, req);
+}
+
+static void mds_es_search_http_read_done(struct tevent_req *subreq)
+{
+       struct tevent_req *req = tevent_req_callback_data(
+               subreq, struct tevent_req);
+       struct mds_es_search_state *state = tevent_req_data(
+               req, struct mds_es_search_state);
+       struct sl_es_search *s = state->s;
+       struct sl_query *slq = s->slq;
+       json_t *root = NULL;
+       json_t *matches = NULL;
+       json_t *match = NULL;
+       size_t i;
+       json_error_t error;
+       int hits;
+       NTSTATUS status;
+       int ret;
+       bool ok;
+
+       DBG_DEBUG("Got response for search [%p]\n", s);
+
+       mds_es_search_unset_pending(s);
+
+       status = http_read_response_recv(subreq, state, &state->http_response);
+       TALLOC_FREE(subreq);
+       if (!NT_STATUS_IS_OK(status)) {
+               DBG_DEBUG("HTTP response failed: %s\n", nt_errstr(status));
+               tevent_req_error(req, map_errno_from_nt_status(status));
+               return;
+       }
+
+       if (slq == NULL) {
+               tevent_req_done(req);
+               return;
+       }
+       if (s->mds_es_ctx->mds_ctx == NULL) {
+               tevent_req_error(req, ECANCELED);
+               return;
+       }
+
+       switch (state->http_response->response_code) {
+       case 200:
+               break;
+       default:
+               DBG_ERR("HTTP server response: %u\n",
+                       state->http_response->response_code);
+               goto fail;
+       }
+
+       DBG_DEBUG("JSON response:\n%s\n",
+                 talloc_strndup(talloc_tos(),
+                                (char *)state->http_response->body.data,
+                                state->http_response->body.length));
+
+       root = json_loadb((char *)state->http_response->body.data,
+                         state->http_response->body.length,
+                         0,
+                         &error);
+       if (root == NULL) {
+               DBG_ERR("json_loadb failed\n");
+               goto fail;
+       }
+
+       if (s->total == 0) {
+               /*
+                * Get the total number of results the first time, format
+                * used by Elasticsearch 7.0 or newer
+                */
+               ret = json_unpack(root, "{s: {s: {s: i}}}",
+                                 "hits", "total", "value", &s->total);
+               if (ret != 0) {
+                       /* Format used before 7.0 */
+                       ret = json_unpack(root, "{s: {s: i}}",
+                                         "hits", "total", &s->total);
+                       if (ret != 0) {
+                               DBG_ERR("json_unpack failed\n");
+                               goto fail;
+                       }
+               }
+
+               DBG_DEBUG("Total: %zu\n", s->total);
+
+               if (s->total == 0) {
+                       json_decref(root);
+                       tevent_req_done(req);
+                       return;
+               }
+       }
+
+       if (s->max == 0 || s->max > s->total) {
+               s->max = s->total;
+       }
+
+       ret = json_unpack(root, "{s: {s:o}}",
+                         "hits", "hits", &matches);
+       if (ret != 0 || matches == NULL) {
+               DBG_ERR("json_unpack hits failed\n");
+               goto fail;
+       }
+
+       hits = json_array_size(matches);
+       if (hits == 0) {
+               DBG_ERR("Hu?! No results?\n");
+               goto fail;
+       }
+       DBG_DEBUG("Hits: %d\n", hits);
+
+       for (i = 0; i < hits; i++) {
+               const char *path = NULL;
+
+               match = json_array_get(matches, i);
+               if (match == NULL) {
+                       DBG_ERR("Hu?! No value for index %zu\n", i);
+                       goto fail;
+               }
+               ret = json_unpack(match,
+                                 "{s: {s: {s: s}}}",
+                                 "_source",
+                                 "path",
+                                 "real",
+                                 &path);
+               if (ret != 0) {
+                       DBG_ERR("Missing path.real in JSON result\n");
+                       goto fail;
+               }
+
+               ok = mds_add_result(slq, path);
+               if (!ok) {
+                       DBG_ERR("error adding result for path: %s\n", path);
+                       goto fail;
+               }
+       }
+       json_decref(root);
+
+       s->from += hits;
+       slq->state = SLQ_STATE_RESULTS;
+       tevent_req_done(req);
+       return;
+
+fail:
+       if (root != NULL) {
+               json_decref(root);
+       }
+       slq->state = SLQ_STATE_ERROR;
+       tevent_req_error(req, EINVAL);
+       return;
+}
+
+static int mds_es_search_recv(struct tevent_req *req)
+{
+       return tevent_req_simple_recv_unix(req);
+}
+
+static bool mds_es_search_cont(struct sl_query *slq)
+{
+       struct sl_es_search *s = talloc_get_type_abort(
+               slq->backend_private, struct sl_es_search);
+
+       SLQ_DEBUG(10, slq, "continue");
+       DLIST_ADD_END(s->mds_es_ctx->searches, s);
+       return mds_es_next_search_trigger(s->mds_es_ctx);
+}
+
+struct mdssvc_backend mdsscv_backend_es = {
+       .init = mdssvc_es_init,
+       .shutdown = mdssvc_es_shutdown,
+       .connect = mds_es_connect,
+       .search_start = mds_es_search,
+       .search_cont = mds_es_search_cont,
+};
diff --git a/source3/rpc_server/mdssvc/mdssvc_es.h b/source3/rpc_server/mdssvc/mdssvc_es.h
new file mode 100644 (file)
index 0000000..19797fa
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+   Unix SMB/CIFS implementation.
+   Main metadata server / Spotlight routines / HTTP/ES/JSON backend
+
+   Copyright (C) Ralph Boehme                  2019
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _MDSSVC_ES_H_
+#define _MDSSVC_ES_H_
+
+#include <jansson.h>
+
+/*
+ * Some global state
+ */
+struct mdssvc_es_ctx {
+       struct mdssvc_ctx *mdssvc_ctx;
+       struct cli_credentials *creds;
+       json_t *mappings;
+};
+
+/*
+ * Per mdssvc RPC bind state
+ */
+struct mds_es_ctx {
+       /*
+        * Pointer to higher level mds_ctx
+        */
+       struct mds_ctx *mds_ctx;
+
+       /*
+        * Pointer to our global context
+        */
+       struct mdssvc_es_ctx *mdssvc_es_ctx;
+
+       /*
+        * The HTTP connection handle to the ES server
+        */
+       struct http_conn *http_conn;
+
+       /*
+        * List of pending searches
+        */
+       struct sl_es_search *searches;
+};
+
+/* Per search request */
+struct sl_es_search {
+       /*
+        * List pointers
+        */
+       struct sl_es_search *prev, *next;
+
+       /*
+        * Search is being executed. Only the list head can be pending.
+        */
+       bool pending;
+
+       /*
+        * Shorthand to our tevent context
+        */
+       struct tevent_context *ev;
+
+       /*
+        * Pointer to the RPC connection ctx the request is using
+        */
+       struct mds_es_ctx *mds_es_ctx;
+
+       /*
+        * The upper mdssvc.c level query context
+        */
+       struct sl_query *slq;
+
+       /*
+        * Maximum number of results we process and total number of
+        * results of a query.
+        */
+       size_t total;
+       size_t max;
+
+       /*
+        * For paging results
+        */
+       size_t from;
+       size_t size;
+
+       /*
+        * The translated Es query
+        */
+       char *es_query;
+};
+
+extern struct mdssvc_backend mdsscv_backend_es;
+
+#endif /* _MDSSVC_ES_H_ */
index 429c51b55cd19e9791fabec86f33bbccdf737414..870eb3d39d60195e390960e4c2926f58f41a7531 100644 (file)
@@ -161,6 +161,18 @@ if bld.env.spotlight_backend_tracker:
                           '''
     rpc_mdssvc_deps += 'tevent-glib-glue ' + bld.env['libtracker']
 
+if bld.env.spotlight_backend_es:
+    rpc_mdssvc_sources += '''
+                          mdssvc/mdssvc_es.c
+                          mdssvc/es_mapping.c
+                          mdssvc/es_parser.y
+                          mdssvc/es_lexer.l
+                          '''
+    rpc_mdssvc_deps += ' http jansson'
+    if bld.SAMBA3_IS_ENABLED_MODULE('rpc_mdssvc_module'):
+        bld.INSTALL_FILES(bld.env.SAMBA_DATADIR,
+                          'mdssvc/elasticsearch_mappings.json')
+
 bld.SAMBA3_MODULE('rpc_mdssvc_module',
                   subsystem='rpc',
                   allow_undefined_symbols=True,
index e29bf657b5e9e831fa45117b3b0bf9f1685aaddd..a6092b87615812c75005cf6bf108e956450785fd 100644 (file)
@@ -1789,6 +1789,13 @@ main() {
         and conf.CONFIG_GET('HAVE_UTF8_NORMALISATION')
     )
 
+    with_spotlight_es_backend = (
+        conf.CONFIG_SET('HAVE_JSON_OBJECT')
+        and conf.env['BISON']
+        and conf.env['FLEX']
+        and conf.CONFIG_GET('HAVE_UTF8_NORMALISATION')
+    )
+
     conf.env.with_spotlight = False
     if Options.options.with_spotlight is not False:
         backends = ['noindex']
@@ -1804,14 +1811,23 @@ main() {
             Logs.warn('Missing libtracker-sparql development files for Spotlight backend "tracker"')
         if not conf.CONFIG_SET('HAVE_GLIB'):
             Logs.warn('Missing glib-2.0 development files for Spotlight backend "tracker"')
+        if not conf.CONFIG_GET('HAVE_JSON_OBJECT'):
+            Logs.warn('Missing libjansson development files for Spotlight backend "elasticsearch"')
 
         if with_spotlight_tracker_backend:
             conf.env.spotlight_backend_tracker = True
             backends.append('tracker')
             conf.DEFINE('HAVE_SPOTLIGHT_BACKEND_TRACKER', '1')
 
-        if Options.options.with_spotlight is True and not conf.env.spotlight_backend_tracker:
-            conf.fatal("Unmet dependencies for Spotlight backend")
+        if with_spotlight_es_backend:
+            conf.env.spotlight_backend_es = True
+            backends.append('elasticsearch')
+            conf.DEFINE('HAVE_SPOTLIGHT_BACKEND_ES', '1')
+
+        if (Options.options.with_spotlight is True
+            and not conf.env.spotlight_backend_tracker
+            and not conf.env.spotlight_backend_es):
+            conf.fatal("Unmet dependencies for Spotlight backends")
 
         Logs.info("Building with Spotlight support, available backends: %s" % ', '.join(backends))
         default_static_modules.extend(TO_LIST('rpc_mdssvc_module'))
index d49512e59e7846a2d69d6be9a6082d8ba3c5bb1c..7d44e843c49d6721143811300a07166f1bc2145b 100644 (file)
@@ -1335,6 +1335,16 @@ bld.SAMBA3_BINARY('spotlight2sparql',
                  enabled=bld.env.spotlight_backend_tracker,
                  install=False)
 
+bld.SAMBA3_BINARY('spotlight2es',
+                 source='''
+                 rpc_server/mdssvc/es_parser_test.c
+                 rpc_server/mdssvc/es_parser.y
+                 rpc_server/mdssvc/es_lexer.l
+                 rpc_server/mdssvc/es_mapping.c''',
+                 deps='samba3-util talloc jansson smbconf',
+                 enabled=bld.env.spotlight_backend_es,
+                 install=False)
+
 bld.SAMBA3_BINARY('tevent_glib_glue_test',
                  source='lib/tevent_glib_glue_tests.c',
                  deps='''