Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTeodor Sigaev2018-04-07 17:58:03 +0000
committerTeodor Sigaev2018-04-07 17:58:03 +0000
commit1c1791e00065f6986f9d44a78ce7c28b2d1322dd (patch)
tree7287b278140f8fe30ceb289146afc4fdcc97a040 /src/backend
parent529ab7bd1fb9c836fe5ccd96f79329d407522e20 (diff)
Add json(b)_to_tsvector function
Jsonb has a complex nature so there isn't best-for-everything way to convert it to tsvector for full text search. Current to_tsvector(json(b)) suggests to convert only string values, but it's possible to index keys, numerics and even booleans value. To solve that json(b)_to_tsvector has a second required argument contained a list of desired types of json fields. Second argument is a jsonb scalar or array right now with possibility to add new options in a future. Bump catalog version Author: Dmitry Dolgov with some editorization by me Reviewed by: Teodor Sigaev Discussion: https://www.postgresql.org/message-id/CA+q6zcXJQbS1b4kJ_HeAOoOc=unfnOrUEL=KGgE32QKDww7d8g@mail.gmail.com
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/tsearch/to_tsany.c128
-rw-r--r--src/backend/utils/adt/jsonfuncs.c179
2 files changed, 271 insertions, 36 deletions
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6055fb6b4e5..2474b723b41 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS)
PointerGetDatum(in)));
}
-Datum
-jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+/*
+ * Worker function for jsonb(_string)_to_tsvector(_byid)
+ */
+static TSVector
+jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, uint32 flags)
{
- Oid cfgId = PG_GETARG_OID(0);
- Jsonb *jb = PG_GETARG_JSONB_P(1);
- TSVector result;
TSVectorBuildState state;
ParsedText prs;
@@ -281,33 +281,77 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
- iterate_jsonb_string_values(jb, &state, add_to_tsvector);
+ iterate_jsonb_values(jb, flags, &state, add_to_tsvector);
- PG_FREE_IF_COPY(jb, 1);
+ return make_tsvector(&prs);
+}
+
+Datum
+jsonb_string_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
+ TSVector result;
- result = make_tsvector(&prs);
+ result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);
+ PG_FREE_IF_COPY(jb, 1);
PG_RETURN_TSVECTOR(result);
}
Datum
-jsonb_to_tsvector(PG_FUNCTION_ARGS)
+jsonb_string_to_tsvector(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB_P(0);
Oid cfgId;
+ TSVector result;
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
- ObjectIdGetDatum(cfgId),
- JsonbPGetDatum(jb)));
+ result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);
+ PG_FREE_IF_COPY(jb, 0);
+
+ PG_RETURN_TSVECTOR(result);
}
Datum
-json_to_tsvector_byid(PG_FUNCTION_ARGS)
+jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
{
Oid cfgId = PG_GETARG_OID(0);
- text *json = PG_GETARG_TEXT_P(1);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(2);
+ TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
+
+ result = jsonb_to_tsvector_worker(cfgId, jb, flags);
+ PG_FREE_IF_COPY(jb, 1);
+ PG_FREE_IF_COPY(jbFlags, 2);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(1);
+ Oid cfgId;
TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
+
+ cfgId = getTSCurrentConfig(true);
+ result = jsonb_to_tsvector_worker(cfgId, jb, flags);
+ PG_FREE_IF_COPY(jb, 0);
+ PG_FREE_IF_COPY(jbFlags, 1);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+/*
+ * Worker function for json(_string)_to_tsvector(_byid)
+ */
+static TSVector
+json_to_tsvector_worker(Oid cfgId, text *json, uint32 flags)
+{
TSVectorBuildState state;
ParsedText prs;
@@ -316,11 +360,50 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
- iterate_json_string_values(json, &state, add_to_tsvector);
+ iterate_json_values(json, flags, &state, add_to_tsvector);
+
+ return make_tsvector(&prs);
+}
+
+Datum
+json_string_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ TSVector result;
+ result = json_to_tsvector_worker(cfgId, json, jtiString);
PG_FREE_IF_COPY(json, 1);
- result = make_tsvector(&prs);
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+json_string_to_tsvector(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_P(0);
+ Oid cfgId;
+ TSVector result;
+
+ cfgId = getTSCurrentConfig(true);
+ result = json_to_tsvector_worker(cfgId, json, jtiString);
+ PG_FREE_IF_COPY(json, 0);
+
+ PG_RETURN_TSVECTOR(result);
+}
+
+Datum
+json_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(2);
+ TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
+
+ result = json_to_tsvector_worker(cfgId, json, flags);
+ PG_FREE_IF_COPY(json, 1);
+ PG_FREE_IF_COPY(jbFlags, 2);
PG_RETURN_TSVECTOR(result);
}
@@ -329,12 +412,17 @@ Datum
json_to_tsvector(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_P(0);
+ Jsonb *jbFlags = PG_GETARG_JSONB_P(1);
Oid cfgId;
+ TSVector result;
+ uint32 flags = parse_jsonb_index_flags(jbFlags);
cfgId = getTSCurrentConfig(true);
- PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
- ObjectIdGetDatum(cfgId),
- PointerGetDatum(json)));
+ result = json_to_tsvector_worker(cfgId, json, flags);
+ PG_FREE_IF_COPY(json, 0);
+ PG_FREE_IF_COPY(jbFlags, 1);
+
+ PG_RETURN_TSVECTOR(result);
}
/*
@@ -353,7 +441,7 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len)
* First time through: initialize words array to a reasonable size.
* (parsetext() will realloc it bigger as needed.)
*/
- prs->lenwords = Max(elem_len / 6, 64);
+ prs->lenwords = 16;
prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
prs->curwords = 0;
prs->pos = 0;
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 805a1a08940..2f12d0325ab 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -60,6 +60,7 @@ typedef struct IterateJsonStringValuesState
JsonIterateStringValuesAction action; /* an action that will be applied
* to each json value */
void *action_state; /* any necessary context for iteration */
+ uint32 flags; /* what kind of elements from a json we want to iterate */
} IterateJsonStringValuesState;
/* state for transform_json_string_values function */
@@ -474,8 +475,9 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
int level, Jsonb *newval, uint32 nelems, int op_type);
static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
-/* function supporting iterate_json_string_values */
-static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+/* function supporting iterate_json_values */
+static void iterate_values_scalar(void *state, char *token, JsonTokenType tokentype);
+static void iterate_values_object_field_start(void *state, char *fname, bool isnull);
/* functions supporting transform_json_string_values */
static void transform_string_values_object_start(void *state);
@@ -4939,11 +4941,79 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
}
/*
- * Iterate over jsonb string values or elements, and pass them together with an
- * iteration state to a specified JsonIterateStringValuesAction.
+ * Parse information about what elements of a jsonb document we want to iterate
+ * in functions iterate_json(b)_values. This information is presented in jsonb
+ * format, so that it can be easily extended in the future.
+ */
+uint32
+parse_jsonb_index_flags(Jsonb *jb)
+{
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken type;
+ uint32 flags = 0;
+
+ it = JsonbIteratorInit(&jb->root);
+
+ type = JsonbIteratorNext(&it, &v, false);
+
+ /*
+ * We iterate over array (scalar internally is represented as array, so, we
+ * will accept it too) to check all its elements. Flag's names are choosen
+ * the same as jsonb_typeof uses.
+ */
+ if (type != WJB_BEGIN_ARRAY)
+ ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("wrong flag type, only arrays and scalars are allowed")));
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) == WJB_ELEM)
+ {
+ if (v.type != jbvString)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("flag array element is not a string"),
+ errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\" and \"all\"")));
+
+ if (v.val.string.len == 3 &&
+ pg_strncasecmp(v.val.string.val, "all", 3) == 0)
+ flags |= jtiAll;
+ else if (v.val.string.len == 3 &&
+ pg_strncasecmp(v.val.string.val, "key", 3) == 0)
+ flags |= jtiKey;
+ else if (v.val.string.len == 6 &&
+ pg_strncasecmp(v.val.string.val, "string", 5) == 0)
+ flags |= jtiString;
+ else if (v.val.string.len == 7 &&
+ pg_strncasecmp(v.val.string.val, "numeric", 7) == 0)
+ flags |= jtiNumeric;
+ else if (v.val.string.len == 7 &&
+ pg_strncasecmp(v.val.string.val, "boolean", 7) == 0)
+ flags |= jtiBool;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("wrong flag in flag array: \"%s\"",
+ pnstrdup(v.val.string.val, v.val.string.len)),
+ errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\" and \"all\"")));
+ }
+
+ /* user should not get it */
+ if (type != WJB_END_ARRAY)
+ elog(ERROR, "unexpected end of flag array");
+
+ /* get final WJB_DONE and free iterator */
+ JsonbIteratorNext(&it, &v, false);
+
+ return flags;
+}
+
+/*
+ * Iterate over jsonb values or elements, specified by flags, and pass them
+ * together with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
-iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
+iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state,
+ JsonIterateStringValuesAction action)
{
JsonbIterator *it;
JsonbValue v;
@@ -4951,21 +5021,67 @@ iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesActio
it = JsonbIteratorInit(&jb->root);
+ /*
+ * Just recursively iterating over jsonb and call callback on all
+ * correspoding elements
+ */
while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
- if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+ if (type == WJB_KEY)
+ {
+ if (flags & jtiKey)
+ action(state, v.val.string.val, v.val.string.len);
+
+ continue;
+ }
+ else if (!(type == WJB_VALUE || type == WJB_ELEM))
{
- action(state, v.val.string.val, v.val.string.len);
+ /* do not call callback for composite JsonbValue */
+ continue;
+ }
+
+ /* JsonbValue is a value of object or element of array */
+ switch(v.type)
+ {
+ case jbvString:
+ if (flags & jtiString)
+ action(state, v.val.string.val, v.val.string.len);
+ break;
+ case jbvNumeric:
+ if (flags & jtiNumeric)
+ {
+ char *val;
+
+ val = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(v.val.numeric)));
+
+ action(state, val, strlen(val));
+ pfree(val);
+ }
+ break;
+ case jbvBool:
+ if (flags & jtiBool)
+ {
+ if (v.val.boolean)
+ action(state, "true", 4);
+ else
+ action(state, "false", 5);
+ }
+ break;
+ default:
+ /* do not call callback for composite JsonbValue */
+ break;
}
}
}
/*
- * Iterate over json string values or elements, and pass them together with an
- * iteration state to a specified JsonIterateStringValuesAction.
+ * Iterate over json values and elements, specified by flags, and pass them
+ * together with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
-iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action)
+iterate_json_values(text *json, uint32 flags, void *action_state,
+ JsonIterateStringValuesAction action)
{
JsonLexContext *lex = makeJsonLexContext(json, true);
JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
@@ -4974,24 +5090,55 @@ iterate_json_string_values(text *json, void *action_state, JsonIterateStringValu
state->lex = lex;
state->action = action;
state->action_state = action_state;
+ state->flags = flags;
sem->semstate = (void *) state;
- sem->scalar = iterate_string_values_scalar;
+ sem->scalar = iterate_values_scalar;
+ sem->object_field_start = iterate_values_object_field_start;
pg_parse_json(lex, sem);
}
/*
- * An auxiliary function for iterate_json_string_values to invoke a specified
- * JsonIterateStringValuesAction.
+ * An auxiliary function for iterate_json_values to invoke a specified
+ * JsonIterateStringValuesAction for specified values.
*/
static void
-iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
+iterate_values_scalar(void *state, char *token, JsonTokenType tokentype)
{
IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
- if (tokentype == JSON_TOKEN_STRING)
- _state->action(_state->action_state, token, strlen(token));
+ switch(tokentype)
+ {
+ case JSON_TOKEN_STRING:
+ if (_state->flags & jtiString)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ case JSON_TOKEN_NUMBER:
+ if (_state->flags & jtiNumeric)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ case JSON_TOKEN_TRUE:
+ case JSON_TOKEN_FALSE:
+ if (_state->flags & jtiBool)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ default:
+ /* do not call callback for any other token */
+ break;
+ }
+}
+
+static void
+iterate_values_object_field_start(void *state, char *fname, bool isnull)
+{
+ IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
+
+ if (_state->flags & jtiKey)
+ {
+ char *val = pstrdup(fname);
+ _state->action(_state->action_state, val, strlen(val));
+ }
}
/*