diff options
author | Teodor Sigaev | 2018-04-07 17:58:03 +0000 |
---|---|---|
committer | Teodor Sigaev | 2018-04-07 17:58:03 +0000 |
commit | 1c1791e00065f6986f9d44a78ce7c28b2d1322dd (patch) | |
tree | 7287b278140f8fe30ceb289146afc4fdcc97a040 /src/backend | |
parent | 529ab7bd1fb9c836fe5ccd96f79329d407522e20 (diff) |
Add json(b)_to_tsvector function
Jsonb has a complex nature, so there isn't a best-for-everything way to convert
it to tsvector for full text search. The current to_tsvector(json(b)) converts
only string values, but it's also possible to index keys, numerics and even
boolean values. To solve that, json(b)_to_tsvector has a second required
argument containing a list of the desired types of json fields. The second
argument is currently a jsonb scalar or array, with the possibility to add new
options in the future.
Bump catalog version
Author: Dmitry Dolgov with some editing by me
Reviewed by: Teodor Sigaev
Discussion: https://www.postgresql.org/message-id/CA+q6zcXJQbS1b4kJ_HeAOoOc=unfnOrUEL=KGgE32QKDww7d8g@mail.gmail.com
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/tsearch/to_tsany.c | 128 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonfuncs.c | 179 |
2 files changed, 271 insertions, 36 deletions
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c index 6055fb6b4e5..2474b723b41 100644 --- a/src/backend/tsearch/to_tsany.c +++ b/src/backend/tsearch/to_tsany.c @@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS) PointerGetDatum(in))); } -Datum -jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) +/* + * Worker function for jsonb(_string)_to_tsvector(_byid) + */ +static TSVector +jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, uint32 flags) { - Oid cfgId = PG_GETARG_OID(0); - Jsonb *jb = PG_GETARG_JSONB_P(1); - TSVector result; TSVectorBuildState state; ParsedText prs; @@ -281,33 +281,77 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) state.prs = &prs; state.cfgId = cfgId; - iterate_jsonb_string_values(jb, &state, add_to_tsvector); + iterate_jsonb_values(jb, flags, &state, add_to_tsvector); - PG_FREE_IF_COPY(jb, 1); + return make_tsvector(&prs); +} + +Datum +jsonb_string_to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + Jsonb *jb = PG_GETARG_JSONB_P(1); + TSVector result; - result = make_tsvector(&prs); + result = jsonb_to_tsvector_worker(cfgId, jb, jtiString); + PG_FREE_IF_COPY(jb, 1); PG_RETURN_TSVECTOR(result); } Datum -jsonb_to_tsvector(PG_FUNCTION_ARGS) +jsonb_string_to_tsvector(PG_FUNCTION_ARGS) { Jsonb *jb = PG_GETARG_JSONB_P(0); Oid cfgId; + TSVector result; cfgId = getTSCurrentConfig(true); - PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid, - ObjectIdGetDatum(cfgId), - JsonbPGetDatum(jb))); + result = jsonb_to_tsvector_worker(cfgId, jb, jtiString); + PG_FREE_IF_COPY(jb, 0); + + PG_RETURN_TSVECTOR(result); } Datum -json_to_tsvector_byid(PG_FUNCTION_ARGS) +jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) { Oid cfgId = PG_GETARG_OID(0); - text *json = PG_GETARG_TEXT_P(1); + Jsonb *jb = PG_GETARG_JSONB_P(1); + Jsonb *jbFlags = PG_GETARG_JSONB_P(2); + TSVector result; + uint32 flags = parse_jsonb_index_flags(jbFlags); + + result = jsonb_to_tsvector_worker(cfgId, jb, flags); + PG_FREE_IF_COPY(jb, 1); + 
PG_FREE_IF_COPY(jbFlags, 2); + + PG_RETURN_TSVECTOR(result); +} + +Datum +jsonb_to_tsvector(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + Jsonb *jbFlags = PG_GETARG_JSONB_P(1); + Oid cfgId; TSVector result; + uint32 flags = parse_jsonb_index_flags(jbFlags); + + cfgId = getTSCurrentConfig(true); + result = jsonb_to_tsvector_worker(cfgId, jb, flags); + PG_FREE_IF_COPY(jb, 0); + PG_FREE_IF_COPY(jbFlags, 1); + + PG_RETURN_TSVECTOR(result); +} + +/* + * Worker function for json(_string)_to_tsvector(_byid) + */ +static TSVector +json_to_tsvector_worker(Oid cfgId, text *json, uint32 flags) +{ TSVectorBuildState state; ParsedText prs; @@ -316,11 +360,50 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS) state.prs = &prs; state.cfgId = cfgId; - iterate_json_string_values(json, &state, add_to_tsvector); + iterate_json_values(json, flags, &state, add_to_tsvector); + + return make_tsvector(&prs); +} + +Datum +json_string_to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + text *json = PG_GETARG_TEXT_P(1); + TSVector result; + result = json_to_tsvector_worker(cfgId, json, jtiString); PG_FREE_IF_COPY(json, 1); - result = make_tsvector(&prs); + PG_RETURN_TSVECTOR(result); +} + +Datum +json_string_to_tsvector(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(0); + Oid cfgId; + TSVector result; + + cfgId = getTSCurrentConfig(true); + result = json_to_tsvector_worker(cfgId, json, jtiString); + PG_FREE_IF_COPY(json, 0); + + PG_RETURN_TSVECTOR(result); +} + +Datum +json_to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + text *json = PG_GETARG_TEXT_P(1); + Jsonb *jbFlags = PG_GETARG_JSONB_P(2); + TSVector result; + uint32 flags = parse_jsonb_index_flags(jbFlags); + + result = json_to_tsvector_worker(cfgId, json, flags); + PG_FREE_IF_COPY(json, 1); + PG_FREE_IF_COPY(jbFlags, 2); PG_RETURN_TSVECTOR(result); } @@ -329,12 +412,17 @@ Datum json_to_tsvector(PG_FUNCTION_ARGS) { text *json = PG_GETARG_TEXT_P(0); + Jsonb *jbFlags = 
PG_GETARG_JSONB_P(1); Oid cfgId; + TSVector result; + uint32 flags = parse_jsonb_index_flags(jbFlags); cfgId = getTSCurrentConfig(true); - PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid, - ObjectIdGetDatum(cfgId), - PointerGetDatum(json))); + result = json_to_tsvector_worker(cfgId, json, flags); + PG_FREE_IF_COPY(json, 0); + PG_FREE_IF_COPY(jbFlags, 1); + + PG_RETURN_TSVECTOR(result); } /* @@ -353,7 +441,7 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len) * First time through: initialize words array to a reasonable size. * (parsetext() will realloc it bigger as needed.) */ - prs->lenwords = Max(elem_len / 6, 64); + prs->lenwords = 16; prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords); prs->curwords = 0; prs->pos = 0; diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 805a1a08940..2f12d0325ab 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -60,6 +60,7 @@ typedef struct IterateJsonStringValuesState JsonIterateStringValuesAction action; /* an action that will be applied * to each json value */ void *action_state; /* any necessary context for iteration */ + uint32 flags; /* what kind of elements from a json we want to iterate */ } IterateJsonStringValuesState; /* state for transform_json_string_values function */ @@ -474,8 +475,9 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems, int level, Jsonb *newval, uint32 nelems, int op_type); static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb); -/* function supporting iterate_json_string_values */ -static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype); +/* function supporting iterate_json_values */ +static void iterate_values_scalar(void *state, char *token, JsonTokenType tokentype); +static void iterate_values_object_field_start(void *state, char *fname, bool isnull); /* functions supporting transform_json_string_values */ static 
void transform_string_values_object_start(void *state); @@ -4939,11 +4941,79 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls, } /* - * Iterate over jsonb string values or elements, and pass them together with an - * iteration state to a specified JsonIterateStringValuesAction. + * Parse information about what elements of a jsonb document we want to iterate + * in functions iterate_json(b)_values. This information is presented in jsonb + * format, so that it can be easily extended in the future. + */ +uint32 +parse_jsonb_index_flags(Jsonb *jb) +{ + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken type; + uint32 flags = 0; + + it = JsonbIteratorInit(&jb->root); + + type = JsonbIteratorNext(&it, &v, false); + + /* + * We iterate over array (scalar internally is represented as array, so, we + * will accept it too) to check all its elements. Flag's names are choosen + * the same as jsonb_typeof uses. + */ + if (type != WJB_BEGIN_ARRAY) + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("wrong flag type, only arrays and scalars are allowed"))); + + while ((type = JsonbIteratorNext(&it, &v, false)) == WJB_ELEM) + { + if (v.type != jbvString) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("flag array element is not a string"), + errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\" and \"all\""))); + + if (v.val.string.len == 3 && + pg_strncasecmp(v.val.string.val, "all", 3) == 0) + flags |= jtiAll; + else if (v.val.string.len == 3 && + pg_strncasecmp(v.val.string.val, "key", 3) == 0) + flags |= jtiKey; + else if (v.val.string.len == 6 && + pg_strncasecmp(v.val.string.val, "string", 5) == 0) + flags |= jtiString; + else if (v.val.string.len == 7 && + pg_strncasecmp(v.val.string.val, "numeric", 7) == 0) + flags |= jtiNumeric; + else if (v.val.string.len == 7 && + pg_strncasecmp(v.val.string.val, "boolean", 7) == 0) + flags |= jtiBool; + else + ereport(ERROR, + 
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("wrong flag in flag array: \"%s\"", + pnstrdup(v.val.string.val, v.val.string.len)), + errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\" and \"all\""))); + } + + /* user should not get it */ + if (type != WJB_END_ARRAY) + elog(ERROR, "unexpected end of flag array"); + + /* get final WJB_DONE and free iterator */ + JsonbIteratorNext(&it, &v, false); + + return flags; +} + +/* + * Iterate over jsonb values or elements, specified by flags, and pass them + * together with an iteration state to a specified JsonIterateStringValuesAction. */ void -iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action) +iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state, + JsonIterateStringValuesAction action) { JsonbIterator *it; JsonbValue v; @@ -4951,21 +5021,67 @@ iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesActio it = JsonbIteratorInit(&jb->root); + /* + * Just recursively iterating over jsonb and call callback on all + * correspoding elements + */ while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) { - if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString) + if (type == WJB_KEY) + { + if (flags & jtiKey) + action(state, v.val.string.val, v.val.string.len); + + continue; + } + else if (!(type == WJB_VALUE || type == WJB_ELEM)) { - action(state, v.val.string.val, v.val.string.len); + /* do not call callback for composite JsonbValue */ + continue; + } + + /* JsonbValue is a value of object or element of array */ + switch(v.type) + { + case jbvString: + if (flags & jtiString) + action(state, v.val.string.val, v.val.string.len); + break; + case jbvNumeric: + if (flags & jtiNumeric) + { + char *val; + + val = DatumGetCString(DirectFunctionCall1(numeric_out, + NumericGetDatum(v.val.numeric))); + + action(state, val, strlen(val)); + pfree(val); + } + break; + case jbvBool: + if (flags & jtiBool) + { + if (v.val.boolean) 
+ action(state, "true", 4); + else + action(state, "false", 5); + } + break; + default: + /* do not call callback for composite JsonbValue */ + break; } } } /* - * Iterate over json string values or elements, and pass them together with an - * iteration state to a specified JsonIterateStringValuesAction. + * Iterate over json values and elements, specified by flags, and pass them + * together with an iteration state to a specified JsonIterateStringValuesAction. */ void -iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action) +iterate_json_values(text *json, uint32 flags, void *action_state, + JsonIterateStringValuesAction action) { JsonLexContext *lex = makeJsonLexContext(json, true); JsonSemAction *sem = palloc0(sizeof(JsonSemAction)); @@ -4974,24 +5090,55 @@ iterate_json_string_values(text *json, void *action_state, JsonIterateStringValu state->lex = lex; state->action = action; state->action_state = action_state; + state->flags = flags; sem->semstate = (void *) state; - sem->scalar = iterate_string_values_scalar; + sem->scalar = iterate_values_scalar; + sem->object_field_start = iterate_values_object_field_start; pg_parse_json(lex, sem); } /* - * An auxiliary function for iterate_json_string_values to invoke a specified - * JsonIterateStringValuesAction. + * An auxiliary function for iterate_json_values to invoke a specified + * JsonIterateStringValuesAction for specified values. 
*/ static void -iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype) +iterate_values_scalar(void *state, char *token, JsonTokenType tokentype) { IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state; - if (tokentype == JSON_TOKEN_STRING) - _state->action(_state->action_state, token, strlen(token)); + switch(tokentype) + { + case JSON_TOKEN_STRING: + if (_state->flags & jtiString) + _state->action(_state->action_state, token, strlen(token)); + break; + case JSON_TOKEN_NUMBER: + if (_state->flags & jtiNumeric) + _state->action(_state->action_state, token, strlen(token)); + break; + case JSON_TOKEN_TRUE: + case JSON_TOKEN_FALSE: + if (_state->flags & jtiBool) + _state->action(_state->action_state, token, strlen(token)); + break; + default: + /* do not call callback for any other token */ + break; + } +} + +static void +iterate_values_object_field_start(void *state, char *fname, bool isnull) +{ + IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state; + + if (_state->flags & jtiKey) + { + char *val = pstrdup(fname); + _state->action(_state->action_state, val, strlen(val)); + } } /* |