From: Andres Freund Date: Thu, 29 Aug 2019 17:14:07 +0000 (-0700) Subject: WIP: Introduce compile-time node type metadata collection & reimplement node funcs. X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=79cac70107220a4462fb5002e1f5a533c2a60b9a;p=users%2Fandresfreund%2Fpostgres.git WIP: Introduce compile-time node type metadata collection & reimplement node funcs. Author: Reviewed-By: Discussion: https://postgr.es/m/ Backpatch: --- diff --git a/src/backend/Makefile b/src/backend/Makefile index b03d5e510f..fdc2a05988 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -53,7 +53,7 @@ endif ########################################################################## -all: submake-libpgport submake-catalog-headers submake-utils-headers postgres $(POSTGRES_IMP) +all: submake-libpgport submake-catalog-headers submake-utils-headers submake-node-data postgres $(POSTGRES_IMP) ifneq ($(PORTNAME), cygwin) ifneq ($(PORTNAME), win32) @@ -144,7 +144,11 @@ submake-catalog-headers: submake-utils-headers: $(MAKE) -C utils distprep generated-header-symlinks -.PHONY: submake-catalog-headers submake-utils-headers +# run this unconditionally to avoid needing to know its dependencies here: +submake-node-data: + $(MAKE) -C nodes distprep generated-node-data + +.PHONY: submake-catalog-headers submake-utils-headers submake-node-data # Make symlinks for these headers in the include directory. That way # we can cut down on the -I options. 
Also, a symlink is automatically @@ -159,7 +163,7 @@ submake-utils-headers: .PHONY: generated-headers -generated-headers: $(top_builddir)/src/include/parser/gram.h $(top_builddir)/src/include/storage/lwlocknames.h submake-catalog-headers submake-utils-headers +generated-headers: $(top_builddir)/src/include/parser/gram.h $(top_builddir)/src/include/storage/lwlocknames.h submake-catalog-headers submake-utils-headers submake-node-data $(top_builddir)/src/include/parser/gram.h: parser/gram.h prereqdir=`cd '$(dir $<)' >/dev/null && pwd` && \ diff --git a/src/backend/nodes/Makefile b/src/backend/nodes/Makefile index 0b1e98c019..d9eaa0cf25 100644 --- a/src/backend/nodes/Makefile +++ b/src/backend/nodes/Makefile @@ -13,7 +13,61 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global OBJS = nodeFuncs.o nodes.o list.o bitmapset.o tidbitmap.o \ - copyfuncs.o equalfuncs.o extensible.o makefuncs.o \ - outfuncs.o readfuncs.o print.o read.o params.o value.o + copyfuncs.o copyfuncs_new.o equalfuncs.o equalfuncs_new.o extensible.o \ + makefuncs.o nodeinfo_data.o outfuncs.o outfuncs_new.o readfuncs.o \ + readfuncs_new.o print.o read.o params.o value.o + +# node metadata generation +GENNODE_FILES = nodes/primnodes.h \ + nodes/pathnodes.h \ + nodes/plannodes.h \ + nodes/execnodes.h \ + nodes/memnodes.h \ + nodes/value.h \ + nodes/pg_list.h \ + nodes/extensible.h \ + nodes/parsenodes.h \ + nodes/replnodes.h \ + nodes/supportnodes.h \ + utils/rel.h + +all: generated-node-data + +# FIXME: If we want to support doing this during cross compilation, +# this'd need to be done using the host compiler +gennodes.o: override CFLAGS += $(LLVM_CFLAGS) +gennodes.o: override CPPFLAGS += $(LLVM_CPPFLAGS) +gennodes: override LDFLAGS += -lclang + +gennodes: | submake-libpgport + +.PHONY: generated-node-data +generated-node-data: $(top_srcdir)/src/backend/nodes/nodeinfo_data.c + +# metadata generation depends on the headers that are to be processed +$(top_srcdir)/src/backend/nodes/nodeinfo_data.c: \ +
$(addprefix $(top_srcdir)/src/include/, $(GENNODE_FILES)) + +# But also on the generator's own source and the metadata header +$(top_srcdir)/src/backend/nodes/nodeinfo_data.c: \ + $(top_srcdir)/src/backend/nodes/gennodes.c \ + $(top_srcdir)/src/include/nodes/nodeinfo.h + +# Order-only dependency on gennodes, so that gennodes only needs to +# be built (and thus the LLVM dependency is only required in that +# case) when the node metadata is out of date. +# +# FIXME: proper error message when LLVM isn't available +# +# XXX: It'd be nicer if we had some more granular check whether this +# needs to be rebuilt, obviously there are plenty of changes that wouldn't +# matter. +$(top_srcdir)/src/backend/nodes/nodeinfo_data.c: | gennodes + ./gennodes \ + --llvm-config $(LLVM_CONFIG) \ + --output $@ \ + $(GENNODE_FILES) \ + -- \ + $(CPPFLAGS) -Wno-ignored-attributes include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 26d2f467e0..690602654d 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -4733,15 +4733,8 @@ _copyForeignKeyCacheInfo(const ForeignKeyCacheInfo *from) return newnode; } - -/* - * copyObjectImpl -- implementation of copyObject(); see nodes/nodes.h - * - * Create a copy of a Node tree or list. This is a "deep" copy: all - * substructure is copied too, recursively.
- */ void * -copyObjectImpl(const void *from) +copyObjectImplOld(const void *from) { void *retval; diff --git a/src/backend/nodes/copyfuncs_new.c b/src/backend/nodes/copyfuncs_new.c new file mode 100644 index 0000000000..a080b7ce95 --- /dev/null +++ b/src/backend/nodes/copyfuncs_new.c @@ -0,0 +1,359 @@ +#include "postgres.h" + +#include "miscadmin.h" +#include "nodes/nodes.h" +#include "nodes/nodeinfo.h" +#include "nodes/pg_list.h" +#include "nodes/primnodes.h" +#include "nodes/value.h" +#include "utils/datum.h" + + +typedef struct CopyNodeContext +{ + size_t required_space; + size_t used_space; + char *space; +} CopyNodeContext; + + +static Node* nodecopy_new_rec(CopyNodeContext *context, const Node *obj); +static void nodecopy_fields(CopyNodeContext *context, Node *dst, const Node *src, const TINodeType *type_info); +static List* nodecopy_list(CopyNodeContext *context, const List *obj, NodeTag tag); +static void nodecopy_value_union(CopyNodeContext *context, Value *dst, const Value *src); + + +#define BITMAPSET_SIZE(nwords) \ + (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword)) + +/* + * copyObjectImpl -- implementation of copyObject(); see nodes/nodes.h + * + * Create a copy of a Node tree or list. This is a "deep" copy: all + * substructure is copied too, recursively. 
+ */ +void * +copyObjectImpl(const void *from) +{ +#ifdef USE_NEW_NODE_FUNCS + return copyObjectImplNew(from); +#else + return copyObjectImplOld(from); +#endif +} + +void * __attribute__((flatten)) +copyObjectImplNew(const void *obj) +{ + CopyNodeContext context = {0}; + + return nodecopy_new_rec(&context, obj); +} + +static inline void* +nodecopy_alloc(CopyNodeContext *context, size_t size, size_t align) +{ + return palloc(size); +} + +static inline void* +nodecopy_alloc0(CopyNodeContext *context, size_t size, size_t align) +{ + return palloc0(size); +} + +static Node* +nodecopy_new_rec(CopyNodeContext *context, const Node *obj) +{ + const TINodeType *type_info; + NodeTag tag; + Node *dst; + + if (obj == NULL) + return NULL; + + tag = nodeTag(obj); + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + switch (tag) + { + case T_List: + case T_OidList: + case T_IntList: + return (Node *) nodecopy_list(context, (List *) obj, tag); + default: + break; + } + + type_info = &ti_node_types[tag]; + + Assert(type_info->size != TYPE_SIZE_UNKNOWN); + + dst = (Node *) nodecopy_alloc0(context, type_info->size, MAXIMUM_ALIGNOF); + dst->type = tag; + + nodecopy_fields(context, dst, obj, type_info); + + return dst; +} + +static void +nodecopy_fields(CopyNodeContext *context, Node *dst, const Node *src, const TINodeType *type_info) +{ + const TIStructField *field_info = &ti_struct_fields[type_info->first_field_at]; + + for (int i = 0; i < type_info->num_fields; i++, field_info++) + { + const void *src_field_ptr; + void *dst_field_ptr; + + // FIXME: ExtensibleNode needs to call callbacks, or be reimplemented + + if (field_info->flags & TYPE_COPY_IGNORE) + continue; + + src_field_ptr = ((const char *) src + field_info->offset); + dst_field_ptr = ((char *) dst + field_info->offset); + + switch (field_info->known_type_id) + { + /* + * These could also be implemented via memcpy, but knowing size + * ahead of time is faster + */ + + case 
KNOWN_TYPE_UINT16: + *(uint16 *) dst_field_ptr = *(const uint16 *) src_field_ptr; + break; + case KNOWN_TYPE_OPFUNCID: + case KNOWN_TYPE_OID: + case KNOWN_TYPE_UINT32: + *(uint32 *) dst_field_ptr = *(const uint32 *) src_field_ptr; + break; + case KNOWN_TYPE_UINT64: + *(uint64 *) dst_field_ptr = *(const uint64 *) src_field_ptr; + break; + + case KNOWN_TYPE_INT16: + *(int16 *) dst_field_ptr = *(const int16 *) src_field_ptr; + break; + case KNOWN_TYPE_LOCATION: + case KNOWN_TYPE_INT32: + *(int32 *) dst_field_ptr = *(const int32 *) src_field_ptr; + break; + case KNOWN_TYPE_INT64: + *(int64 *) dst_field_ptr = *(const int64 *) src_field_ptr; + break; + + case KNOWN_TYPE_FLOAT32: + *(float *) dst_field_ptr = *(const float *) src_field_ptr; + break; + case KNOWN_TYPE_FLOAT64: + *(double *) dst_field_ptr = *(const double *) src_field_ptr; + break; + + case KNOWN_TYPE_BOOL: + *(bool *) dst_field_ptr = *(const bool *) src_field_ptr; + break; + + case KNOWN_TYPE_CHAR: + *(char *) dst_field_ptr = *(const char *) src_field_ptr; + break; + + case KNOWN_TYPE_NODE: + { + const TINodeType *sub_type_info; + NodeTag sub_tag; + + Assert(field_info->type_id != TYPE_ID_UNKNOWN); + + if (field_info->offset == 0) + sub_tag = field_info->type_id; + else + { + sub_tag = nodeTag(src_field_ptr); + + if (unlikely(ti_node_types[sub_tag].size != ti_node_types[field_info->type_id].size)) + { + elog(ERROR, "%s size %d = %s %d failed", + ti_strings[ti_node_types[sub_tag].name].string, + ti_node_types[sub_tag].size, + ti_strings[ti_node_types[field_info->type_id].name].string, + ti_node_types[field_info->type_id].size); + } + + Assert(ti_node_types[sub_tag].size == + ti_node_types[field_info->type_id].size); + } + + sub_type_info = &ti_node_types[sub_tag]; + + nodecopy_fields(context, + (Node *) dst_field_ptr, + (const Node *) src_field_ptr, + sub_type_info); + + break; + } + + case KNOWN_TYPE_DATUM: + { + const Const *csrc = castNode(Const, (Node *) src); + Const *cdst = castNode(Const, (Node *) 
dst); + + if (csrc->constbyval || csrc->constisnull) + cdst->constvalue = csrc->constvalue; + else + cdst->constvalue = datumCopy(csrc->constvalue, + csrc->constbyval, + csrc->constlen); + + break; + } + + case KNOWN_TYPE_VALUE_UNION: + { + const Value *vsrc = (const Value *) src; + Value *vdst = (Value *) dst; + + Assert(IsAValue(vsrc) && IsAValue(vdst)); + + nodecopy_value_union(context, vdst, vsrc); + + break; + } + + case KNOWN_TYPE_P_PGARR: + if (*(const PgArrBase **) src_field_ptr != NULL) + { + const PgArrBase *arr_src = *(const PgArrBase **) src_field_ptr; + PgArrBase **arr_dst = (PgArrBase **) dst_field_ptr; + + Assert(field_info->elem_size > 0); + + *arr_dst = pgarr_helper_clone(arr_src, field_info->elem_size); + } + break; + + case KNOWN_TYPE_P_NODE: + if (*(const Node **) src_field_ptr != NULL) + *(Node **) dst_field_ptr = nodecopy_new_rec(context, *(const Node **) src_field_ptr); + break; + + case KNOWN_TYPE_P_CHAR: + if (*(char **) src_field_ptr != NULL) + { + size_t len = strlen(*(const char **) src_field_ptr) + 1; + + *(char **) dst_field_ptr = nodecopy_alloc0(context, len, 1); + memcpy(*(char **) dst_field_ptr, *(const char **) src_field_ptr, len); + } + break; + + case KNOWN_TYPE_P_BITMAPSET: + if (*(const char **) src_field_ptr != NULL) + { + const Bitmapset *bs_src = *(const Bitmapset **) src_field_ptr; + Bitmapset **bs_dst = (Bitmapset **) dst_field_ptr; + size_t bs_size = BITMAPSET_SIZE(bs_src->nwords); + + *bs_dst = (Bitmapset *) nodecopy_alloc0(context, bs_size, MAXIMUM_ALIGNOF); + memcpy(*bs_dst, bs_src, bs_size); + } + break; + + default: + if (field_info->flags & (TYPE_COPY_FORCE_SCALAR | + TYPE_CAT_SCALAR)) /* bitwise OR: either flag allows a plain memcpy */ + { + Assert(field_info->size != TYPE_SIZE_UNKNOWN); + memcpy(dst_field_ptr, src_field_ptr, field_info->size); + } + else + elog(ERROR, "don't know how to copy field %s %s->%s", + ti_strings[field_info->type].string, + ti_strings[type_info->name].string, + ti_strings[field_info->name].string); + + break; + } + } +} + +static List*
+nodecopy_list(CopyNodeContext *context, const List *src, NodeTag tag) +{ + List *dst; + + /* + * XXX: this is copying implementation details from new_list. But + * otherwise it's hard to pass details through copy_list[_deep], and to + * allocate the list itself as part of a larger allocation. + */ + dst = (List *) nodecopy_alloc0(context, + offsetof(List, initial_elements) + + src->length * sizeof(ListCell), + MAXIMUM_ALIGNOF); + dst->type = tag; + dst->length = src->length; + dst->max_length = src->length; + dst->elements = dst->initial_elements; + + switch (tag) + { + case T_List: + for (int i = 0; i < src->length; i++) + lfirst(&dst->elements[i]) = + nodecopy_new_rec(context, lfirst(&src->elements[i])); + break; + + case T_OidList: + case T_IntList: + memcpy(dst->elements, src->elements, + dst->length * sizeof(ListCell)); + break; + + default: + pg_unreachable(); + return NULL; + } + + return dst; +} + +static void +nodecopy_value_union(CopyNodeContext *context, Value *dst, const Value *src) +{ + Assert(nodeTag(src) == nodeTag(dst)); + + switch (nodeTag(src)) + { + case T_Integer: + dst->val.ival = src->val.ival; + break; + + case T_Float: + case T_String: + case T_BitString: + if (src->val.str == NULL) + dst->val.str = NULL; + else + { + size_t len = strlen(src->val.str) + 1; + + dst->val.str = nodecopy_alloc0(context, len, 1); + memcpy(dst->val.str, src->val.str, len); + } + + break; + + case T_Null: + break; + + default: + pg_unreachable(); + break; + } +} diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 0869d0dd27..944ad85862 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -52,7 +52,7 @@ /* Compare a field that is a pointer to some kind of Node or Node tree */ #define COMPARE_NODE_FIELD(fldname) \ do { \ - if (!equal(a->fldname, b->fldname)) \ + if (!nodes_equal_old(a->fldname, b->fldname)) \ return false; \ } while (0) @@ -2336,7 +2336,7 @@ _equalParamRef(const ParamRef *a, const 
ParamRef *b) static bool _equalAConst(const A_Const *a, const A_Const *b) { - if (!equal(&a->val, &b->val)) /* hack for in-line Value field */ + if (!nodes_equal_old(&a->val, &b->val)) /* hack for in-line Value field */ return false; COMPARE_LOCATION_FIELD(location); @@ -2930,7 +2930,7 @@ _equalList(const List *a, const List *b) case T_List: forboth(item_a, a, item_b, b) { - if (!equal(lfirst(item_a), lfirst(item_b))) + if (!nodes_equal_old(lfirst(item_a), lfirst(item_b))) return false; } break; @@ -2993,12 +2993,8 @@ _equalValue(const Value *a, const Value *b) return true; } -/* - * equal - * returns whether two nodes are equal - */ bool -equal(const void *a, const void *b) +nodes_equal_old(const void *a, const void *b) { bool retval; diff --git a/src/backend/nodes/equalfuncs_new.c b/src/backend/nodes/equalfuncs_new.c new file mode 100644 index 0000000000..c705721400 --- /dev/null +++ b/src/backend/nodes/equalfuncs_new.c @@ -0,0 +1,409 @@ +#include "postgres.h" + +#include "miscadmin.h" +#include "nodes/nodes.h" +#include "nodes/nodeinfo.h" +#include "nodes/pg_list.h" +#include "nodes/primnodes.h" +#include "nodes/value.h" +#include "utils/datum.h" + + +static bool nodes_equal_new_rec(const Node *a, const Node *b); +static bool nodes_equal_new_rec_real(const Node *a, const Node *b); +static bool nodes_equal_list(const List *a, const List *b, NodeTag tag); +static bool nodes_equal_value_union(const Value *a, const Value *b, NodeTag tag); +static bool nodes_equal_fields(const Node *a, const Node *b, const TINodeType *type_info); + + +/* + * equal + * returns whether two nodes are equal + */ +bool +equal(const void *a, const void *b) +{ +#ifdef USE_NEW_NODE_FUNCS + return nodes_equal_new(a, b); +#else + return nodes_equal_old(a, b); +#endif +} + +bool +nodes_equal_new(const void *a, const void *b) +{ + bool retval; + + retval = nodes_equal_new_rec(a, b); +#ifdef CHEAPER_PER_NODE_COMPARE_ASSERT + Assert(retval == nodes_equal_old(a, b)); +#endif + + return retval; +} + 
+/* + * Recurse into comparing the two nodes. + */ +static bool +nodes_equal_new_rec(const Node *a, const Node *b) +{ + /* + * During development it can be helpful to compare old/new equal + * comparisons on a per-field basis, making it easier to pinpoint the node + * with differing behaviour - but it's quite expensive (because we'll + * compare nodes over and over while recursing down). + */ +#ifdef EXPENSIVE_PER_NODE_COMPARE_ASSERT + bool newretval; + bool oldretval; + + newretval = nodes_equal_new_rec_real(a, b); + oldretval = nodes_equal_old(a, b); + + Assert(newretval == oldretval); + + return newretval; +#else + return nodes_equal_new_rec_real(a, b); +#endif +} + +/* temporary helper for nodes_equal_new_rec */ +static bool +nodes_equal_new_rec_real(const Node *a, const Node *b) +{ + const TINodeType *type_info; + NodeTag tag; + + if (a == b) + return true; + + /* note that a!=b, so only one of them can be NULL */ + if (a == NULL || b == NULL) + return false; + + /* are they the same type of nodes? */ + tag = nodeTag(a); + if (tag != nodeTag(b)) + return false; + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + /* + * Compare types of node we cannot / do not want to handle using + * elementwise comparisons. Either because that'd not be correct + * (e.g. because of an embedded tagged union), incomplete (e.g. because we + * need to compare all elements of a list, which needs knowledge of two + * struct members), or because it'd be less efficient. + */ + switch (tag) + { + case T_List: + case T_OidList: + case T_IntList: + return nodes_equal_list((const List *) a, (const List *) b, tag); + + default: + break; + } + + type_info = &ti_node_types[tag]; + + return nodes_equal_fields(a, b, type_info); +} + +/* + * Compare the fields of a struct, using the provided TINodeType + * metadata. + * + * The compared nodes may be nodes may be separately allocated, or be embedded + * in a surrounding struct. 
+ * This function does *not* check for the nodes being of the same type, or + * having the same tag! If needed nodes_equal_new_rec() does so. + * + * NB: The struct may or may not have a nodeTag() for the type_info - e.g. for + * the struct elements of a "superclass" of a node (e.g. a Scan's .plan) it'll + * be the subclass's tag. + */ +static bool +nodes_equal_fields(const Node *a, const Node *b, const TINodeType *type_info) +{ + const TIStructField *field_info = &ti_struct_fields[type_info->first_field_at]; + + for (int i = 0; i < type_info->num_fields; i++, field_info++) + { + // FIXME: ExtensibleNode needs to call callbacks, or be reimplemented + + const void *a_field_ptr; + const void *b_field_ptr; + + if (field_info->flags & TYPE_EQUAL_IGNORE) + continue; + + a_field_ptr = ((const char *) a + field_info->offset); + b_field_ptr = ((const char *) b + field_info->offset); + + switch (field_info->known_type_id) + { + case KNOWN_TYPE_NODE: + { + const TINodeType *sub_type_info; + NodeTag sub_tag; + + Assert(field_info->type_id != TYPE_ID_UNKNOWN); + + /* + * If at offset 0, this shares the NodeTag field with the + * parent class. Therefore we have to rely on the declared + * type.
+ */ + if (field_info->offset == 0) + sub_tag = field_info->type_id; + else + { + sub_tag = nodeTag(a_field_ptr); + + Assert(ti_node_types[sub_tag].size == + ti_node_types[field_info->type_id].size); + + if (sub_tag != nodeTag(b_field_ptr)) + return false; + } + + sub_type_info = &ti_node_types[sub_tag]; + + if (!nodes_equal_fields((const Node *) a_field_ptr, + (const Node *) b_field_ptr, + sub_type_info)) + return false; + + break; + } + + case KNOWN_TYPE_DATUM: + { + /* currently only embedded in Const */ + const Const *ca = castNode(Const, (Node *) a); + const Const *cb = castNode(Const, (Node *) b); + + Assert(ca->consttype == cb->consttype && + ca->constlen == cb->constlen && + ca->constbyval == cb->constbyval && + ca->constisnull == cb->constisnull); + + /* + * We treat all NULL constants of the same type as + * equal. Someday this might need to change? But datumIsEqual + * doesn't work on nulls, so... + */ + if (ca->constisnull && cb->constisnull) + continue; + else if (!datumIsEqual(ca->constvalue, cb->constvalue, + ca->constbyval, ca->constlen)) + return false; + + break; + } + + case KNOWN_TYPE_VALUE_UNION: + { + const Value *va = (const Value *) a; + const Value *vb = (const Value *) b; + + Assert(IsAValue(va) && IsAValue(vb)); + + if (!nodes_equal_value_union(va, vb, nodeTag(a))) + return false; + + break; + } + + case KNOWN_TYPE_OPFUNCID: + { + const Oid oa = *(const Oid *) a_field_ptr; + const Oid ob = *(const Oid *) b_field_ptr; + + /* + * Special-case opfuncid: it is allowable for it to differ if one node + * contains zero and the other doesn't. This just means that the one node + * isn't as far along in the parse/plan pipeline and hasn't had the + * opfuncid cache filled yet. 
+ */ + if (oa != ob && oa != 0 && ob != 0) + return false; + + break; + + } + + case KNOWN_TYPE_P_PGARR: + Assert(field_info->elem_size != TYPE_SIZE_UNKNOWN); + + /* identical pointers (which may be NULL) are definitely equal */ + if (*(const void **) a_field_ptr != *(const void **) b_field_ptr) + { + /* + * Compare without checking for NULLness, empty array can be + * represented with a NULL pointer, or with an array with zero + * elements. + */ + const PgArrBase *arr_a = *(const PgArrBase **) a_field_ptr; + const PgArrBase *arr_b = *(const PgArrBase **) b_field_ptr; + + if (pgarr_size(arr_a) != pgarr_size(arr_b)) + return false; + + if (!pgarr_empty(arr_a)) + { + /* + * XXX: Should we care about the potential effect of padding + * here? Currently we're only using this for simple scalar + * types, but ... + */ + if (memcmp(arr_a->elementsp, arr_b->elementsp, + arr_a->size * field_info->elem_size) != 0) + return false; + } + + } + break; + + case KNOWN_TYPE_P_BITMAPSET: + /* identical pointers (which may be NULL) are definitely equal */ + if (*(const void **) a_field_ptr != *(const void **) b_field_ptr) + { + const Bitmapset *bs_a = *(const Bitmapset **) a_field_ptr; + const Bitmapset *bs_b = *(const Bitmapset **) b_field_ptr; + + if (!bms_equal(bs_a, bs_b)) + return false; + } + break; + + case KNOWN_TYPE_P_NODE: + /* identical pointers (which may be NULL) are definitely equal */ + if (*(const void **) a_field_ptr == *(const void **) b_field_ptr) + break; + if (*(const void **) a_field_ptr == NULL || + *(const void **) b_field_ptr == NULL) + return false; + else + if (!nodes_equal_new_rec(*(const Node **) a_field_ptr, *(const Node **) b_field_ptr)) + return false; + break; + + case KNOWN_TYPE_P_CHAR: + /* identical pointers (which may be NULL) are definitely equal */ + if (*(const void **) a_field_ptr == *(const void **) b_field_ptr) + break; + if (*(const void **) a_field_ptr == NULL || + *(const void **) b_field_ptr == NULL) + return false; + else + if
(strcmp(*(const char **) a_field_ptr, *(const char **) b_field_ptr) != 0) + return false; + break; + + default: + if (field_info->flags & (TYPE_COPY_FORCE_SCALAR | + TYPE_CAT_SCALAR)) /* bitwise OR: either flag allows a plain memcmp */ + { + if (memcmp(a_field_ptr, b_field_ptr, field_info->size) != 0) + return false; + } + else + { + elog(ERROR, "don't know how to compare field %s %s->%s", + ti_strings[field_info->type].string, + ti_strings[type_info->name].string, + ti_strings[field_info->name].string); + } + break; + } + } + + return true; +} + +static bool +nodes_equal_list(const List *a, const List *b, NodeTag tag) +{ + const ListCell *lc_a; + const ListCell *lc_b; + + /* should have been verified by caller */ + Assert(a != b && a != NULL); + Assert(nodeTag(a) == nodeTag(b)); + + if (a->length != b->length) + return false; + + switch (tag) + { + case T_List: + forboth(lc_a, a, lc_b, b) + { + if (!nodes_equal_new_rec(lfirst(lc_a), lfirst(lc_b))) + return false; + } + break; + + case T_OidList: + forboth(lc_a, a, lc_b, b) + { + if (lfirst_oid(lc_a) != lfirst_oid(lc_b)) + return false; + } + break; + + case T_IntList: + forboth(lc_a, a, lc_b, b) + { + if (lfirst_int(lc_a) != lfirst_int(lc_b)) + return false; + } + break; + + default: + pg_unreachable(); + return false; + } + + return true; +} + +static bool +nodes_equal_value_union(const Value *a, const Value *b, NodeTag tag) +{ + /* should have been verified by caller */ + Assert(a != b && a != NULL); + Assert(nodeTag(a) == nodeTag(b)); + + switch (tag) + { + case T_Integer: + return a->val.ival == b->val.ival; + + case T_Float: + case T_String: + case T_BitString: + if (a->val.str == b->val.str) + return true; + else if (a->val.str == NULL || b->val.str == NULL) + return false; + return strcmp(a->val.str, b->val.str) == 0; + + case T_Null: + return true; + + default: + pg_unreachable(); + return false; + } + + pg_unreachable(); + return false; +} diff --git a/src/backend/nodes/gennodes.c b/src/backend/nodes/gennodes.c new file mode 100644 index
0000000000..ac6d79ebf8 --- /dev/null +++ b/src/backend/nodes/gennodes.c @@ -0,0 +1,929 @@ +/*------------------------------------------------------------------------- + * + * gennodes.c + * metadata generation routines for node types + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/nodes/gennodes.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include <clang-c/Index.h> + +#include "lib/pgarr.h" +#include "lib/stringinfo.h" +#include "nodes/nodeinfo.h" + + +#define TYPE_ID_UNKNOWN PG_UINT16_MAX + +typedef struct CollectInfo +{ + PGARR(charstar) strtab; + PGARR(charstar) interesting_node_typedefs; + PGARR(charstar) interesting_node_types; + PGARR(charstar) node_type_strings; + PGARR(charstar) struct_field_strings; + + PGARR(charstar) interesting_enums; + PGARR(charstar) enum_strings; + PGARR(charstar) enum_field_strings; + + CXType current_struct_type; + size_t off; +} CollectInfo; + +/* for collecting information about a pgarr.h style array */ +typedef struct PgArrFieldsState +{ + uint32 off; + bool valid; + CXType tp; +} PgArrFieldsState; + + +/* + * FIXME: this is used for lookups in too many places - need something better + * than O(N).
+ */ +static int +string_in_arr(PGARR(charstar) *arr, const char *match) +{ + for (int i = 0; i < pgarr_size(arr); i++) + { + const char *el = *pgarr_at(arr, i); + + if (el == NULL || match == NULL) + continue; /* a NULL never matches, and must not reach strcmp() */ + + if (strcmp(el, match) == 0) + return i; + } + + return -1; +} + +static uint32 +intern_string(CollectInfo *info, const char *str) +{ + int id; /* signed: string_in_arr() returns -1 for "not found" */ + + id = string_in_arr(&info->strtab, str); + + if (id != -1) + return (uint32) id; + else + { + pgarr_append(char *, &info->strtab, pstrdup(str)); + return pgarr_size(&info->strtab) - 1; + } +} + +static void +flag_append(StringInfo str, char *appendflag) +{ + if (str->len > 0) + appendStringInfoString(str, " | "); + appendStringInfoString(str, appendflag); +} + +static enum CXVisitorResult +find_PgArrFields_vis(CXCursor cursor, CXClientData client_data) +{ + PgArrFieldsState *state = (PgArrFieldsState *) client_data; + const char *fieldname = clang_getCString(clang_getCursorSpelling(cursor)); + + if (state->off == 0) + { + if (strcmp(fieldname, "size") != 0) + return CXVisit_Break; + } + else if (state->off == 1) + { + if (strcmp(fieldname, "capacity") != 0) + return CXVisit_Break; + } + else if (state->off == 2) + { + CXType tp = clang_getCursorType(cursor); + if (strcmp(fieldname, "elementsp") != 0) + return CXVisit_Break; + if (tp.kind != CXType_Pointer) + return CXVisit_Break; + state->tp = clang_getPointeeType(tp); + state->valid = true; + } + + state->off++; + return CXVisit_Continue; +} + +static enum CXChildVisitResult +find_EnumFields_vis(CXCursor cursor, CXCursor parent, CXClientData client_data) +{ + if (cursor.kind == CXCursor_EnumConstantDecl) + { + CollectInfo *collect_info = (CollectInfo *) client_data; + const char *fieldname = clang_getCString(clang_getCursorSpelling(cursor)); + char *s; + + s = psprintf("{.name = %u /* %s */, .value = (uint32) %s /* %u */}", + intern_string(collect_info, fieldname), fieldname, + fieldname, + (uint32) clang_getEnumConstantDeclUnsignedValue(cursor)); + +
pgarr_append(char *, &collect_info->enum_field_strings, s); + } + return CXChildVisit_Continue; +} + +static uint16 +get_enum(CollectInfo *collect_info, CXType ctp) +{ + const char *ctp_name = + clang_getCString(clang_getTypeSpelling(ctp)); + int enumid = string_in_arr(&collect_info->interesting_enums, ctp_name); + + if (enumid == -1) + { + size_t fields_at_start = pgarr_size(&collect_info->enum_field_strings); + char *s; + + clang_visitChildren( + clang_getTypeDeclaration(ctp), + find_EnumFields_vis, + collect_info); + + s = psprintf("{.name = %u /* %s */, .first_field_at = %zd, .num_fields = %zd, .size = sizeof(%s)}", + intern_string(collect_info, ctp_name), ctp_name, + fields_at_start, + pgarr_size(&collect_info->enum_field_strings) - fields_at_start, + ctp_name); + + pgarr_append(char *, &collect_info->enum_strings, s); + pgarr_append(char *, &collect_info->interesting_enums, strdup(ctp_name)); + + enumid = pgarr_size(&collect_info->interesting_enums) - 1; + + } + + return (uint16) enumid; +} + +#define tpref(intype, name) \ + (intype.kind == CXType_Pointer ? 
CppAsString2(CppConcat(KNOWN_TYPE_P_, name)) : CppAsString2(CppConcat(KNOWN_TYPE_, name))) + +static void +categorize_type(CollectInfo *collect_info, CXType intype, + StringInfo flags, + uint16 *type_id, + char **known_type_id, + char **elem_known_type_id, + char **elem_size) +{ + CXType type; + CXType canon_type; + enum CXTypeKind type_kind; + enum CXTypeKind canon_type_kind; + const char *type_name; + const char *canon_type_name; + + if (clang_getCanonicalType(intype).kind == CXType_Pointer) + { + intype = clang_getCanonicalType(intype); + type = clang_getPointeeType(intype); + flag_append(flags, "TYPE_CAT_SCALAR"); + } + else + { + type = intype; + flag_append(flags, "TYPE_CAT_SCALAR"); + } + + canon_type = clang_getCanonicalType(type); + type_kind = type.kind; + canon_type_kind = canon_type.kind; + type_name = clang_getCString(clang_getTypeSpelling(type)); + canon_type_name = clang_getCString(clang_getTypeSpelling(canon_type)); + + if (canon_type_kind == CXType_Enum) + { + *known_type_id = tpref(intype, ENUM); + *type_id = get_enum(collect_info, canon_type); + } + else + { + int tp = string_in_arr(&collect_info->interesting_node_types, canon_type_name); + + if (tp != -1) + { + *type_id = tp; + *known_type_id = tpref(intype, NODE); + } + } + + if (type_kind == CXType_Typedef && canon_type_kind == CXType_UInt && + strcmp(type_name, "Oid") == 0) + *known_type_id = tpref(intype, OID); + else if (type_kind == CXType_Typedef && canon_type_kind == CXType_Int && + strcmp(type_name, "Location") == 0) + { + *known_type_id = tpref(intype, LOCATION); + flag_append(flags, "TYPE_EQUAL_IGNORE"); + } + else if (type_kind == CXType_Typedef && ( + canon_type_kind == CXType_Enum) && + strcmp(type_name, "CoercionForm") == 0) + { + *known_type_id = tpref(intype, COERCIONFORM); + flag_append(flags, "TYPE_EQUAL_IGNORE"); + } + else if (type_kind == CXType_Typedef && ( + canon_type_kind == CXType_Enum) && + strcmp(type_name, "NodeTag") == 0) + { + *known_type_id = tpref(intype, 
NODE_TAG); + } + else if (type_kind == CXType_Typedef && ( + canon_type_kind == CXType_UInt || + canon_type_kind == CXType_ULong || + canon_type_kind == CXType_ULongLong) && + strcmp(type_name, "Datum") == 0) + *known_type_id = tpref(intype, DATUM); + else if (canon_type_kind == CXType_Char_S || + canon_type_kind == CXType_SChar || + canon_type_kind == CXType_Char_U || + canon_type_kind == CXType_UChar) + *known_type_id = tpref(intype, CHAR); + else if (canon_type_kind == CXType_UShort|| + canon_type_kind == CXType_UInt || + canon_type_kind == CXType_ULong || + canon_type_kind == CXType_ULongLong || + canon_type_kind == CXType_UInt128) + { + if (canon_type_kind == CXType_UShort) + *known_type_id = tpref(intype, UINT16); + else if (canon_type_kind == CXType_UInt) + *known_type_id = tpref(intype, UINT32); + else if (canon_type_kind == CXType_ULong || canon_type_kind == CXType_ULongLong) + { + if (intype.kind != CXType_Pointer) + *known_type_id = psprintf("(sizeof(%s) == 8 ? KNOWN_TYPE_UINT64 : KNOWN_TYPE_UINT32)", canon_type_name); + else + *known_type_id = psprintf("(sizeof(%s) == 8 ? KNOWN_TYPE_P_UINT64 : KNOWN_TYPE_P_UINT32)", canon_type_name); + } + else if (canon_type_kind == CXType_UInt128) + *known_type_id = tpref(intype, UINT128); + } + else if (canon_type_kind == CXType_Short || canon_type_kind == CXType_Int || + canon_type_kind == CXType_Long || canon_type_kind == CXType_LongLong || + canon_type_kind == CXType_Int128) + { + if (canon_type_kind == CXType_Short) + *known_type_id = tpref(intype, INT16); + else if (canon_type_kind == CXType_Int) + *known_type_id = tpref(intype, INT32); + else if (canon_type_kind == CXType_Long || canon_type_kind == CXType_LongLong) + { + if (intype.kind != CXType_Pointer) + *known_type_id = psprintf("(sizeof(%s) == 8 ? KNOWN_TYPE_INT64 : KNOWN_TYPE_INT32)", canon_type_name); + else + *known_type_id = psprintf("(sizeof(%s) == 8 ? 
KNOWN_TYPE_P_INT64 : KNOWN_TYPE_P_INT32)", canon_type_name); + } + else if (canon_type_kind == CXType_Int128) + *known_type_id = tpref(intype, INT128); + } + else if (canon_type_kind == CXType_Float) + *known_type_id = tpref(intype, FLOAT32); + else if (canon_type_kind == CXType_Double) + *known_type_id = tpref(intype, FLOAT64); + else if (canon_type_kind == CXType_Bool) + *known_type_id = tpref(intype, BOOL); + else if (strcmp(canon_type_name, "struct Bitmapset") == 0) + *known_type_id = tpref(intype, BITMAPSET); /* error if not pointer */ + else if (strcmp(canon_type_name, "struct Node") == 0) + { + /* + * Node* currently isn't actually recognized as a node type, therefore + * it is not recognized as such - but we do use it to point to a + * generic node. + */ + + if (intype.kind != CXType_Pointer) + { + fprintf(stderr, "struct Node cannot be embedded\n"); + exit(EXIT_FAILURE); + } + else + { + Assert(*type_id == TYPE_ID_UNKNOWN); + *type_id = TYPE_ID_UNKNOWN; + *known_type_id = "KNOWN_TYPE_P_NODE"; + } + } + else if (strncmp(canon_type_name, "struct ArrayOf", sizeof("struct ArrayOf") - 1) == 0) + { + PgArrFieldsState state = {0}; + + if (elem_size == NULL) + { + fprintf(stderr, "recursive arrays are not supported\n"); + exit(EXIT_FAILURE); + } + + clang_Type_visitFields( + canon_type, + find_PgArrFields_vis, + &state); + +#if 0 + fprintf(stderr, "pgarr: %s: %s: contains %s: %u %s\n", + clang_getCString(clang_getTypeKindSpelling(canon_type_kind)), + canon_type_name, + type_name + (sizeof("struct ArrayOf") - 1), + state.valid, + clang_getCString(clang_getTypeSpelling(state.tp)) + ); +#endif + + categorize_type(collect_info, state.tp, + flags, type_id, elem_known_type_id, NULL, NULL); + + *known_type_id = tpref(intype, PGARR); + if (clang_Type_getSizeOf(state.tp) >= 0) + *elem_size = psprintf("sizeof(%s)", clang_getCString(clang_getTypeSpelling(state.tp))); + } + else if (canon_type_kind == CXType_Record && strcmp(canon_type_name, "union ValUnion") == 0) + { + 
*known_type_id = tpref(intype, VALUE_UNION); + } +} + +/* visit elements of the NodeTag enum, to collect the names of all node types */ +static enum CXChildVisitResult +find_NodeTagElems_vis(CXCursor cursor, CXCursor parent, CXClientData client_data) +{ + if (clang_getCursorKind(cursor) == CXCursor_EnumConstantDecl) + { + CollectInfo *collect_info = (CollectInfo *) client_data; + const char *name = clang_getCString(clang_getCursorSpelling(cursor)); + + if (strncmp(name, "T_", 2) != 0) + { + fprintf(stderr, "unexpected name: %s\n", name); + exit(-1); + } + else + { + pgarr_append(char *, &collect_info->interesting_node_typedefs, strdup(name + 2)); + } + } + + return CXChildVisit_Recurse; +} + +/* find the NodeTag enum, and collect elements using find_NodeTagElems_vis */ +static enum CXChildVisitResult +find_NodeTag_vis(CXCursor cursor, CXCursor parent, CXClientData client_data) +{ + if (clang_getCursorKind(cursor) == CXCursor_EnumDecl) + { + const char *spelling = clang_getCString(clang_getCursorSpelling(cursor)); + + if (strcmp(spelling, "NodeTag") != 0) + return CXChildVisit_Recurse; + + clang_visitChildren( + cursor, + find_NodeTagElems_vis, + client_data); + + return CXChildVisit_Break; + } + return CXChildVisit_Recurse; +} + +/* collect information about the elements of Node style struct members */ +static enum CXVisitorResult +find_StructFields_vis(CXCursor cursor, CXClientData client_data) +{ + CollectInfo *collect_info = (CollectInfo *) client_data; + const char *structname = clang_getCString(clang_getTypeSpelling(collect_info->current_struct_type)); + const char *fieldname = clang_getCString(clang_getCursorSpelling(cursor)); + CXType fieldtype = clang_getCanonicalType(clang_getCursorType(cursor)); + const char *fieldtypename = + clang_getCString(clang_getTypeSpelling(fieldtype)); + uint16 type_id = TYPE_ID_UNKNOWN; + char *known_type_id = "KNOWN_TYPE_UNKNOWN"; + char *elem_known_type_id = "KNOWN_TYPE_UNKNOWN"; + char *s; + StringInfoData flags; + char 
*elem_size = "TYPE_SIZE_UNKNOWN"; + char *field_size; + char *type_id_s; + + initStringInfo(&flags); + + categorize_type(collect_info, clang_getCursorType(cursor), &flags, &type_id, &known_type_id, &elem_known_type_id, &elem_size); + + /* can't measure size for incomplete types (e.g. variable length arrays at the end of a struct) */ + if (clang_Type_getSizeOf(fieldtype) < 0) + { + flag_append(&flags, "TYPE_CAT_INCOMPLETE"); + + field_size = "TYPE_SIZE_UNKNOWN"; + } + else + { + field_size = psprintf("sizeof(%s)", fieldtypename); + } + + + /* XXX: these probably ought to be moved into a different function */ + + if (strcmp(known_type_id, "KNOWN_TYPE_NODE_TAG") == 0 && collect_info->off == 0) + { + /* no need to output the type itself, included otherwise in output */ + flag_append(&flags, "TYPE_OUT_IGNORE"); + } + else if (strcmp(structname, "struct PlaceHolderVar") == 0) + { + if (strcmp(fieldname, "phrels") == 0 || + strcmp(fieldname, "phexpr") == 0) + { + /* + * We intentionally do not compare phexpr. Two PlaceHolderVars + * with the same ID and levelsup should be considered equal even + * if the contained expressions have managed to mutate to + * different states. This will happen during final plan + * construction when there are nested PHVs, since the inner PHV + * will get replaced by a Param in some copies of the outer PHV. + * Another way in which it can happen is that initplan sublinks + * could get replaced by differently-numbered Params when sublink + * folding is done. (The end result of such a situation would be + * some unreferenced initplans, which is annoying but not really a + * problem.) On the same reasoning, there is no need to examine + * phrels. 
+ */ + flag_append(&flags, "TYPE_EQUAL_IGNORE"); + } + } + else if (strcmp(structname, "struct Query") == 0) + { + if (strcmp(fieldname, "queryId") == 0) + { + /* we intentionally ignore queryId, since it might not be set */ + flag_append(&flags, "TYPE_EQUAL_IGNORE"); + } + } + else if (strcmp(structname, "struct Aggref") == 0) + { + if (strcmp(fieldname, "aggtranstype") == 0) + { + /* ignore aggtranstype since it might not be set yet */ + flag_append(&flags, "TYPE_EQUAL_IGNORE"); + } + } + else if (strcmp(structname, "struct GroupingFunc") == 0) + { + if (strcmp(fieldname, "refs") == 0 || + strcmp(fieldname, "cols") == 0) + { + /* We must not compare the refs or cols field */ + flag_append(&flags, "TYPE_EQUAL_IGNORE"); + } + } + else if (strcmp(structname, "struct RestrictInfo") == 0) + + { + if (strcmp(fieldname, "type") != 0 && + strcmp(fieldname, "clause") != 0 && + strcmp(fieldname, "is_pushed_down") != 0 && + strcmp(fieldname, "outerjoin_delayed") != 0 && + strcmp(fieldname, "security_level") != 0 && + strcmp(fieldname, "required_relids") != 0 && + strcmp(fieldname, "outer_relids") != 0 && + strcmp(fieldname, "nullable_relids") != 0) + { + /* + * We ignore all the other fields, since they may not be set yet, and + * should be derivable from the clause anyway. 
+ */ + flag_append(&flags, "TYPE_EQUAL_IGNORE"); + } + + if (strcmp(fieldname, "parent_ec") == 0 || + strcmp(fieldname, "left_ec") == 0 || + strcmp(fieldname, "right_ec") == 0 || + strcmp(fieldname, "left_em") == 0 || + strcmp(fieldname, "right_em") == 0) + { + /* EquivalenceClasses are never copied, so shallow-copy the pointers */ + flag_append(&flags, "TYPE_COPY_FORCE_SCALAR"); + } + + if (strcmp(fieldname, "scansel_cache") == 0) + { + /* MergeScanSelCache isn't a Node, so hard to copy; just reset cache */ + flag_append(&flags, "TYPE_COPY_IGNORE"); + } + } + else if (strcmp(structname, "struct PathKey") == 0) + { + if (strcmp(fieldname, "pk_eclass") == 0) + { + /* We assume pointer equality is sufficient to compare the eclasses */ + flag_append(&flags, "TYPE_EQUAL_FORCE_SCALAR"); + flag_append(&flags, "TYPE_COPY_FORCE_SCALAR"); + } + } + else if (strcmp(fieldname, "opfuncid") == 0) + { + known_type_id = "KNOWN_TYPE_OPFUNCID"; + } + + if (type_id == TYPE_ID_UNKNOWN) + type_id_s = "TYPE_ID_UNKNOWN"; + else + type_id_s = psprintf("%u", type_id); + + if (flags.len == 0) + appendStringInfoChar(&flags, '0'); + + s = psprintf("{.name = %u /* %s */, .type = %u /* %s */, .offset = offsetof(%s, %s), .size = %s, .flags = %s, .type_id = %s, .known_type_id = %s, .elem_known_type_id = %s, .elem_size = %s}", + intern_string(collect_info, fieldname), fieldname, + intern_string(collect_info, fieldtypename), fieldtypename, + structname, /* offsetof */ + fieldname, /* offsetof */ + field_size, + flags.data, + type_id_s, + known_type_id, + elem_known_type_id, + elem_size); + + pgarr_append(char *, &collect_info->struct_field_strings, s); + + collect_info->off++; + + free(flags.data); + + return CXVisit_Continue; +} + +/* + * Collect the names of all the structs that "implement" node types (those + * names have previously been collected with find_NodeTag_vis). 
As we + * sometimes have forward declarations, we need to use a canonicalized name, + * as it's far easier to always use the underlying struct names, than somehow + * go the other way. + */ +static enum CXChildVisitResult +find_NodeStructs_vis(CXCursor cursor, CXCursor parent, CXClientData client_data) +{ + /* + * We'll reach each struct type twice - once for the typedef, and once for + * the struct itself. We only check typedef, including its name, because + * that's what needs to correspond to the NodeTag names. + */ + if (clang_getCursorKind(cursor) == CXCursor_TypedefDecl) + { + const char *spelling = + clang_getCString(clang_getTypeSpelling(clang_getCursorType(cursor))); + CollectInfo *collect_info = (CollectInfo *) client_data; + int type_pos = string_in_arr(&collect_info->interesting_node_typedefs, spelling); + + if (type_pos == -1) + return CXChildVisit_Continue; + + *pgarr_at(&collect_info->interesting_node_types, type_pos) = (char *) + clang_getCString(clang_getTypeSpelling(clang_getCanonicalType(clang_getCursorType(cursor)))); + + return CXChildVisit_Continue; + } + return CXChildVisit_Recurse; +} + +/* + * Collect the definition of all node structs. This is done separately from + * collecting the struct names (in find_NodeStructs_vis), because we need to + * identify whether struct members are node types themselves, for which we + * need their canonical names. + */ +static enum CXChildVisitResult +find_NodeStructDefs_vis(CXCursor cursor, CXCursor parent, CXClientData client_data) +{ + /* + * We'll reach each struct type twice - once for the typedef, and once for + * the struct. Only check one. XXX: Perhaps it'd be better to check the + * name of the typedef? That's what makeNode() etc effectively use? 
+ */ + if (clang_getCursorKind(cursor) == CXCursor_TypedefDecl) + { + const char *spelling = + clang_getCString(clang_getTypeSpelling(clang_getCursorType(cursor))); + CollectInfo *collect_info = (CollectInfo *) client_data; + size_t fields_at_start; + int type_pos = string_in_arr(&collect_info->interesting_node_typedefs, spelling); + char *size; + char *s; + + if (type_pos == -1) + return CXChildVisit_Continue; + + collect_info->off = 0; + collect_info->current_struct_type = clang_getCanonicalType(clang_getCursorType(cursor)); + + fields_at_start = pgarr_size(&collect_info->struct_field_strings); + + clang_Type_visitFields( + collect_info->current_struct_type, + find_StructFields_vis, + collect_info); + + if (clang_Type_getSizeOf(collect_info->current_struct_type) == CXTypeLayoutError_Incomplete) + size = "TYPE_SIZE_UNKNOWN"; + else + size = psprintf("sizeof(%s)", spelling); + + s = psprintf("{.name = %u /* %s */, .first_field_at = %zd, .num_fields = %zd, .size = %s}", + intern_string(collect_info, spelling), spelling, + fields_at_start, + pgarr_size(&collect_info->struct_field_strings) - fields_at_start, + size); + + *pgarr_at(&collect_info->node_type_strings, type_pos) = s; + return CXChildVisit_Continue; + } + return CXChildVisit_Recurse; +} + +int main(int argc, char **argv) +{ + CXCursor cursor; + CollectInfo collect_info = {0}; + CXIndex index; + enum CXErrorCode error; + CXTranslationUnit unit; + uint32 num_diagnostics; + const char *empty_filename = "empty_nodes.c"; + struct CXUnsavedFile empty = { + .Filename = empty_filename}; + PGARR(constcharstar) clang_args = {}; + bool first; + StringInfoData file_contents; + char *output_fname = NULL; + bool parsing_self = true; + FILE *output; + + initStringInfo(&file_contents); + + appendStringInfoString(&file_contents, "#include \"postgres.h\"\n\n"); + + /* to make space for path to llvm-config */ + pgarr_append(const char *, &clang_args, NULL); + + /* FIXME: proper argument parsing / passing */ + for (int argno = 
1; argno < argc; argno++) + { + const char *arg = argv[argno]; + + /* + * Until "--" arguments are for this program, after that they're + * passed to clang. + */ + if (parsing_self) + { + if (strcmp(arg, "--llvm-config") == 0) + { + argno++; + if (argno < argc) + { + arg = argv[argno]; + *pgarr_at(&clang_args, 0) = arg; + } + } + else if (strcmp(arg, "--output") == 0) + { + argno++; + if (argno < argc) + output_fname = argv[argno]; + } + else if (strcmp(arg, "--") == 0) + parsing_self = false; + else + { + appendStringInfo(&file_contents, + "#include \"%s\"\n", + arg); + } + } + else + pgarr_append(const char *, &clang_args, arg); + } + + if (*pgarr_at(&clang_args, 0) == NULL) + { + fprintf(stderr, "require path to llvm\n"); + exit(EXIT_FAILURE); + } + else if (output_fname == NULL) + { + fprintf(stderr, "require output_file\n"); + exit(EXIT_FAILURE); + } + + output = fopen(output_fname, PG_BINARY_W); + + empty.Contents = file_contents.data; + empty.Length = file_contents.len; + + index = clang_createIndex( + /* excludeDeclarationsFromPCH */ 0, + /* displayDiagnostics */ 0); + + error = clang_parseTranslationUnit2FullArgv( + index, + /* source_filename */ empty_filename, + /* commandline_args */ pgarr_data(&clang_args), + /* num_commandline_args */ pgarr_size(&clang_args), + /* unsaved_files */ &empty, + /* num_unsaved_files */ 1, + CXTranslationUnit_SkipFunctionBodies, + &unit); + + /* normally parsing succeeds, except if there's some internal errors */ + if (error != CXError_Success) + { + fprintf(stderr, "failure while trying to parse %d\n", error); + exit(EXIT_FAILURE); + } + + /* display diagnostics, and fail if there are any warnings */ + if ((num_diagnostics = clang_getNumDiagnostics(unit)) != 0) + { + uint32 diag_display_opt = clang_defaultDiagnosticDisplayOptions(); + bool has_error = false; + + for (uint32 i = 0; i < num_diagnostics; i++) + { + CXDiagnostic diag = clang_getDiagnostic(unit, i); + CXString lstr; + const char *str; + + /* fail if there's 
even a warning */ + if (clang_getDiagnosticSeverity(diag) >= CXDiagnostic_Warning) + has_error = true; + + lstr = clang_formatDiagnostic(diag, diag_display_opt); + + str = clang_getCString(lstr); + fprintf(stderr, "%s\n", str); + + clang_disposeString(lstr); + clang_disposeDiagnostic(diag); + } + + if (has_error) + { + fprintf(stderr, "Unable to parse translation unit\n"); + exit(EXIT_FAILURE); + } + } + + + /* + * Ok, finally ready to analyze. + */ + cursor = clang_getTranslationUnitCursor(unit); + + /* + * First collect elements of NodeTag, to determine for which struct types + * to collect information about. + */ + clang_visitChildren( + cursor, + find_NodeTag_vis, + &collect_info); + + /* + * Find the underlying types for the NodeTag elements where + * possible. + * + * There's a few node types where that's not possible, e.g. because + * they're defined in a .c file. + */ + pgarr_set_all(&collect_info.interesting_node_types, + pgarr_size(&collect_info.interesting_node_typedefs), + 0); + clang_visitChildren( + cursor, + find_NodeStructs_vis, + &collect_info); + + /* then traverse again, to find the structs definitions for the types above */ + pgarr_set_all(&collect_info.node_type_strings, + pgarr_size(&collect_info.interesting_node_typedefs), + 0); + clang_visitChildren( + cursor, + find_NodeStructDefs_vis, + &collect_info); + + /* + * Collected all the necessary information, print it out to the output + * file.
+ */ + appendStringInfoString(&file_contents, "\n#include \"nodes/nodeinfo.h\"\n\n"); + fwrite(file_contents.data, file_contents.len, 1, output); + + first = true; + fprintf(output, "const TINodeType ti_node_types[] = {\n"); + for (size_t i = 0; i < pgarr_size(&collect_info.node_type_strings); i++) + { + const char *s = *pgarr_at(&collect_info.node_type_strings, i); + + if (!first) + fprintf(output, ",\n"); + else + first = false; + + if (s) + fprintf(output, "\t%s", s); + else + fprintf(output, "\t{0}"); + } + fprintf(output, "\n};\n\n"); + + first = true; + fprintf(output, "const TIStructField ti_struct_fields[] = {\n"); + for (size_t i = 0; i < pgarr_size(&collect_info.struct_field_strings); i++) + { + const char *s = *pgarr_at(&collect_info.struct_field_strings, i); + + if (!first) + fprintf(output, ",\n"); + else + first = false; + + fprintf(output, "\t%s", s); + } + fprintf(output, "\n};\n\n"); + + first = true; + fprintf(output, "const TIEnum ti_enums[] = {\n"); + for (size_t i = 0; i < pgarr_size(&collect_info.enum_strings); i++) + { + const char *s = *pgarr_at(&collect_info.enum_strings, i); + + if (!first) + fprintf(output, ",\n"); + else + first = false; + + fprintf(output, "\t%s", s); + } + fprintf(output, "\n};\n\n"); + + first = true; + fprintf(output, "const TIEnumField ti_enum_fields[] = {\n"); + for (size_t i = 0; i < pgarr_size(&collect_info.enum_field_strings); i++) + { + const char *s = *pgarr_at(&collect_info.enum_field_strings, i); + + if (!first) + fprintf(output, ",\n"); + else + first = false; + + fprintf(output, "\t%s", s); + } + fprintf(output, "\n};\n\n"); + + first = true; + fprintf(output, "const TIString ti_strings[] = {\n"); + for (size_t i = 0; i < pgarr_size(&collect_info.strtab); i++) + { + const char *s = *pgarr_at(&collect_info.strtab, i); + + if (!first) + fprintf(output, ",\n"); + else + first = false; + + fprintf(output, "\t{.length = sizeof(\"%s\") - 1, .string = \"%s\"}", s, s); + } + fprintf(output, "\n};\n"); + + 
clang_disposeTranslationUnit(unit); + clang_disposeIndex(index); + + exit(EXIT_SUCCESS); +} diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 08f3491cba..a51ee0b47d 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -4347,7 +4347,7 @@ outNode(StringInfo str, const void *obj) * returns the ascii representation of the Node as a palloc'd string */ char * -nodeToString(const void *obj) +nodeToStringOld(const void *obj) { StringInfoData str; diff --git a/src/backend/nodes/outfuncs_new.c b/src/backend/nodes/outfuncs_new.c new file mode 100644 index 0000000000..5d3541ba31 --- /dev/null +++ b/src/backend/nodes/outfuncs_new.c @@ -0,0 +1,513 @@ +#include "postgres.h" + +#include "common/shortest_dec.h" +#include "lib/stringinfo.h" +#include "miscadmin.h" +#include "nodes/nodes.h" +#include "nodes/nodeinfo.h" +#include "nodes/pg_list.h" +#include "nodes/primnodes.h" +#include "nodes/value.h" +#include "utils/datum.h" + + +typedef struct NodeOutContext +{ + StringInfoData str; +} NodeOutContext; + + +static void nodeout_new_rec(NodeOutContext *context, const Node *obj); +static void nodeout_fields(NodeOutContext *context, const Node *src, const TINodeType *type_info); +static void nodeout_list(NodeOutContext *context, const List *obj, NodeTag tag); +static void nodeout_field(NodeOutContext *context, const Node *obj, + const TINodeType *type_info, const TIStructField *field_info, + uint16 known_type_id, uint16 size, const void *ptr_src); +static void nodeout_value_union(NodeOutContext *context, const Value *src, NodeTag tag); +static void nodeout_bitmapset(NodeOutContext *context, const Bitmapset *bms); +static void nodeout_token(NodeOutContext *context, const char *s); + + +/* + * nodeToString - + * returns the ascii representation of the Node as a palloc'd string + */ +char * +nodeToString(const void *obj) +{ +#ifdef USE_NEW_NODE_FUNCS + return nodeToStringNew(obj); +#else + return nodeToStringOld(obj); +#endif +} + 
+char * +nodeToStringNew(const void *obj) +{ + NodeOutContext context = {0}; + + /* see stringinfo.h for an explanation of this maneuver */ + initStringInfo(&context.str); + + nodeout_new_rec(&context, obj); + + return context.str.data; +} + +static void +nodeout_new_rec(NodeOutContext *context, const Node *obj) +{ + const TINodeType *type_info; + NodeTag tag; + + if (obj == NULL) + { + appendStringInfoString(&context->str, "<>"); + return; + } + + tag = nodeTag(obj); + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + switch (tag) + { + case T_List: + case T_OidList: + case T_IntList: + nodeout_list(context, (const List *) obj, tag); + return; + + default: + break; + } + + type_info = &ti_node_types[tag]; + + Assert(type_info->size > 0); + + appendStringInfoChar(&context->str, '{'); + appendBinaryStringInfo(&context->str, + ti_strings[type_info->name].string, + ti_strings[type_info->name].length); + appendStringInfoChar(&context->str, ' '); + appendStringInfoInt32(&context->str, (int) tag); + + nodeout_fields(context, obj, type_info); + + appendStringInfoChar(&context->str, '}'); +} + +/* + * Output the single value of kind known_type_id stored at ptr_src. Called + * once per struct field by nodeout_fields(), and once per element for PgArr + * members (with obj == NULL and the element's known type / size). + */ +static void +nodeout_field(NodeOutContext *context, const Node *obj, + const TINodeType *type_info, const TIStructField *field_info, + uint16 known_type_id, uint16 size, + const void *ptr_src) +{ + /* known_type_id is a KNOWN_TYPE_* value, so check its own sentinel */ + Assert(known_type_id != KNOWN_TYPE_UNKNOWN); + Assert(size != TYPE_SIZE_UNKNOWN); + + switch (known_type_id) + { + case KNOWN_TYPE_UINT16: + appendStringInfoUInt32(&context->str, *(const uint16 *) ptr_src); + break; + case KNOWN_TYPE_OPFUNCID: + case KNOWN_TYPE_OID: + case KNOWN_TYPE_UINT32: + appendStringInfoUInt32(&context->str, *(const uint32 *) ptr_src); + break; + case KNOWN_TYPE_UINT64: + appendStringInfoUInt64(&context->str, *(const uint64 *) ptr_src); + break; + + case KNOWN_TYPE_INT16: + appendStringInfoInt32(&context->str, *(const int16 *) ptr_src); + break; + case KNOWN_TYPE_LOCATION: + case KNOWN_TYPE_INT32: +
appendStringInfoInt32(&context->str, *(const int32 *) ptr_src); + break; + case KNOWN_TYPE_INT64: + appendStringInfoInt64(&context->str, *(const int64 *) ptr_src); + break; + + case KNOWN_TYPE_FLOAT32: + appendStringInfoFloat(&context->str, *(const float *) ptr_src); + break; + case KNOWN_TYPE_FLOAT64: + appendStringInfoDouble(&context->str, *(const double *) ptr_src); + break; + + case KNOWN_TYPE_BOOL: + appendStringInfoString(&context->str, *(const bool *) ptr_src ? "true" : "false"); + break; + + case KNOWN_TYPE_CHAR: + { + char c = *(const char *) ptr_src; + + if (c == 0) + appendStringInfoString(&context->str, "<>"); + else if (!isalnum((unsigned char) c)) + { + appendStringInfoChar(&context->str, '\\'); + appendStringInfoChar(&context->str, c); + } + else + appendStringInfoChar(&context->str, c); + break; + } + + case KNOWN_TYPE_ENUM: + case KNOWN_TYPE_COERCIONFORM: + case KNOWN_TYPE_NODE_TAG: + { + const TIEnum *enum_info = &ti_enums[field_info->type_id]; + uint32 val = *(const uint32 *) ptr_src; + const TIString *sval = NULL; + int num_fields = enum_info->first_field_at + enum_info->num_fields; + + Assert(field_info->size >= 0); + + for (int i = enum_info->first_field_at; i < num_fields; i++) + { + const TIEnumField *enum_field_info = &ti_enum_fields[i]; + + if (enum_field_info->value == val) + { + sval = &ti_strings[enum_field_info->name]; + break; + } + } + + if (sval == NULL) + elog(ERROR, "unknown enum %s val %u", + ti_strings[enum_info->name].string, + val); + + /* enum name won't need escaping */ + appendBinaryStringInfo(&context->str, + sval->string, + sval->length); + break; + } + + case KNOWN_TYPE_DATUM: + { + const Const *csrc = castNode(Const, (Node *) obj); + + if (csrc->constisnull) + appendStringInfoString(&context->str, "<>"); + else + outDatum(&context->str, csrc->constvalue, csrc->constlen, csrc->constbyval); + + break; + } + + case KNOWN_TYPE_VALUE_UNION: + { + const Value *vsrc = (const Value *) obj; + + Assert(IsAValue(vsrc)); + + 
nodeout_value_union(context, vsrc, nodeTag(vsrc)); + break; + } + + case KNOWN_TYPE_NODE: + { + const TINodeType *sub_type_info; + NodeTag sub_tag; + + Assert(field_info->type_id != TYPE_ID_UNKNOWN); + + /* + * If at offset 0, this shares the NodeTag field with the + * parent class. Therefore we have to rely on the declared + * type. + */ + if (field_info->offset == 0) + sub_tag = field_info->type_id; + else + { + sub_tag = nodeTag(ptr_src); + Assert(ti_node_types[sub_tag].size == + ti_node_types[field_info->type_id].size); + } + + sub_type_info = &ti_node_types[sub_tag]; + + appendStringInfoChar(&context->str, '{'); + appendBinaryStringInfo(&context->str, + ti_strings[sub_type_info->name].string, + ti_strings[sub_type_info->name].length); + appendStringInfoChar(&context->str, ' '); + appendStringInfoInt32(&context->str, (int) sub_tag); + + nodeout_fields(context, + (const Node *) ptr_src, + sub_type_info); + + appendStringInfoChar(&context->str, '}'); + + break; + } + + + case KNOWN_TYPE_P_CHAR: + if (*(const char **) ptr_src == NULL) + appendStringInfoString(&context->str, "<>"); + else + { + const char* s_src = *(const char **) ptr_src; + + /* + * Need to quote to allow distinguishing a NULL string and a + * zero length string (i.e. starting with '\0'). We use + * nodeout_token() to provide escaping of the string's + * content, but we don't want it to do anything with an empty + * string, as it'd output <>. 
+ */ + appendStringInfoChar(&context->str, '"'); + if (s_src[0] != '\0') + nodeout_token(context, s_src); + appendStringInfoChar(&context->str, '"'); + } + break; + + case KNOWN_TYPE_P_PGARR: + if (*(const PgArrBase **) ptr_src == NULL) + appendStringInfoString(&context->str, "<>"); + else + { + const PgArrBase *arr_src = *(const PgArrBase **) ptr_src; + + Assert(field_info->elem_size > 0); + + appendStringInfoUInt32(&context->str, pgarr_size(arr_src)); + appendStringInfoChar(&context->str, ' '); + for (int i = 0; i < pgarr_size(arr_src); i++) + { + nodeout_field(context, NULL, type_info, field_info, + field_info->elem_known_type_id, field_info->elem_size, + ((char *) arr_src->elementsp) + field_info->elem_size * i); + appendStringInfoChar(&context->str, ' '); + } + } + + break; + + case KNOWN_TYPE_P_NODE: + if (*(const Node **) ptr_src == NULL) + appendStringInfoString(&context->str, "<>"); + else + nodeout_new_rec(context, *(const Node **) ptr_src); + break; + + case KNOWN_TYPE_P_BITMAPSET: + if (*(const Bitmapset **) ptr_src == NULL) + appendStringInfoString(&context->str, "<>"); + else + { + const Bitmapset *bs_src = *(const Bitmapset **) ptr_src; + + nodeout_bitmapset(context, bs_src); + } + break; + + default: + elog(ERROR, "don't know how to copy field %s %s->%s", + ti_strings[field_info->type].string, + ti_strings[type_info->name].string, + ti_strings[field_info->name].string); + break; + } +} + +static void +nodeout_fields(NodeOutContext *context, const Node *src, const TINodeType *type_info) +{ + const TIStructField *field_info = &ti_struct_fields[type_info->first_field_at]; + + for (int i = 0; i < type_info->num_fields; i++, field_info++) + { + // FIXME: ExtensibleNode needs to call callbacks, or be reimplemented + + if (field_info->flags & TYPE_OUT_IGNORE) + continue; + + appendStringInfoString(&context->str, " :"); + appendBinaryStringInfo(&context->str, + ti_strings[field_info->name].string, + ti_strings[field_info->name].length); + 
appendStringInfoChar(&context->str, ' '); + + nodeout_field(context, src, type_info, field_info, + field_info->known_type_id, field_info->size, + (char *) src + field_info->offset); + } +} + +/* + * Output a List, OidList or IntList as "( elem ...)", "(o oid ...)" or + * "(i int ...)" respectively; 'tag' selects which of the three forms to use. + */ +static void +nodeout_list(NodeOutContext *context, const List *src, NodeTag tag) +{ + appendStringInfoChar(&context->str, '('); + + /* + * Note that we always output the separator, even in the first loop + * iteration. The read routines rely on the output starting with "i ", "o + * ", or " {node data}", which is achieved by always outputting space. + */ + switch (tag) + { + case T_List: + for (int i = 0; i < src->length; i++) + { + appendStringInfoChar(&context->str, ' '); + + nodeout_new_rec(context, lfirst(&src->elements[i])); + } + break; + + case T_OidList: + appendStringInfoChar(&context->str, 'o'); + for (int i = 0; i < src->length; i++) + { + appendStringInfoChar(&context->str, ' '); + + appendStringInfoUInt32(&context->str, + lfirst_oid(&src->elements[i])); + } + break; + + case T_IntList: + appendStringInfoChar(&context->str, 'i'); + for (int i = 0; i < src->length; i++) + { + appendStringInfoChar(&context->str, ' '); + + /* members are signed ints; negative values must stay negative */ + appendStringInfoInt32(&context->str, + lfirst_int(&src->elements[i])); + } + break; + + default: + pg_unreachable(); + } + + appendStringInfoChar(&context->str, ')'); +} + +static void +nodeout_value_union(NodeOutContext *context, const Value *src, NodeTag tag) +{ + switch (tag) + { + case T_Integer: + appendStringInfoInt32(&context->str, src->val.ival); + break; + + case T_Float: + /* + * We assume the value is a valid numeric literal and so does not + * need quoting. + */ + appendStringInfoString(&context->str, src->val.str); + break; + + case T_String: + /* + * Need to quote to allow distinguishing a NULL string and a zero + * length string (i.e. starting with '\0'). We use + * nodeout_token() to provide escaping of the string's content, + * but we don't want it to do anything with an empty string, as + * it'd output <>.
+ */ + appendStringInfoChar(&context->str, '"'); + if (src->val.str[0] != '\0') + nodeout_token(context, src->val.str); + appendStringInfoChar(&context->str, '"'); + break; + + case T_BitString: + /* internal representation already has leading 'b' */ + appendStringInfoString(&context->str, src->val.str); + break; + + case T_Null: + /* this is seen only within A_Const, not in transformed trees */ + appendStringInfoString(&context->str, "<>"); + break; + + default: + Assert(false); + pg_unreachable(); + } +} + +/* + * nodeout_bitmapset - + * converts a bitmap set of integers + * + * Note: the output format is "(b int int ...)", similar to an integer List. + */ +static void +nodeout_bitmapset(NodeOutContext *context, const Bitmapset *bms) +{ + int x; + + appendStringInfoChar(&context->str, '('); + appendStringInfoChar(&context->str, 'b'); + x = -1; + while ((x = bms_next_member(bms, x)) >= 0) + { + appendStringInfoChar(&context->str, ' '); + appendStringInfoInt32(&context->str, x); + } + appendStringInfoChar(&context->str, ')'); +} + + +/* + * nodeout_token + * Convert an ordinary string (eg, an identifier) into a form that + * will be decoded back to a plain token by read.c's functions. + * + * If a null or empty string is given, it is encoded as "<>". + */ +static void +nodeout_token(NodeOutContext *context, const char *s) +{ + if (s == NULL || *s == '\0') + { + appendStringInfoString(&context->str, "<>"); + return; + } + + /* + * Look for characters or patterns that are treated specially by read.c + * (either in pg_strtok() or in nodeRead()), and therefore need a + * protective backslash. 
+ */ +#ifdef NOT_ANYMORE + /* These characters only need to be quoted at the start of the string */ + if (*s == '<' || + *s == '"' || + isdigit((unsigned char) *s) || + ((*s == '+' || *s == '-') && + (isdigit((unsigned char) s[1]) || s[1] == '.'))) + appendStringInfoChar(&context->str, '\\'); +#endif + while (*s) + { + /* These chars must be backslashed anywhere in the string */ + if (*s == ' ' || *s == '\n' || *s == '\t' || + *s == '(' || *s == ')' || *s == '{' || *s == '}' || + *s == '\\') + appendStringInfoChar(&context->str, '\\'); + appendStringInfoChar(&context->str, *s++); + } +} diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c index fdf68fdcae..81013a9337 100644 --- a/src/backend/nodes/read.c +++ b/src/backend/nodes/read.c @@ -45,7 +45,7 @@ bool restore_location_fields = false; * in builds with the WRITE_READ_PARSE_PLAN_TREES debugging flag set. */ static void * -stringToNodeInternal(const char *str, bool restore_loc_fields) +stringToNodeInternalOld(const char *str, bool restore_loc_fields) { void *retval; const char *save_strtok; @@ -86,17 +86,17 @@ stringToNodeInternal(const char *str, bool restore_loc_fields) * Externally visible entry points */ void * -stringToNode(const char *str) +stringToNodeOld(const char *str) { - return stringToNodeInternal(str, false); + return stringToNodeInternalOld(str, false); } #ifdef WRITE_READ_PARSE_PLAN_TREES void * -stringToNodeWithLocations(const char *str) +stringToNodeWithLocationsOld(const char *str) { - return stringToNodeInternal(str, true); + return stringToNodeInternalOld(str, true); } #endif diff --git a/src/backend/nodes/readfuncs_new.c b/src/backend/nodes/readfuncs_new.c new file mode 100644 index 0000000000..da0ac91247 --- /dev/null +++ b/src/backend/nodes/readfuncs_new.c @@ -0,0 +1,749 @@ +#include "postgres.h" + +#include "common/shortest_dec.h" +#include "common/string.h" +#include "lib/stringinfo.h" +#include "miscadmin.h" +#include "nodes/nodes.h" +#include "nodes/nodeinfo.h" +#include 
"nodes/pg_list.h" +#include "nodes/primnodes.h" +#include "nodes/value.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/int8.h" + + +typedef struct NodeInContext +{ + bool restore_locations; + + const char *str; + const char *cur; +} NodeInContext; + +static void *nodein_read(NodeInContext *context, const char *token, int tok_len); +static const char *nodein_strtok(NodeInContext *context, int *token_length); +static Node *nodein_read_node(NodeInContext *context); + +static void nodein_fields(NodeInContext *context, const TINodeType *type_info, Node* dst); +static void nodein_field(NodeInContext *context, Node *obj, + const TINodeType *type_info, const TIStructField *field_info, + uint16 known_type_id, uint16 size, void *ptr_dst); +static List *nodein_list(NodeInContext *context, const char *token, int token_length); +static char * nodein_debackslash(NodeInContext *context, const char *token, int token_length); +static Datum nodein_datum(NodeInContext *context, bool typbyval, const char *token, int token_length); +static Bitmapset *nodein_bitmapset(NodeInContext *context, const char *token, int token_length); +static void nodein_value_union(NodeInContext *context, Value *dst, const char *token, int token_length); +static void nodein_enum(NodeInContext *context, uint16 type_id, void *ptr_dst, const char *token, int token_length); + + +void * +stringToNode(const char *str) +{ +#ifdef USE_NEW_NODE_FUNCS + return stringToNodeNew(str); +#else + return stringToNodeOld(str); +#endif +} + +#ifdef WRITE_READ_PARSE_PLAN_TREES +void * +stringToNodeWithLocations(const char *str) +{ +#ifdef USE_NEW_NODE_FUNCS + return stringToNodeWithLocationsNew(str); +#else + return stringToNodeWithLocationsOld(str); +#endif +} +#endif + +static void * +stringToNodeNewInternal(const char *str, bool restore_locations) +{ + NodeInContext context = {.str = str, + .cur = str, + .restore_locations = restore_locations}; + + return nodein_read(&context, NULL, 0); +} + +void 
* +stringToNodeNew(const char *str) +{ + return stringToNodeNewInternal(str, false); +} + +#ifdef WRITE_READ_PARSE_PLAN_TREES +void * +stringToNodeWithLocationsNew(const char *str) +{ + return stringToNodeNewInternal(str, true); +} +#endif + +/* + * nodein_read + * Read one node ("{...}") or list ("(...)") from the input. + * + * If token is NULL the next token is fetched from the context first; + * otherwise the caller passes the token it has already read. Returns NULL + * at end of input and for a zero-length token. + */ +static void * +nodein_read(NodeInContext *context, const char *token, int token_length) +{ + Node *result; + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + if (token == NULL) /* need to read a token? */ + { + token = nodein_strtok(context, &token_length); + + if (token == NULL) /* end of input */ + return NULL; + } + + if (token_length == 0) + return NULL; + else if (token[0] == '{') + { + Assert(token_length == 1); /* cf nodein_strtok */ + result = nodein_read_node(context); + token = nodein_strtok(context, &token_length); + if (token_length != 1 || token[0] != '}') + elog(ERROR, "did not find '}' at end of input node"); + return result; + } + else if (token[0] == '(') + { + Assert(token_length == 1); /* cf nodein_strtok */ + + return (Node *) nodein_list(context, token, token_length); + } + else + { + /* + * XXX: We used to accept strings (starting with "), integers + * (parsable integer), float (other numbers) and bitstrings (starting + * with b) here, mapping them to T_Value sub-types. + * + * That seemed awkward, especially issues like floating points being + * recognized as integers after a roundtrip, plain C strings not being + * discernible from Value nodes, and the overhead of more complex + * token type determination. + * + * If we want to re-introduce that, this'd probably be the best place + * to check for that, not going through the faster paths above. 
+ */ + + elog(ERROR, "unrecognized token: \"%.*s\"", token_length, token); + } + + return NULL; +} + +/* + * nodein_strtok + * Return a pointer to the next token at context->cur, store its length + * in *token_length and advance context->cur past it. + * + * Tokens are separated by spaces, newlines and tabs; each of ( ) { } is a + * token by itself. A backslash protects the following character from being + * treated as a delimiter; backslashes are not removed here. + * + * Returns NULL (with *token_length = 0) when only whitespace is left. The + * two-character token <> is returned with *token_length = 0 but a non-NULL + * pointer. The returned token is not null-terminated. + */ +static const char * +nodein_strtok(NodeInContext *context, int *token_length) +{ + const char *local_str = context->cur; /* working pointer to string */ + const char *ret_str; /* start of token to return */ + + while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t') + local_str++; + + if (*local_str == '\0') + { + *token_length = 0; + context->cur = local_str; + return NULL; /* no more tokens */ + } + + /* + * Now pointing at start of next token. + */ + ret_str = local_str; + + if (*local_str == '(' || *local_str == ')' || + *local_str == '{' || *local_str == '}') + { + /* special 1-character token */ + local_str++; + } + else + { + /* Normal token, possibly containing backslashes */ + while (*local_str != '\0' && + *local_str != ' ' && *local_str != '\n' && + *local_str != '\t' && + *local_str != '(' && *local_str != ')' && + *local_str != '{' && *local_str != '}') + { + if (*local_str == '\\' && local_str[1] != '\0') + local_str += 2; + else + local_str++; + } + } + + *token_length = local_str - ret_str; + + /* Recognize special case for "empty" token */ + if (*token_length == 2 && ret_str[0] == '<' && ret_str[1] == '>') + *token_length = 0; + + context->cur = local_str; + + return ret_str; +} + +/* + * nodein_read_node + * Read the "TypeName numeric-type-id fields..." part of a node and + * return the newly allocated node. + */ +static Node* +nodein_read_node(NodeInContext *context) +{ + const char *node_type; + const char *node_type_id_s; + NodeTag node_type_id; + int type_token_length; + int id_token_length; + const TINodeType *type_info; + Node *dst; + + /* + * Node types are always enclosed in {TypeName numeric-type-id ... }, + * the caller processes the curly parens. 
+ */ + node_type = nodein_strtok(context, &type_token_length); + + if (unlikely(type_token_length == 0)) + elog(ERROR, "unexpected zero length token"); + + node_type_id_s = nodein_strtok(context, &id_token_length); + if (unlikely(type_token_length == 0)) + elog(ERROR, "unexpected zero length token"); + + node_type_id = atoi(node_type_id_s); + + // FIXME: check ti_* boundaries + type_info = &ti_node_types[node_type_id]; + + if (strncmp(node_type, ti_strings[type_info->name].string, type_token_length) != 0) + { + elog(ERROR, "unrecognized: %s vs %s", + pnstrdup(node_type, type_token_length), ti_strings[type_info->name].string); + } + + dst = palloc0(type_info->size); + dst->type = node_type_id; + + nodein_fields(context, type_info, dst); + + return dst; +} + +static void +nodein_fields(NodeInContext *context, const TINodeType *type_info, Node* dst) +{ + const TIStructField *field_info = &ti_struct_fields[type_info->first_field_at]; + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + for (int i = 0; i < type_info->num_fields; i++, field_info++) + { + const char *token; + int token_length; + + // FIXME: ExtensibleNode needs to call callbacks, or be reimplemented + + if (field_info->flags & (TYPE_IN_IGNORE | TYPE_OUT_IGNORE)) + continue; + + /* read (which is prefixed with :) and verify field name */ + // XXX: should we do that? 
The old code didn't, but it seems to add a + // lot of robustness + token = nodein_strtok(context, &token_length); + Assert(token_length > 1); + /* skipping over : */ + Assert(token_length -1 == ti_strings[field_info->name].length); + Assert(memcmp(token + 1, ti_strings[field_info->name].string, token_length - 1) == 0); + + nodein_field(context, dst, type_info, field_info, + field_info->known_type_id, field_info->size, + (char *) dst + field_info->offset); + } +} + +static List * +nodein_list(NodeInContext *context, const char *token, int token_length) +{ + List *l = NIL; + + /*---------- + * Could be an integer list: (i int int ...) + * or an OID list: (o int int ...) + * or a list of nodes/values: (node node ...) + *---------- + */ + token = nodein_strtok(context, &token_length); + if (token == NULL) + elog(ERROR, "unterminated List structure"); + if (token_length == 1 && token[0] == 'i') + { + /* List of integers */ + for (;;) + { + int val; + char *endptr; + + token = nodein_strtok(context, &token_length); + if (token == NULL) + elog(ERROR, "unterminated List structure"); + if (token[0] == ')') + break; + val = (int) strtol(token, &endptr, 10); + if (endptr != token + token_length) + elog(ERROR, "unrecognized integer: \"%.*s\"", + token_length, token); + l = lappend_int(l, val); + } + } + else if (token_length == 1 && token[0] == 'o') + { + /* List of OIDs */ + for (;;) + { + Oid val; + char *endptr; + + token = nodein_strtok(context, &token_length); + if (token == NULL) + elog(ERROR, "unterminated List structure"); + if (token[0] == ')') + break; + val = (Oid) strtoul(token, &endptr, 10); + if (endptr != token + token_length) + elog(ERROR, "unrecognized OID: \"%.*s\"", + token_length, token); + l = lappend_oid(l, val); + } + } + else + { + /* List of other node types */ + for (;;) + { + /* We have already scanned next token... 
*/ + if (token[0] == ')') + break; + l = lappend(l, nodein_read(context, token, token_length)); + token = nodein_strtok(context, &token_length); + if (token == NULL) + elog(ERROR, "unterminated List structure"); + } + } + + return l; +} + +static void +nodein_field(NodeInContext *context, Node *obj, + const TINodeType *type_info, const TIStructField *field_info, + uint16 known_type_id, uint16 size, void *ptr_dst) +{ + const char *token; + int token_length; + + Assert(known_type_id != TYPE_ID_UNKNOWN); + Assert(size != TYPE_SIZE_UNKNOWN); + + token = nodein_strtok(context, &token_length); /* get field value */ + + switch (known_type_id) + { + case KNOWN_TYPE_UINT16: + *(uint16 *) ptr_dst = (uint16) strtoul(token, NULL, 10); + break; + case KNOWN_TYPE_OPFUNCID: + case KNOWN_TYPE_OID: + case KNOWN_TYPE_UINT32: + *(uint32 *) ptr_dst = (uint32) strtoul(token, NULL, 10); + break; + case KNOWN_TYPE_UINT64: + // FIXME: pnstrdup + *(uint64 *) ptr_dst = (uint64) pg_strtouint64(pnstrdup(token, token_length), NULL, 10); + break; + + case KNOWN_TYPE_LOCATION: + /* + * Parse location fields are written out by outfuncs.c, but only + * for debugging use. When reading a location field, we normally + * discard the stored value and set the location field to -1 (ie, + * "unknown"). This is because nodes coming from a stored rule + * should not be thought to have a known location in the current + * query's text. However, if restore_location_fields is true, we + * do restore location fields from the string. This is currently + * intended only for use by the WRITE_READ_PARSE_PLAN_TREES test + * code, which doesn't want to cause any change in the node + * contents. 
+ */ +#ifdef WRITE_READ_PARSE_PLAN_TREES + if (context->restore_locations) + *(uint32 *) ptr_dst = atoi(token); + else +#endif + { + *(uint32 *) ptr_dst = -1; + } + break; + + case KNOWN_TYPE_INT16: + *(uint16 *) ptr_dst = atoi(token); + break; + case KNOWN_TYPE_INT32: + *(uint32 *) ptr_dst = atoi(token); + break; + case KNOWN_TYPE_INT64: + // FIXME: pnstrdup + scanint8(pnstrdup(token, token_length), false, (int64 *) ptr_dst); + break; + + case KNOWN_TYPE_FLOAT32: + *(float *) ptr_dst = strtof(token, NULL); + break; + case KNOWN_TYPE_FLOAT64: + *(double *) ptr_dst = strtod(token, NULL); + break; + + case KNOWN_TYPE_BOOL: + if (token[0] == 't') + { + Assert(strncmp(token, "true", token_length) == 0); + *(bool *) ptr_dst = true; + } + else + { + Assert(strncmp(token, "false", token_length) == 0); + *(bool *) ptr_dst = false; + } + break; + + case KNOWN_TYPE_CHAR: + /* avoid overhead of calling debackslash() for one char */ + if (token_length == 0) + *(char *) ptr_dst = '\0'; + else if (token_length == 2) + { + if (token[0] != '\\') + elog(ERROR, "invalid escape %c", token[0]); + *(char *) ptr_dst = token[1]; + } + else if (token_length == 1) + *(char *) ptr_dst = token[0]; + else + elog(ERROR, "invalid char length %d", token_length); + break; + + case KNOWN_TYPE_DATUM: + { + Const *cobj = castNode(Const, (Node *) obj); + + Assert(&cobj->constvalue == ptr_dst); + + if (cobj->constisnull) + { + /* skip "<>" */ + if (token == NULL || token_length != 0) + elog(ERROR, "expected <>"); + } + else + cobj->constvalue = nodein_datum(context, cobj->constbyval, token, token_length); + + break; + } + + case KNOWN_TYPE_VALUE_UNION: + { + Value *vobj = (Value *) obj; + + Assert(IsAValue(vobj)); + + nodein_value_union(context, vobj, token, token_length); + + break; + } + + case KNOWN_TYPE_ENUM: + case KNOWN_TYPE_COERCIONFORM: + case KNOWN_TYPE_NODE_TAG: + Assert(size == sizeof(int)); + nodein_enum(context, field_info->type_id, ptr_dst, token, token_length); + break; + + case 
KNOWN_TYPE_NODE: + { + const TINodeType *sub_type_info; + NodeTag sub_tag; + + Assert(field_info->type_id != TYPE_ID_UNKNOWN); + + /* sub-types are always enclosed in {TypeName numeric-type-id ... } */ + if (token_length != 1 || token[0] != '{') + elog(ERROR, "did not find '{' at the start of embedded node"); + + /* read TypeName */ + token = nodein_strtok(context, &token_length); + /* read numeric-type-id */ + token = nodein_strtok(context, &token_length); + + /* + * If at offset 0, this shares the NodeTag field with the + * parent class. Therefore we have to rely on the declared + * type. + */ + if (field_info->offset != 0) + { + sub_tag = atoi(token); + ((Node *) ptr_dst)->type = sub_tag; + } + else + { + sub_tag = field_info->type_id; + } + + sub_type_info = &ti_node_types[sub_tag]; + + nodein_fields(context, + sub_type_info, + (Node *) ptr_dst); + + /* read } */ + token = nodein_strtok(context, &token_length); + if (token_length != 1 || token[0] != '}') + elog(ERROR, "did not find '}' at the end of embedded node"); + break; + } + + case KNOWN_TYPE_P_PGARR: + { + PgArrBase *arr; + size_t arr_length; + + if (token_length == 0) + break; + + Assert(field_info->elem_size > 0); + + arr_length = (uint32) strtoul(token, NULL, 10); + arr = pgarr_helper_alloc(field_info->elem_size, + arr_length); + arr->size = arr_length; + + for (int i = 0; i < arr_length; i++) + { + nodein_field(context, NULL, + type_info, field_info, + field_info->elem_known_type_id, + field_info->elem_size, + (char *) arr->elementsp + field_info->elem_size * i); + } + + *(PgArrBase **) ptr_dst = arr; + + break; + } + + case KNOWN_TYPE_P_NODE: + if (token_length == 0) + break; + + *(Node **) ptr_dst = nodein_read(context, token, token_length); + + break; + + case KNOWN_TYPE_P_CHAR: + if (token_length == 0) + break; + + if (token_length < 2 || token[0] != '"' || token[token_length - 1] != '"') + elog(ERROR, "missing quotes"); + *(char **) ptr_dst = nodein_debackslash(context, token + 1, token_length - 
2); + + break; + + case KNOWN_TYPE_P_BITMAPSET: + if (token_length == 0) + break; + + *(Bitmapset **) ptr_dst = nodein_bitmapset(context, token, token_length); + break; + + default: + elog(PANIC, "don't know how to output type %d", (int) known_type_id); + } +} + +/* + * nodein_datum + * + * Given a string representation of a Datum, recreate the appropriate + * Datum. The string representation embeds length info, but not byValue, + * so we must be told that. + */ +static Datum +nodein_datum(NodeInContext *context, bool typbyval, const char *token, int token_length) +{ + Size length, + i; + Datum res; + char *s; + + /* + * read the actual length of the value + */ + length = (unsigned int) strtoul(token, NULL, 10); + + token = nodein_strtok(context, &token_length); /* read the '[' */ + if (token_length != 1 || token[0] != '[') + elog(ERROR, "expected \"[\" to start datum, but got \"%s\"; length = %zu", + token ? pnstrdup(token, token_length) : "[NULL]", length); + + if (typbyval) + { + if (length > (Size) sizeof(Datum)) + elog(ERROR, "byval datum but length = %zu", length); + res = (Datum) 0; + s = (char *) (&res); + for (i = 0; i < (Size) sizeof(Datum); i++) + { + token = nodein_strtok(context, &token_length); + s[i] = (char) atoi(token); + } + } + else if (length <= 0) + res = (Datum) NULL; + else + { + s = (char *) palloc(length); + for (i = 0; i < length; i++) + { + token = nodein_strtok(context, &token_length); + s[i] = (char) atoi(token); + } + res = PointerGetDatum(s); + } + + token = nodein_strtok(context, &token_length); /* read the ']' */ + if (token_length != 1 || token[0] != ']') + elog(ERROR, "expected \"]\" to end datum, but got \"%s\"; length = %zu", + token ? 
pnstrdup(token, token_length) : "[NULL]", length); + + return res; +} + +/* + * nodein_bitmapset + * Read a Bitmapset written as "(b int int ...)". + * + * The caller passes the already-read opening token, which must be "("; the + * remaining tokens up to and including ")" are read from the context. + */ +static Bitmapset * +nodein_bitmapset(NodeInContext *context, const char *token, int token_length) +{ + Bitmapset *result = NULL; + + if (token == NULL) + elog(ERROR, "incomplete Bitmapset structure"); + if (token_length != 1 || token[0] != '(') + elog(ERROR, "unrecognized token: \"%.*s\"", token_length, token); + + token = nodein_strtok(context, &token_length); + if (token == NULL) + elog(ERROR, "incomplete Bitmapset structure"); + if (token_length != 1 || token[0] != 'b') + elog(ERROR, "unrecognized token: \"%.*s\"", token_length, token); + + for (;;) + { + int val; + char *endptr; + + token = nodein_strtok(context, &token_length); + if (token == NULL) + elog(ERROR, "unterminated Bitmapset structure"); + if (token_length == 1 && token[0] == ')') + break; + val = (int) strtol(token, &endptr, 10); + if (endptr != token + token_length) + elog(ERROR, "unrecognized integer: \"%.*s\"", token_length, token); + result = bms_add_member(result, val); + } + + return result; +} + +/* + * nodein_value_union + * Fill in dst->val from the given token, interpreting it according to + * dst->type, which must already be set. + */ +static void +nodein_value_union(NodeInContext *context, Value *dst, const char *token, int token_length) +{ + switch (dst->type) + { + case T_Null: + /* skip over <> */ + break; + + case T_Integer: + dst->val.ival = atoi(token); + break; + + case T_Float: + dst->val.str = pnstrdup(token, token_length); + break; + + case T_String: + /* need to remove leading and trailing quotes, and backslashes */ + if (unlikely(token_length < 2 || + token[0] != '"' || + token[token_length - 1] != '"')) + elog(ERROR, "invalid string"); + dst->val.str = nodein_debackslash(context, token + 1, token_length - 2); + break; + + case T_BitString: + /* the whole token, including the leading 'b', is the stored string */ + dst->val.str = pnstrdup(token, token_length); + break; + + default: + Assert(false); + pg_unreachable(); + } +} + +static void +nodein_enum(NodeInContext *context, uint16 type_id, void *ptr_dst, const char *token, int token_length) +{ + const TIEnum *enum_info = &ti_enums[type_id]; + 
int num_fields = enum_info->first_field_at + enum_info->num_fields; + + for (int i = enum_info->first_field_at; i < num_fields; i++) + { + const TIEnumField *cur_field_info = &ti_enum_fields[i]; + + if (ti_strings[cur_field_info->name].length == token_length && + strncmp(ti_strings[cur_field_info->name].string, token, token_length) == 0) + { + memcpy(ptr_dst, &cur_field_info->value, sizeof(int)); + return; + } + } + + elog(ERROR, "unknown enum %s val %s", + ti_strings[enum_info->name].string, + pnstrdup(token, token_length)); +} + +static char * +nodein_debackslash(NodeInContext *context, const char *token, int token_length) +{ + char *result = palloc(token_length + 1); + char *ptr = result; + + while (token_length > 0) + { + if (*token == '\\' && token_length > 1) + token++, token_length--; + *ptr++ = *token++; + token_length--; + } + *ptr = '\0'; + return result; +} diff --git a/src/include/nodes/nodeinfo.h b/src/include/nodes/nodeinfo.h new file mode 100644 index 0000000000..b6f051ca9e --- /dev/null +++ b/src/include/nodes/nodeinfo.h @@ -0,0 +1,128 @@ +#ifndef PG_NODEINFO_H + +#define PG_NODEINFO_H + +#define TYPE_CAT_SCALAR (1U << 0) +#define TYPE_CAT_POINTER (1U << 1) +#define TYPE_CAT_INCOMPLETE (1U << 2) +#define TYPE_EQUAL_IGNORE (1U << 3) +#define TYPE_EQUAL_FORCE_SCALAR (1U << 4) +#define TYPE_COPY_IGNORE (1U << 5) +#define TYPE_COPY_FORCE_SCALAR (1U << 6) +#define TYPE_OUT_IGNORE (1U << 7) +#define TYPE_IN_IGNORE (1U << 8) + +#define TYPE_ID_UNKNOWN PG_UINT16_MAX +#define TYPE_SIZE_UNKNOWN PG_UINT16_MAX + +typedef enum TIKnownTypes +{ + KNOWN_TYPE_UNKNOWN, + + /* scalar types */ + KNOWN_TYPE_INT16, + KNOWN_TYPE_INT32, + KNOWN_TYPE_INT64, + KNOWN_TYPE_INT128, + KNOWN_TYPE_UINT16, + KNOWN_TYPE_OID, + KNOWN_TYPE_UINT32, + KNOWN_TYPE_UINT64, + KNOWN_TYPE_UINT128, + KNOWN_TYPE_FLOAT32, + KNOWN_TYPE_FLOAT64, + KNOWN_TYPE_BOOL, + KNOWN_TYPE_CHAR, + KNOWN_TYPE_ENUM, + KNOWN_TYPE_NODE_TAG, + KNOWN_TYPE_NODE, + KNOWN_TYPE_LOCATION, + KNOWN_TYPE_DATUM, + 
KNOWN_TYPE_VALUE_UNION, + KNOWN_TYPE_COERCIONFORM, + KNOWN_TYPE_OPFUNCID, + + /* pointer types */ + KNOWN_TYPE_P_CHAR, + KNOWN_TYPE_P_NODE, + KNOWN_TYPE_P_BITMAPSET, + + KNOWN_TYPE_P_INT16, + KNOWN_TYPE_P_INT32, + KNOWN_TYPE_P_INT64, + KNOWN_TYPE_P_INT128, + KNOWN_TYPE_P_UINT16, + KNOWN_TYPE_P_OID, + KNOWN_TYPE_P_UINT32, + KNOWN_TYPE_P_UINT64, + KNOWN_TYPE_P_UINT128, + KNOWN_TYPE_P_FLOAT32, + KNOWN_TYPE_P_FLOAT64, + KNOWN_TYPE_P_BOOL, + KNOWN_TYPE_P_ENUM, + KNOWN_TYPE_P_DATUM, + + KNOWN_TYPE_P_PGARR +} TIKnownTypes; + +typedef struct TINodeType +{ + /* struct name */ + uint16 name; + uint16 first_field_at; + uint16 num_fields; + /* allocation size, or TYPE_SIZE_UNKNOWN */ + uint16 size; +} TINodeType; + +typedef struct TIStructField +{ + /* struct field name */ + uint16 name; + uint16 type; + /* offset within the containing struct */ + uint16 offset; + /* allocation size, or TYPE_SIZE_UNKNOWN */ + uint16 size; + uint16 flags; + uint16 type_id; + uint16 known_type_id; + uint16 elem_known_type_id; + /* allocation size, or TYPE_SIZE_UNKNOWN */ + uint16 elem_size; +} TIStructField; + +typedef struct TIEnum +{ + /* name of enum */ + uint16 name; + uint16 first_field_at; + uint16 num_fields; + uint16 size; +} TIEnum; + +typedef struct TIEnumField +{ + uint16 name; + uint32 value; +} TIEnumField; + +/* + * XXX: Wasting a lot of space due to padding and pointer. Instead we could + * store all strings together, and use an offset pointer into that? 
+ */ +typedef struct TIString +{ + uint16 length; + const char *const string; +} TIString; + +extern const TINodeType ti_node_types[]; +extern const TIStructField ti_struct_fields[]; +extern const TIEnum ti_enums[]; +extern const TIEnumField ti_enum_fields[]; +extern const TIString ti_strings[]; + +#define USE_NEW_NODE_FUNCS + +#endif /* PG_NODEINFO_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index c734b8d29c..e60a0f06a7 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -92,6 +92,7 @@ typedef enum NodeTag T_PlanRowMark, T_PartitionPruneInfo, T_PartitionedRelPruneInfo, + T_PartitionPruneStep, T_PartitionPruneStepOp, T_PartitionPruneStepCombine, T_PlanInvalItem, @@ -616,14 +617,20 @@ extern void outBitmapset(struct StringInfoData *str, extern void outDatum(struct StringInfoData *str, uintptr_t value, int typlen, bool typbyval); extern char *nodeToString(const void *obj); +extern char *nodeToStringOld(const void *obj); +extern char *nodeToStringNew(const void *obj); extern char *bmsToString(const struct Bitmapset *bms); /* * nodes/{readfuncs.c,read.c} */ extern void *stringToNode(const char *str); +extern void *stringToNodeNew(const char *str); +extern void *stringToNodeOld(const char *str); #ifdef WRITE_READ_PARSE_PLAN_TREES extern void *stringToNodeWithLocations(const char *str); +extern void *stringToNodeWithLocationsOld(const char *str); +extern void *stringToNodeWithLocationsNew(const char *str); #endif extern struct Bitmapset *readBitmapset(void); extern uintptr_t readDatum(bool typbyval); @@ -636,6 +643,8 @@ extern PGARR(AttrNumber) *readAttrNumberCols(void); * nodes/copyfuncs.c */ extern void *copyObjectImpl(const void *obj); +extern void *copyObjectImplOld(const void *obj); +extern void *copyObjectImplNew(const void *obj); /* cast result back to argument type, if supported by compiler */ #ifdef HAVE_TYPEOF @@ -648,6 +657,8 @@ extern void *copyObjectImpl(const void *obj); * nodes/equalfuncs.c */ extern bool 
equal(const void *a, const void *b); +extern bool nodes_equal_new(const void *a, const void *b); +extern bool nodes_equal_old(const void *a, const void *b); /* diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 409d840e79..666e20bdde 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -58,6 +58,9 @@ typedef struct List /* If elements == initial_elements, it's not a separate allocation */ } List; +typedef List OidList; +typedef List IntList; + /* * The *only* valid representation of an empty list is NIL; in other * words, a non-NIL list is guaranteed to have length >= 1. diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index a4ebd09521..6a6c0aabb2 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -199,13 +199,13 @@ typedef struct Const int32 consttypmod; /* typmod value, if any */ Oid constcollid; /* OID of collation, or InvalidOid if none */ int constlen; /* typlen of the constant's datatype */ - Datum constvalue; /* the constant's value */ bool constisnull; /* whether the constant is null (if true, * constvalue is undefined) */ bool constbyval; /* whether this datatype is passed by value. * If true, then all the information is stored * in the Datum. If false, then the Datum * contains a pointer to the information. 
*/ + Datum constvalue; /* the constant's value */ Location location; /* token location, or -1 if unknown */ } Const; diff --git a/src/include/nodes/value.h b/src/include/nodes/value.h index 871ffa8fa9..d3d580cec5 100644 --- a/src/include/nodes/value.h +++ b/src/include/nodes/value.h @@ -49,6 +49,12 @@ typedef struct Value } val; } Value; +typedef Value Integer; +typedef Value Float; +typedef Value String; +typedef Value BitString; +typedef Value Null; + #define intVal(v) (((Value *)(v))->val.ival) #define floatVal(v) atof(((Value *)(v))->val.str) #define strVal(v) (((Value *)(v))->val.str) @@ -58,4 +64,12 @@ extern Value *makeFloat(char *numericStr); extern Value *makeString(char *str); extern Value *makeBitString(char *str); +static inline bool +IsAValue(const void *ptr) +{ + NodeTag tag = nodeTag(ptr); + + return tag == T_Integer || tag == T_Float || tag == T_String || + tag == T_BitString || tag == T_Null; +} #endif /* VALUE_H */ diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index d01769299e..5cc85f03db 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -3455,7 +3455,7 @@ CREATE TABLE coll_t (c) AS VALUES ('bar'::text); CREATE POLICY coll_p ON coll_t USING (c < ('foo'::text COLLATE "C")); ALTER TABLE coll_t ENABLE ROW LEVEL SECURITY; GRANT SELECT ON coll_t TO regress_rls_alice; -SELECT (string_to_array(polqual, ':'))[7] AS inputcollid FROM pg_policy WHERE polrelid = 'coll_t'::regclass; +SELECT (regexp_match(polqual, ':(inputcollid[^:]*)'))[1] AS inputcollid FROM pg_policy WHERE polrelid = 'coll_t'::regclass; inputcollid ------------------ inputcollid 950 diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql index d7a5a36cf8..d93b67de5b 100644 --- a/src/test/regress/sql/rowsecurity.sql +++ b/src/test/regress/sql/rowsecurity.sql @@ -1386,7 +1386,7 @@ CREATE TABLE coll_t (c) AS VALUES ('bar'::text); CREATE POLICY coll_p ON 
coll_t USING (c < ('foo'::text COLLATE "C")); ALTER TABLE coll_t ENABLE ROW LEVEL SECURITY; GRANT SELECT ON coll_t TO regress_rls_alice; -SELECT (string_to_array(polqual, ':'))[7] AS inputcollid FROM pg_policy WHERE polrelid = 'coll_t'::regclass; +SELECT (regexp_match(polqual, ':(inputcollid[^:]*)'))[1] AS inputcollid FROM pg_policy WHERE polrelid = 'coll_t'::regclass; SET SESSION AUTHORIZATION regress_rls_alice; SELECT * FROM coll_t; ROLLBACK;