From: Andres Freund <andres@anarazel.de>
Date: Thu, 29 Aug 2019 17:14:07 +0000 (-0700)
Subject: WIP: Introduce compile-time node type metadata collection & reimplement node funcs.
X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=79cac70107220a4462fb5002e1f5a533c2a60b9a;p=users%2Fandresfreund%2Fpostgres.git

WIP: Introduce compile-time node type metadata collection & reimplement node funcs.

Author:
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:
---

diff --git a/src/backend/Makefile b/src/backend/Makefile
index b03d5e510f..fdc2a05988 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -53,7 +53,7 @@ endif
 
 ##########################################################################
 
-all: submake-libpgport submake-catalog-headers submake-utils-headers postgres $(POSTGRES_IMP)
+all: submake-libpgport submake-catalog-headers submake-utils-headers submake-node-data postgres $(POSTGRES_IMP)
 
 ifneq ($(PORTNAME), cygwin)
 ifneq ($(PORTNAME), win32)
@@ -144,7 +144,11 @@ submake-catalog-headers:
 submake-utils-headers:
 	$(MAKE) -C utils distprep generated-header-symlinks
 
-.PHONY: submake-catalog-headers submake-utils-headers
+# run this unconditionally to avoid needing to know its dependencies here:
+submake-node-data:
+	$(MAKE) -C nodes distprep generated-node-data
+
+.PHONY: submake-catalog-headers submake-utils-headers submake-node-data
 
 # Make symlinks for these headers in the include directory. That way
 # we can cut down on the -I options. Also, a symlink is automatically
@@ -159,7 +163,7 @@ submake-utils-headers:
 
 .PHONY: generated-headers
 
-generated-headers: $(top_builddir)/src/include/parser/gram.h $(top_builddir)/src/include/storage/lwlocknames.h submake-catalog-headers submake-utils-headers
+generated-headers: $(top_builddir)/src/include/parser/gram.h $(top_builddir)/src/include/storage/lwlocknames.h submake-catalog-headers submake-utils-headers submake-node-data
 
 $(top_builddir)/src/include/parser/gram.h: parser/gram.h
 	prereqdir=`cd '$(dir $<)' >/dev/null && pwd` && \
diff --git a/src/backend/nodes/Makefile b/src/backend/nodes/Makefile
index 0b1e98c019..d9eaa0cf25 100644
--- a/src/backend/nodes/Makefile
+++ b/src/backend/nodes/Makefile
@@ -13,7 +13,61 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = nodeFuncs.o nodes.o list.o bitmapset.o tidbitmap.o \
-       copyfuncs.o equalfuncs.o extensible.o makefuncs.o \
-       outfuncs.o readfuncs.o print.o read.o params.o value.o
+       copyfuncs.o copyfuncs_new.o equalfuncs.o equalfuncs_new.o extensible.o \
+       makefuncs.o nodeinfo_data.o outfuncs.o outfuncs_new.o readfuncs.o \
+       readfuncs_new.o print.o read.o params.o value.o
+
+# node metadata generation
+GENNODE_FILES = nodes/primnodes.h \
+		nodes/pathnodes.h \
+		nodes/plannodes.h \
+		nodes/execnodes.h \
+		nodes/memnodes.h \
+		nodes/value.h \
+		nodes/pg_list.h \
+		nodes/extensible.h \
+		nodes/parsenodes.h \
+		nodes/replnodes.h \
+		nodes/supportnodes.h \
+		utils/rel.h
+
+.PHONY: generated-node-data
+all: generated-node-data
+
+# FIXME: If we want to support doing this during cross compilation,
+# this'd need to be done using the host compiler
+gennodes.o: override CFLAGS += $(LLVM_CFLAGS)
+gennodes.o: override CPPFLAGS += $(LLVM_CPPFLAGS)
+gennodes: override LDFLAGS += -lclang
+
+gennodes: | submake-libpgport
+
+generated-node-data: $(top_srcdir)/src/backend/nodes/nodeinfo_data.c
+
+# metadata generation depends on the headers to be processed
+$(top_srcdir)/src/backend/nodes/nodeinfo_data.c: \
+	$(addprefix $(top_srcdir)/src/include/, $(GENNODE_FILES))
+
+# But also on the generator's own source and the metadata header
+$(top_srcdir)/src/backend/nodes/nodeinfo_data.c: \
+	$(top_srcdir)/src/backend/nodes/gennodes.c \
+	$(top_srcdir)/src/include/nodes/nodeinfo.h
+
+# Order-only dependency on gennodes: rebuilding gennodes alone does not
+# force the node metadata to be regenerated.
+#
+# FIXME: proper error message when LLVM isn't available
+# NOTE(review): make still builds a missing order-only prerequisite whenever
+# this target is considered, so gennodes/LLVM may be needed even when the
+# metadata is up to date -- confirm.
+# XXX: A more granular check whether this needs to be rebuilt would be
+# nicer, obviously there's plenty of changes that wouldn't matter.
+$(top_srcdir)/src/backend/nodes/nodeinfo_data.c: | gennodes
+	./gennodes \
+		--llvm-config $(LLVM_CONFIG) \
+		--output $@ \
+		$(GENNODE_FILES) \
+		-- \
+		$(CPPFLAGS) -Wno-ignored-attributes
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 26d2f467e0..690602654d 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -4733,15 +4733,8 @@ _copyForeignKeyCacheInfo(const ForeignKeyCacheInfo *from)
 	return newnode;
 }
 
-
-/*
- * copyObjectImpl -- implementation of copyObject(); see nodes/nodes.h
- *
- * Create a copy of a Node tree or list.  This is a "deep" copy: all
- * substructure is copied too, recursively.
- */
 void *
-copyObjectImpl(const void *from)
+copyObjectImplOld(const void *from)
 {
 	void	   *retval;
 
diff --git a/src/backend/nodes/copyfuncs_new.c b/src/backend/nodes/copyfuncs_new.c
new file mode 100644
index 0000000000..a080b7ce95
--- /dev/null
+++ b/src/backend/nodes/copyfuncs_new.c
@@ -0,0 +1,359 @@
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "nodes/nodes.h"
+#include "nodes/nodeinfo.h"
+#include "nodes/pg_list.h"
+#include "nodes/primnodes.h"
+#include "nodes/value.h"
+#include "utils/datum.h"
+
+
+typedef struct CopyNodeContext
+{
+	size_t		required_space;	/* zero-initialized by copyObjectImplNew(); not otherwise used in this file */
+	size_t		used_space;		/* likewise currently unused */
+	char	   *space;			/* likewise; NOTE(review): presumably for a future single-allocation copy -- confirm */
+} CopyNodeContext;
+
+
+static Node* nodecopy_new_rec(CopyNodeContext *context, const Node *obj);
+static void nodecopy_fields(CopyNodeContext *context, Node *dst, const Node *src, const TINodeType *type_info);
+static List* nodecopy_list(CopyNodeContext *context, const List *obj, NodeTag tag);
+static void nodecopy_value_union(CopyNodeContext *context, Value *dst, const Value *src);
+
+
+#define BITMAPSET_SIZE(nwords)	\
+	(offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
+
+/*
+ * copyObjectImpl -- implementation of copyObject(); see nodes/nodes.h
+ * Create a deep copy of a Node tree or list: all substructure is copied
+ * too, recursively.  USE_NEW_NODE_FUNCS selects copyObjectImplNew() at
+ * compile time; otherwise copyObjectImplOld() is used.
+ */
+void *
+copyObjectImpl(const void *from)
+{
+#ifdef USE_NEW_NODE_FUNCS
+	return copyObjectImplNew(from);
+#else
+	return copyObjectImplOld(from);
+#endif
+}
+
+void * __attribute__((flatten))
+copyObjectImplNew(const void *obj)
+{
+	CopyNodeContext context = {0};	/* fresh, all-zero state for this call */
+
+	return nodecopy_new_rec(&context, obj);	/* metadata-driven recursive copy from the root */
+}
+
+static inline void*
+nodecopy_alloc(CopyNodeContext *context, size_t size, size_t align)
+{
+	return palloc(size);		/* context and align are accepted but currently ignored */
+}
+
+static inline void*
+nodecopy_alloc0(CopyNodeContext *context, size_t size, size_t align)
+{
+	return palloc0(size);		/* zero-filled variant; context and align are currently ignored */
+}
+
+static Node*
+nodecopy_new_rec(CopyNodeContext *context, const Node *obj)
+{
+	const TINodeType *type_info;
+	NodeTag tag;
+	Node *dst;
+
+	if (obj == NULL)
+		return NULL;			/* a NULL input copies to NULL */
+
+	tag = nodeTag(obj);
+
+	/* Guard against stack overflow due to overly complex expressions */
+	check_stack_depth();
+
+	switch (tag)				/* the list types are copied by a dedicated routine */
+	{
+		case T_List:
+		case T_OidList:
+		case T_IntList:
+			return (Node *) nodecopy_list(context, (List *) obj, tag);
+		default:
+			break;
+	}
+
+	type_info = &ti_node_types[tag];	/* metadata entry, indexed by node tag */
+
+	Assert(type_info->size != TYPE_SIZE_UNKNOWN);
+	/* zeroed allocation: fields that nodecopy_fields() skips stay zero/NULL */
+	dst = (Node *) nodecopy_alloc0(context, type_info->size, MAXIMUM_ALIGNOF);
+	dst->type = tag;
+
+	nodecopy_fields(context, dst, obj, type_info);
+
+	return dst;
+}
+
+static void
+nodecopy_fields(CopyNodeContext *context, Node *dst, const Node *src, const TINodeType *type_info)
+{
+	const TIStructField *field_info = &ti_struct_fields[type_info->first_field_at];
+	/* Copy every field that type_info describes from src into dst. */
+	for (int i = 0; i < type_info->num_fields; i++, field_info++)
+	{
+		const void *src_field_ptr;
+		void *dst_field_ptr;
+
+		// FIXME: ExtensibleNode needs to call callbacks, or be reimplemented
+
+		if (field_info->flags & TYPE_COPY_IGNORE)
+			continue;
+
+		src_field_ptr = ((const char *) src + field_info->offset);
+		dst_field_ptr = ((char *) dst + field_info->offset);
+
+		switch (field_info->known_type_id)
+		{
+			/*
+			 * These could also be implemented via memcpy, but knowing size
+			 * ahead of time is faster
+			 */
+
+			case KNOWN_TYPE_UINT16:
+				*(uint16 *) dst_field_ptr = *(const uint16 *) src_field_ptr;
+				break;
+			case KNOWN_TYPE_OPFUNCID:
+			case KNOWN_TYPE_OID:
+			case KNOWN_TYPE_UINT32:
+				*(uint32 *) dst_field_ptr = *(const uint32 *) src_field_ptr;
+				break;
+			case KNOWN_TYPE_UINT64:
+				*(uint64 *) dst_field_ptr = *(const uint64 *) src_field_ptr;
+				break;
+
+			case KNOWN_TYPE_INT16:
+				*(int16 *) dst_field_ptr = *(const int16 *) src_field_ptr;
+				break;
+			case KNOWN_TYPE_LOCATION:
+			case KNOWN_TYPE_INT32:
+				*(int32 *) dst_field_ptr = *(const int32 *) src_field_ptr;
+				break;
+			case KNOWN_TYPE_INT64:
+				*(int64 *) dst_field_ptr = *(const int64 *) src_field_ptr;
+				break;
+
+			case KNOWN_TYPE_FLOAT32:
+				*(float *) dst_field_ptr = *(const float *) src_field_ptr;
+				break;
+			case KNOWN_TYPE_FLOAT64:
+				*(double *) dst_field_ptr = *(const double *) src_field_ptr;
+				break;
+
+			case KNOWN_TYPE_BOOL:
+				*(bool *) dst_field_ptr = *(const bool *) src_field_ptr;
+				break;
+
+			case KNOWN_TYPE_CHAR:
+				*(char *) dst_field_ptr = *(const char *) src_field_ptr;
+				break;
+
+			case KNOWN_TYPE_NODE:
+				{
+					const TINodeType *sub_type_info;
+					NodeTag sub_tag;
+
+					Assert(field_info->type_id != TYPE_ID_UNKNOWN);
+					/* at offset 0 the tag is shared with the parent: use the declared type */
+					if (field_info->offset == 0)
+						sub_tag = field_info->type_id;
+					else
+					{
+						sub_tag = nodeTag(src_field_ptr);
+
+						if (unlikely(ti_node_types[sub_tag].size != ti_node_types[field_info->type_id].size))
+						{
+							elog(ERROR, "%s size %d = %s %d failed",
+								 ti_strings[ti_node_types[sub_tag].name].string,
+								 ti_node_types[sub_tag].size,
+								 ti_strings[ti_node_types[field_info->type_id].name].string,
+								 ti_node_types[field_info->type_id].size);
+						}
+
+						Assert(ti_node_types[sub_tag].size ==
+							   ti_node_types[field_info->type_id].size);
+					}
+
+					sub_type_info = &ti_node_types[sub_tag];
+
+					nodecopy_fields(context,
+									(Node *) dst_field_ptr,
+									(const Node *) src_field_ptr,
+									sub_type_info);
+
+					break;
+				}
+
+			case KNOWN_TYPE_DATUM:
+				{
+					const Const *csrc = castNode(Const, (Node *) src);
+					Const *cdst = castNode(Const, (Node *) dst);
+
+					if (csrc->constbyval || csrc->constisnull)
+						cdst->constvalue = csrc->constvalue;
+					else
+						cdst->constvalue = datumCopy(csrc->constvalue,
+													 csrc->constbyval,
+													 csrc->constlen);
+
+					break;
+				}
+
+			case KNOWN_TYPE_VALUE_UNION:
+				{
+					const Value *vsrc = (const Value *) src;
+					Value *vdst = (Value *) dst;
+
+					Assert(IsAValue(vsrc) && IsAValue(vdst));
+
+					nodecopy_value_union(context, vdst, vsrc);
+
+					break;
+				}
+
+			case KNOWN_TYPE_P_PGARR:
+				if (*(const PgArrBase **) src_field_ptr != NULL)
+				{
+					const PgArrBase *arr_src = *(const PgArrBase **) src_field_ptr;
+					PgArrBase **arr_dst = (PgArrBase **) dst_field_ptr;
+
+					Assert(field_info->elem_size > 0);
+
+					*arr_dst = pgarr_helper_clone(arr_src, field_info->elem_size);
+				}
+				break;
+
+			case KNOWN_TYPE_P_NODE:
+				if (*(const Node **) src_field_ptr != NULL)
+					*(Node **) dst_field_ptr = nodecopy_new_rec(context, *(const Node **) src_field_ptr);
+				break;
+
+			case KNOWN_TYPE_P_CHAR:
+				if (*(char **) src_field_ptr != NULL)
+				{
+					size_t len = strlen(*(const char **) src_field_ptr) + 1;
+
+					*(char **) dst_field_ptr = nodecopy_alloc0(context, len, 1);
+					memcpy(*(char **) dst_field_ptr, *(const char **) src_field_ptr, len);
+				}
+				break;
+
+			case KNOWN_TYPE_P_BITMAPSET:
+				if (*(const char **) src_field_ptr != NULL)
+				{
+					const Bitmapset *bs_src = *(const Bitmapset **) src_field_ptr;
+					Bitmapset **bs_dst = (Bitmapset **) dst_field_ptr;
+					size_t bs_size = BITMAPSET_SIZE(bs_src->nwords);
+
+					*bs_dst = (Bitmapset *) nodecopy_alloc0(context, bs_size, MAXIMUM_ALIGNOF);
+					memcpy(*bs_dst, bs_src, bs_size);
+				}
+				break;
+
+			default:			/* bitwise OR: either scalar flag permits a plain bytewise copy */
+				if (field_info->flags & (TYPE_COPY_FORCE_SCALAR |
+										 TYPE_CAT_SCALAR))
+				{
+					Assert(field_info->size != TYPE_SIZE_UNKNOWN);
+					memcpy(dst_field_ptr, src_field_ptr, field_info->size);
+				}
+				else
+					elog(ERROR, "don't know how to copy field %s %s->%s",
+						 ti_strings[field_info->type].string,
+						 ti_strings[type_info->name].string,
+						 ti_strings[field_info->name].string);
+
+				break;
+		}
+	}
+}
+
+static List*
+nodecopy_list(CopyNodeContext *context, const List *src, NodeTag tag)
+{
+	List	   *dst;
+
+	/*
+	 * XXX: this is copying implementation details from new_list. But
+	 * otherwise it's hard to pass details through copy_list[_deep], and to
+	 * allocate the list itself as part of a larger allocation.
+	 */
+	dst = (List *) nodecopy_alloc0(context,
+								   offsetof(List, initial_elements) +
+								   src->length * sizeof(ListCell),
+								   MAXIMUM_ALIGNOF);
+	dst->type = tag;
+	dst->length = src->length;
+	dst->max_length = src->length;	/* sized exactly: no spare cells are allocated */
+	dst->elements = dst->initial_elements;	/* cells live inline, in the same allocation */
+
+	switch (tag)
+	{
+		case T_List:			/* pointer cells: deep-copy each element */
+			for (int i = 0; i < src->length; i++)
+				lfirst(&dst->elements[i]) =
+					nodecopy_new_rec(context, lfirst(&src->elements[i]));
+			break;
+
+		case T_OidList:
+		case T_IntList:			/* scalar cells: copy the cell array wholesale */
+			memcpy(dst->elements, src->elements,
+				   dst->length * sizeof(ListCell));
+			break;
+
+		default:
+			pg_unreachable();
+			return NULL;
+	}
+
+	return dst;
+}
+
+static void
+nodecopy_value_union(CopyNodeContext *context, Value *dst, const Value *src)
+{
+	Assert(nodeTag(src) == nodeTag(dst));
+	/* copy the union member selected by the node tag */
+	switch (nodeTag(src))
+	{
+		case T_Integer:
+			dst->val.ival = src->val.ival;
+			break;
+
+		case T_Float:
+		case T_String:
+		case T_BitString:		/* these tags keep their value as a string in val.str */
+			if (src->val.str == NULL)
+				dst->val.str = NULL;
+			else
+			{
+				size_t len = strlen(src->val.str) + 1;	/* include the terminating NUL */
+
+				dst->val.str = nodecopy_alloc0(context, len, 1);
+				memcpy(dst->val.str, src->val.str, len);
+			}
+
+			break;
+
+		case T_Null:			/* nothing to copy for this tag */
+			break;
+
+		default:
+			pg_unreachable();
+			break;
+	}
+}
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 0869d0dd27..944ad85862 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -52,7 +52,7 @@
 /* Compare a field that is a pointer to some kind of Node or Node tree */
 #define COMPARE_NODE_FIELD(fldname) \
 	do { \
-		if (!equal(a->fldname, b->fldname)) \
+		if (!nodes_equal_old(a->fldname, b->fldname)) \
 			return false; \
 	} while (0)
 
@@ -2336,7 +2336,7 @@ _equalParamRef(const ParamRef *a, const ParamRef *b)
 static bool
 _equalAConst(const A_Const *a, const A_Const *b)
 {
-	if (!equal(&a->val, &b->val))	/* hack for in-line Value field */
+	if (!nodes_equal_old(&a->val, &b->val))	/* hack for in-line Value field */
 		return false;
 	COMPARE_LOCATION_FIELD(location);
 
@@ -2930,7 +2930,7 @@ _equalList(const List *a, const List *b)
 		case T_List:
 			forboth(item_a, a, item_b, b)
 			{
-				if (!equal(lfirst(item_a), lfirst(item_b)))
+				if (!nodes_equal_old(lfirst(item_a), lfirst(item_b)))
 					return false;
 			}
 			break;
@@ -2993,12 +2993,8 @@ _equalValue(const Value *a, const Value *b)
 	return true;
 }
 
-/*
- * equal
- *	  returns whether two nodes are equal
- */
 bool
-equal(const void *a, const void *b)
+nodes_equal_old(const void *a, const void *b)
 {
 	bool		retval;
 
diff --git a/src/backend/nodes/equalfuncs_new.c b/src/backend/nodes/equalfuncs_new.c
new file mode 100644
index 0000000000..c705721400
--- /dev/null
+++ b/src/backend/nodes/equalfuncs_new.c
@@ -0,0 +1,409 @@
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "nodes/nodes.h"
+#include "nodes/nodeinfo.h"
+#include "nodes/pg_list.h"
+#include "nodes/primnodes.h"
+#include "nodes/value.h"
+#include "utils/datum.h"
+
+
+static bool nodes_equal_new_rec(const Node *a, const Node *b);
+static bool nodes_equal_new_rec_real(const Node *a, const Node *b);
+static bool nodes_equal_list(const List *a, const List *b, NodeTag tag);
+static bool nodes_equal_value_union(const Value *a, const Value *b, NodeTag tag);
+static bool nodes_equal_fields(const Node *a, const Node *b, const TINodeType *type_info);
+
+
+/*
+ * equal
+ *	  returns whether two nodes are equal (USE_NEW_NODE_FUNCS picks the impl.)
+ */
+bool
+equal(const void *a, const void *b)
+{
+#ifdef USE_NEW_NODE_FUNCS
+	return nodes_equal_new(a, b);
+#else
+	return nodes_equal_old(a, b);
+#endif
+}
+
+bool
+nodes_equal_new(const void *a, const void *b)
+{
+	bool retval;
+
+	retval = nodes_equal_new_rec(a, b);	/* metadata-driven comparison */
+#ifdef CHEAPER_PER_NODE_COMPARE_ASSERT
+	Assert(retval == nodes_equal_old(a, b));	/* one cross-check of the whole tree against the old code */
+#endif
+
+	return retval;
+}
+
+/*
+ * Compare two nodes via nodes_equal_new_rec_real(), optionally cross-checked.
+ */
+static bool
+nodes_equal_new_rec(const Node *a, const Node *b)
+{
+	/*
+	 * During development it can be helpful to compare old/new equal
+	 * comparisons on a per-field basis, making it easier to pinpoint the node
+	 * with differing behaviour - but it's quite expensive (because we'll
+	 * compare nodes over and over while recursing down).
+	 */
+#ifdef EXPENSIVE_PER_NODE_COMPARE_ASSERT
+	bool newretval;
+	bool oldretval;
+
+	newretval = nodes_equal_new_rec_real(a, b);
+	oldretval = nodes_equal_old(a, b);
+
+	Assert(newretval == oldretval);
+
+	return newretval;
+#else
+	return nodes_equal_new_rec_real(a, b);
+#endif
+}
+
+/* temporary helper for nodes_equal_new_rec: identity/NULL/tag checks, then fields */
+static bool
+nodes_equal_new_rec_real(const Node *a, const Node *b)
+{
+	const TINodeType *type_info;
+	NodeTag tag;
+
+	if (a == b)
+		return true;
+
+	/* note that a!=b, so only one of them can be NULL */
+	if (a == NULL || b == NULL)
+		return false;
+
+	/* are they the same type of nodes? */
+	tag = nodeTag(a);
+	if (tag != nodeTag(b))
+		return false;
+
+	/* Guard against stack overflow due to overly complex expressions */
+	check_stack_depth();
+
+	/*
+	 * Compare types of node we cannot / do not want to handle using
+	 * elementwise comparisons.  Either because that'd not be correct
+	 * (e.g. because of an embedded tagged union), incomplete (e.g. because we
+	 * need to compare all elements of a list, which needs knowledge of two
+	 * struct members), or because it'd be less efficient.
+	 */
+	switch (tag)
+	{
+		case T_List:
+		case T_OidList:
+		case T_IntList:
+			return nodes_equal_list((const List *) a, (const List *) b, tag);
+
+		default:
+			break;
+	}
+
+	type_info = &ti_node_types[tag];	/* metadata entry, indexed by node tag */
+
+	return nodes_equal_fields(a, b, type_info);
+}
+
+/*
+ * Compare the fields of a struct, using the provided TINodeType metadata.
+ *
+ * The compared nodes may be separately allocated, or be embedded in a
+ * surrounding struct.
+ *
+ * This function does *not* check for the nodes being of the same type, or
+ * having the same tag! If needed nodes_equal_new_rec() does so.
+ *
+ * NB: The struct may or may not have a nodeTag() for the type_info - e.g. for
+ * the struct elements of a "superclass" of a node (e.g. a Scan's .plan) it'll
+ * be the subclass's tag.
+ */
+static bool
+nodes_equal_fields(const Node *a, const Node *b, const TINodeType *type_info)
+{
+	const TIStructField *field_info = &ti_struct_fields[type_info->first_field_at];
+
+	for (int i = 0; i < type_info->num_fields; i++, field_info++)
+	{
+		// FIXME: ExtensibleNode needs to call callbacks, or be reimplemented
+
+		const void *a_field_ptr;
+		const void *b_field_ptr;
+
+		if (field_info->flags & TYPE_EQUAL_IGNORE)
+			continue;
+
+		a_field_ptr = ((const char *) a + field_info->offset);
+		b_field_ptr = ((const char *) b + field_info->offset);
+
+		switch (field_info->known_type_id)
+		{
+			case KNOWN_TYPE_NODE:
+				{
+					const TINodeType *sub_type_info;
+					NodeTag sub_tag;
+
+					Assert(field_info->type_id != TYPE_ID_UNKNOWN);
+
+					/*
+					 * If at offset 0, this shares the NodeTag field with the
+					 * parent class. Therefore we have to rely on the declared
+					 * type.
+					 */
+					if (field_info->offset == 0)
+						sub_tag = field_info->type_id;
+					else
+					{
+						sub_tag = nodeTag(a_field_ptr);
+
+						Assert(ti_node_types[sub_tag].size ==
+							   ti_node_types[field_info->type_id].size);
+
+						if (sub_tag != nodeTag(b_field_ptr))
+							return false;
+					}
+
+					sub_type_info = &ti_node_types[sub_tag];
+
+					if (!nodes_equal_fields((const Node *) a_field_ptr,
+											(const Node *) b_field_ptr,
+											sub_type_info))
+						return false;
+
+					break;
+				}
+
+			case KNOWN_TYPE_DATUM:
+				{
+					/* currently only embedded in Const */
+					const Const *ca = castNode(Const, (Node *) a);
+					const Const *cb = castNode(Const, (Node *) b);
+
+					if (ca->consttype != cb->consttype ||
+						ca->constlen != cb->constlen ||
+						ca->constbyval != cb->constbyval ||
+						ca->constisnull != cb->constisnull)
+						return false;
+					/*
+					 * The checks above keep this independent of field order.
+					 * We treat all NULL constants of the same type as equal;
+					 * datumIsEqual doesn't work on nulls, so...
+					 */
+					if (ca->constisnull && cb->constisnull)
+						continue;
+					else if (!datumIsEqual(ca->constvalue, cb->constvalue,
+										   ca->constbyval, ca->constlen))
+						return false;
+
+					break;
+				}
+
+			case KNOWN_TYPE_VALUE_UNION:
+				{
+					const Value *va = (const Value *) a;
+					const Value *vb = (const Value *) b;
+
+					Assert(IsAValue(va) && IsAValue(vb));
+
+					if (!nodes_equal_value_union(va, vb, nodeTag(a)))
+						return false;
+
+					break;
+				}
+
+			case KNOWN_TYPE_OPFUNCID:
+				{
+					const Oid oa = *(const Oid *) a_field_ptr;
+					const Oid ob = *(const Oid *) b_field_ptr;
+
+					/*
+					 * Special-case opfuncid: it is allowable for it to differ if one node
+					 * contains zero and the other doesn't.  This just means that the one node
+					 * isn't as far along in the parse/plan pipeline and hasn't had the
+					 * opfuncid cache filled yet.
+					 */
+					if (oa != ob && oa != 0 && ob != 0)
+						return false;
+
+					break;
+
+				}
+
+			case KNOWN_TYPE_P_PGARR:
+				Assert(field_info->elem_size != TYPE_SIZE_UNKNOWN);
+
+				/* identical pointers (which may be NULL) are definitely equal */
+				if (*(const void **) a_field_ptr != *(const void **) b_field_ptr)
+				{
+					/*
+					 * Compare without checking for NULLness, empty array can be
+					 * represented with a NULL pointer, or with an array with zero
+					 * elements.
+					 */
+					const PgArrBase *arr_a = *(const PgArrBase **) a_field_ptr;
+					const PgArrBase *arr_b = *(const PgArrBase **) b_field_ptr;
+
+					if (pgarr_size(arr_a) != pgarr_size(arr_b))
+						return false;
+
+					if (!pgarr_empty(arr_a))
+					{
+						/*
+						 * XXX: Should we care about the potential effect of padding
+						 * here? Currently we're only using this for simple scalar
+						 * types, but ...
+						 */
+						if (memcmp(arr_a->elementsp, arr_b->elementsp,
+								   arr_a->size * field_info->elem_size) != 0)
+							return false;
+					}
+
+				}
+				break;
+
+			case KNOWN_TYPE_P_BITMAPSET:
+				/* identical pointers (which may be NULL) are definitely equal */
+				if (*(const void **) a_field_ptr != *(const void **) b_field_ptr)
+				{
+					const Bitmapset *bs_a = *(const Bitmapset **) a_field_ptr;
+					const Bitmapset *bs_b = *(const Bitmapset **) b_field_ptr;
+
+					if (!bms_equal(bs_a, bs_b))
+						return false;
+				}
+				break;
+
+			case KNOWN_TYPE_P_NODE:
+				/* identical pointers (which may be NULL) are definitely equal */
+				if (*(const void **) a_field_ptr == *(const void **) b_field_ptr)
+					break;
+				if (*(const void **) a_field_ptr == NULL ||
+					*(const void **) b_field_ptr == NULL)
+					return false;
+				else
+					if (!nodes_equal_new_rec(*(const Node **) a_field_ptr, *(const Node **) b_field_ptr))
+						return false;
+				break;
+
+			case KNOWN_TYPE_P_CHAR:
+				/* identical pointers (which may be NULL) are definitely equal */
+				if (*(const void **) a_field_ptr == *(const void **) b_field_ptr)
+					break;
+				if (*(const void **) a_field_ptr == NULL ||
+					*(const void **) b_field_ptr == NULL)
+					return false;
+				else
+					if (strcmp(*(const char **) a_field_ptr, *(const char **) b_field_ptr) != 0)
+						return false;
+				break;
+
+			default:			/* NOTE(review): reuses the COPY force-scalar flag -- confirm intended */
+				if (field_info->flags & (TYPE_COPY_FORCE_SCALAR |
+										 TYPE_CAT_SCALAR))
+				{
+					if (memcmp(a_field_ptr, b_field_ptr, field_info->size) != 0)
+						return false;
+				}
+				else
+				{
+					elog(ERROR, "don't know how to compare field %s %s->%s",
+						 ti_strings[field_info->type].string,
+						 ti_strings[type_info->name].string,
+						 ti_strings[field_info->name].string);
+				}
+				break;
+		}
+	}
+
+	return true;
+}
+
+static bool
+nodes_equal_list(const List *a, const List *b, NodeTag tag)
+{
+	const ListCell *lc_a;
+	const ListCell *lc_b;
+
+	/* should have been verified by caller */
+	Assert(a != b && a != NULL);
+	Assert(nodeTag(a) == nodeTag(b));
+
+	if (a->length != b->length)
+		return false;			/* lists of different length are never equal */
+
+	switch (tag)
+	{
+		case T_List:			/* node pointers: compare elements recursively */
+			forboth(lc_a, a, lc_b, b)
+			{
+				if (!nodes_equal_new_rec(lfirst(lc_a), lfirst(lc_b)))
+					return false;
+			}
+			break;
+
+		case T_OidList:			/* Oid cells: compare the values directly */
+			forboth(lc_a, a, lc_b, b)
+			{
+				if (lfirst_oid(lc_a) != lfirst_oid(lc_b))
+					return false;
+			}
+			break;
+
+		case T_IntList:			/* int cells: compare the values directly */
+			forboth(lc_a, a, lc_b, b)
+			{
+				if (lfirst_int(lc_a) != lfirst_int(lc_b))
+					return false;
+			}
+			break;
+
+		default:
+			pg_unreachable();
+			return false;
+	}
+
+	return true;
+}
+
+static bool
+nodes_equal_value_union(const Value *a, const Value *b, NodeTag tag)
+{
+	/* should have been verified by caller */
+	Assert(a != b && a != NULL);
+	Assert(nodeTag(a) == nodeTag(b));
+
+	switch (tag)				/* compare the union member selected by the tag */
+	{
+		case T_Integer:
+			return a->val.ival == b->val.ival;
+
+		case T_Float:
+		case T_String:
+		case T_BitString:		/* these tags keep their value as a string in val.str */
+			if (a->val.str == b->val.str)
+				return true;
+			else if (a->val.str == NULL || b->val.str == NULL)
+				return false;
+			return strcmp(a->val.str, b->val.str) == 0;
+
+		case T_Null:
+			return true;
+
+		default:
+			pg_unreachable();
+			return false;
+	}
+
+	pg_unreachable();			/* every switch arm above returns */
+	return false;
+}
diff --git a/src/backend/nodes/gennodes.c b/src/backend/nodes/gennodes.c
new file mode 100644
index 0000000000..ac6d79ebf8
--- /dev/null
+++ b/src/backend/nodes/gennodes.c
@@ -0,0 +1,929 @@
+/*-------------------------------------------------------------------------
+ *
+ * gennodes.c
+ *	  metadata generation routines for node types
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/nodes/gennodes.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include <clang-c/Index.h>
+
+#include "lib/pgarr.h"
+#include "lib/stringinfo.h"
+#include "nodes/nodeinfo.h"
+
+
+/*
+ * type_id value of a field whose type refers to neither a collected node
+ * type nor an enum.  It is written to the generated file by name rather than
+ * by value (presumably nodes/nodeinfo.h provides a matching definition there
+ * - TODO confirm).
+ */
+#define TYPE_ID_UNKNOWN PG_UINT16_MAX
+
+/*
+ * State shared by all the libclang visitors in this file. The *_strings
+ * arrays hold C initializer text, one element per entry of the corresponding
+ * array in the generated file.
+ */
+typedef struct CollectInfo
+{
+	/* interned strings, referenced by index; emitted as ti_strings[] */
+	PGARR(charstar) strtab;
+	/* names of the NodeTag enum members, with the "T_" prefix stripped */
+	PGARR(charstar) interesting_node_typedefs;
+	/* canonical type name for each of the above (same index), NULL if not found */
+	PGARR(charstar) interesting_node_types;
+	/* ti_node_types[] initializers (same index as above), NULL if not found */
+	PGARR(charstar) node_type_strings;
+	/* ti_struct_fields[] initializers, for the fields of all node structs */
+	PGARR(charstar) struct_field_strings;
+
+	/* names of the enum types encountered so far */
+	PGARR(charstar) interesting_enums;
+	/* ti_enums[] initializers, same index as interesting_enums */
+	PGARR(charstar) enum_strings;
+	/* ti_enum_fields[] initializers, for the members of all those enums */
+	PGARR(charstar) enum_field_strings;
+
+	/* struct whose fields are currently being visited */
+	CXType current_struct_type;
+	/* number of fields of current_struct_type visited so far */
+	size_t off;
+} CollectInfo;
+
+/*
+ * For collecting information about a pgarr.h style array, filled in by
+ * find_PgArrFields_vis().
+ */
+typedef struct PgArrFieldsState
+{
+	uint32 off;			/* number of fields accepted so far */
+	bool valid;			/* true once the "elementsp" pointer field was found */
+	CXType tp;			/* type "elementsp" points to; only set if valid */
+} PgArrFieldsState;
+
+
+/*
+ * Return the index of the first element of arr that is equal to match, or -1
+ * if there is none.
+ *
+ * NULL never compares equal to anything: some arrays contain NULL
+ * placeholders for entries that have not been filled in, and neither those
+ * nor a NULL match may be passed to strcmp().
+ *
+ * FIXME: this is used for lookups in too many places - need something better
+ * than O(N).
+ */
+static int
+string_in_arr(PGARR(charstar) *arr, const char *match)
+{
+	if (match == NULL)
+		return -1;
+
+	for (int i = 0; i < pgarr_size(arr); i++)
+	{
+		const char *el = *pgarr_at(arr, i);
+
+		/* skip placeholders */
+		if (el == NULL)
+			continue;
+
+		if (strcmp(el, match) == 0)
+			return i;
+	}
+
+	return -1;
+}
+
+/*
+ * Return the index of str in the string table, adding a copy of str to the
+ * table first if it is not in there yet.
+ */
+static uint32
+intern_string(CollectInfo *info, const char *str)
+{
+	/*
+	 * Keep the lookup result signed, so that "not found" (-1) is tested for
+	 * as such, rather than relying on an implicit conversion to unsigned.
+	 */
+	int pos = string_in_arr(&info->strtab, str);
+
+	if (pos >= 0)
+		return (uint32) pos;
+
+	pgarr_append(char *, &info->strtab, pstrdup(str));
+	return pgarr_size(&info->strtab) - 1;
+}
+
+/*
+ * Add appendflag to the list of flags accumulated in str. Flags are
+ * separated by " | ", so that the result can be used as a C expression.
+ */
+static void
+flag_append(StringInfo str, char *appendflag)
+{
+	const bool need_separator = str->len > 0;
+
+	if (need_separator)
+		appendStringInfoString(str, " | ");
+
+	appendStringInfoString(str, appendflag);
+}
+
+/*
+ * Field visitor checking whether a struct has the layout of a pgarr.h style
+ * array, i.e. starts with the fields "size", "capacity" and a pointer named
+ * "elementsp", in that order.
+ *
+ * client_data points to a PgArrFieldsState. The visit is aborted at the
+ * first field not matching the expectation, leaving state->valid false. If
+ * the third field matches, state->tp is set to the type it points to, and
+ * state->valid to true. Any further fields are accepted without inspection.
+ */
+static enum CXVisitorResult
+find_PgArrFields_vis(CXCursor cursor, CXClientData client_data)
+{
+	PgArrFieldsState *state = (PgArrFieldsState *) client_data;
+	CXString spelling = clang_getCursorSpelling(cursor);
+	const char *fieldname = clang_getCString(spelling);
+	enum CXVisitorResult result = CXVisit_Continue;
+
+	if (state->off == 0)
+	{
+		if (strcmp(fieldname, "size") != 0)
+			result = CXVisit_Break;
+	}
+	else if (state->off == 1)
+	{
+		if (strcmp(fieldname, "capacity") != 0)
+			result = CXVisit_Break;
+	}
+	else if (state->off == 2)
+	{
+		CXType tp = clang_getCursorType(cursor);
+
+		if (strcmp(fieldname, "elementsp") != 0 ||
+			tp.kind != CXType_Pointer)
+			result = CXVisit_Break;
+		else
+		{
+			state->tp = clang_getPointeeType(tp);
+			state->valid = true;
+		}
+	}
+
+	/* fieldname points into spelling, so only release it now */
+	clang_disposeString(spelling);
+
+	/* only fields that were accepted are counted */
+	if (result == CXVisit_Continue)
+		state->off++;
+
+	return result;
+}
+
+/*
+ * Child visitor for an enum declaration: appends one TIEnumField initializer
+ * to collect_info->enum_field_strings for each member of the enum.
+ *
+ * The generated initializer refers to the member by name, the numeric value
+ * (as reported by clang, truncated to 32 bits) is only added as a comment.
+ */
+static enum CXChildVisitResult
+find_EnumFields_vis(CXCursor cursor, CXCursor parent, CXClientData client_data)
+{
+	if (clang_getCursorKind(cursor) == CXCursor_EnumConstantDecl)
+	{
+		CollectInfo *collect_info = (CollectInfo *) client_data;
+		CXString spelling = clang_getCursorSpelling(cursor);
+		const char *fieldname = clang_getCString(spelling);
+		char *s;
+
+		s = psprintf("{.name = %u /* %s */, .value = (uint32) %s /* %u */}",
+					 intern_string(collect_info, fieldname), fieldname,
+					 fieldname,
+					 (uint32) clang_getEnumConstantDeclUnsignedValue(cursor));
+
+		pgarr_append(char *, &collect_info->enum_field_strings, s);
+
+		/* psprintf() and intern_string() made their own copies of the name */
+		clang_disposeString(spelling);
+	}
+	return CXChildVisit_Continue;
+}
+
+/*
+ * Return the id of the enum type ctp, collecting the metadata for the enum
+ * and its members first if the enum has not been encountered before.
+ *
+ * The id is the position of the enum in interesting_enums, which is also the
+ * position of its initializer in enum_strings, as both arrays are only ever
+ * appended to together.
+ */
+static uint16
+get_enum(CollectInfo *collect_info, CXType ctp)
+{
+	CXString spelling = clang_getTypeSpelling(ctp);
+	const char *ctp_name = clang_getCString(spelling);
+	int enumid = string_in_arr(&collect_info->interesting_enums, ctp_name);
+
+	if (enumid == -1)
+	{
+		size_t fields_at_start = pgarr_size(&collect_info->enum_field_strings);
+		char *s;
+
+		/* appends the enum's members to enum_field_strings */
+		clang_visitChildren(
+			clang_getTypeDeclaration(ctp),
+			find_EnumFields_vis,
+			collect_info);
+
+		s = psprintf("{.name = %u /* %s */, .first_field_at = %zu, .num_fields = %zu, .size = sizeof(%s)}",
+					 intern_string(collect_info, ctp_name), ctp_name,
+					 fields_at_start,
+					 pgarr_size(&collect_info->enum_field_strings) - fields_at_start,
+					 ctp_name);
+
+		pgarr_append(char *, &collect_info->enum_strings, s);
+		pgarr_append(char *, &collect_info->interesting_enums, pstrdup(ctp_name));
+
+		enumid = pgarr_size(&collect_info->interesting_enums) - 1;
+	}
+
+	/* ctp_name points into spelling, everything needed has been copied */
+	clang_disposeString(spelling);
+
+	return (uint16) enumid;
+}
+
+/*
+ * Expands to the name, as a string literal, of the KNOWN_TYPE_ constant for
+ * "name": KNOWN_TYPE_P_<name> if intype (a CXType) is a pointer type,
+ * KNOWN_TYPE_<name> otherwise.
+ */
+#define tpref(intype, name) \
+	((intype).kind == CXType_Pointer ? CppAsString2(CppConcat(KNOWN_TYPE_P_, name)) : CppAsString2(CppConcat(KNOWN_TYPE_, name)))
+
+/*
+ * Work out how a field of type intype is to be described in the generated
+ * metadata.
+ *
+ * If intype is a pointer, the categorization is based on the pointed-to
+ * type, and the KNOWN_TYPE_P_ variants of the type constants are used.
+ *
+ * flags: names of flags applying to the type are appended here
+ * type_id: set to the id of the node type or enum the type refers to; left
+ *		untouched for all other types
+ * known_type_id: set to an expression yielding the KNOWN_TYPE_ constant for
+ *		the type; left untouched if the type is not recognized. The more
+ *		specific checks further down may replace a generic enum / node
+ *		categorization made earlier.
+ * elem_known_type_id: for pgarr.h style arrays, receives the known type of
+ *		the array's elements
+ * elem_size: for pgarr.h style arrays, set to a sizeof() expression for the
+ *		array's elements, if their size can be determined. NULL is passed
+ *		when categorizing the elements of an array, which is how arrays of
+ *		arrays are detected and rejected.
+ */
+static void
+categorize_type(CollectInfo *collect_info, CXType intype,
+				StringInfo flags,
+				uint16 *type_id,
+				char **known_type_id,
+				char **elem_known_type_id,
+				char **elem_size)
+{
+	CXType type;
+	CXType canon_type;
+	enum CXTypeKind type_kind;
+	enum CXTypeKind canon_type_kind;
+	const char *type_name;
+	const char *canon_type_name;
+
+	if (clang_getCanonicalType(intype).kind == CXType_Pointer)
+	{
+		intype = clang_getCanonicalType(intype);
+		type = clang_getPointeeType(intype);
+		flag_append(flags, "TYPE_CAT_SCALAR");
+	}
+	else
+	{
+		type = intype;
+		flag_append(flags, "TYPE_CAT_SCALAR");
+	}
+
+	canon_type = clang_getCanonicalType(type);
+	type_kind = type.kind;
+	canon_type_kind = canon_type.kind;
+	type_name = clang_getCString(clang_getTypeSpelling(type));
+	canon_type_name = clang_getCString(clang_getTypeSpelling(canon_type));
+
+	/* first determine whether the type refers to an enum or a node type */
+	if (canon_type_kind == CXType_Enum)
+	{
+		*known_type_id = tpref(intype, ENUM);
+		*type_id = get_enum(collect_info, canon_type);
+	}
+	else
+	{
+		int tp = string_in_arr(&collect_info->interesting_node_types, canon_type_name);
+
+		if (tp != -1)
+		{
+			*type_id = tp;
+			*known_type_id = tpref(intype, NODE);
+		}
+	}
+
+	/* then check for types that need to be treated more specifically */
+	if (type_kind == CXType_Typedef && canon_type_kind == CXType_UInt &&
+		strcmp(type_name, "Oid") == 0)
+		*known_type_id = tpref(intype, OID);
+	else if (type_kind == CXType_Typedef && canon_type_kind == CXType_Int &&
+			 strcmp(type_name, "Location") == 0)
+	{
+		*known_type_id = tpref(intype, LOCATION);
+		flag_append(flags, "TYPE_EQUAL_IGNORE");
+	}
+	else if (type_kind == CXType_Typedef && (
+				 canon_type_kind == CXType_Enum) &&
+			 strcmp(type_name, "CoercionForm") == 0)
+	{
+		*known_type_id = tpref(intype, COERCIONFORM);
+		flag_append(flags, "TYPE_EQUAL_IGNORE");
+	}
+	else if (type_kind == CXType_Typedef && (
+				 canon_type_kind == CXType_Enum) &&
+			 strcmp(type_name, "NodeTag") == 0)
+	{
+		*known_type_id = tpref(intype, NODE_TAG);
+	}
+	else if (type_kind == CXType_Typedef && (
+				 canon_type_kind == CXType_UInt ||
+				 canon_type_kind == CXType_ULong ||
+				 canon_type_kind == CXType_ULongLong) &&
+			 strcmp(type_name, "Datum") == 0)
+		*known_type_id = tpref(intype, DATUM);
+	else if (canon_type_kind == CXType_Char_S ||
+			 canon_type_kind == CXType_SChar ||
+			 canon_type_kind == CXType_Char_U ||
+			 canon_type_kind == CXType_UChar)
+		*known_type_id = tpref(intype, CHAR);
+	else if (canon_type_kind == CXType_UShort||
+			 canon_type_kind == CXType_UInt ||
+			 canon_type_kind ==  CXType_ULong ||
+			 canon_type_kind ==  CXType_ULongLong ||
+			 canon_type_kind == CXType_UInt128)
+	{
+		if (canon_type_kind == CXType_UShort)
+			*known_type_id = tpref(intype, UINT16);
+		else if (canon_type_kind == CXType_UInt)
+			*known_type_id = tpref(intype, UINT32);
+		else if (canon_type_kind == CXType_ULong || canon_type_kind == CXType_ULongLong)
+		{
+			/*
+			 * The width of these types is platform dependent, so leave the
+			 * decision to the compiler of the generated file.
+			 */
+			if (intype.kind != CXType_Pointer)
+				*known_type_id = psprintf("(sizeof(%s) == 8 ? KNOWN_TYPE_UINT64 : KNOWN_TYPE_UINT32)", canon_type_name);
+			else
+				*known_type_id = psprintf("(sizeof(%s) == 8 ? KNOWN_TYPE_P_UINT64 : KNOWN_TYPE_P_UINT32)", canon_type_name);
+		}
+		else if (canon_type_kind == CXType_UInt128)
+			*known_type_id = tpref(intype, UINT128);
+	}
+	else if (canon_type_kind == CXType_Short || canon_type_kind == CXType_Int ||
+			 canon_type_kind ==  CXType_Long || canon_type_kind ==  CXType_LongLong ||
+			 canon_type_kind == CXType_Int128)
+	{
+		if (canon_type_kind == CXType_Short)
+			*known_type_id = tpref(intype, INT16);
+		else if (canon_type_kind == CXType_Int)
+			*known_type_id = tpref(intype, INT32);
+		else if (canon_type_kind == CXType_Long || canon_type_kind == CXType_LongLong)
+		{
+			/* as above, width is only known when the output is compiled */
+			if (intype.kind != CXType_Pointer)
+				*known_type_id = psprintf("(sizeof(%s) == 8 ? KNOWN_TYPE_INT64 : KNOWN_TYPE_INT32)", canon_type_name);
+			else
+				*known_type_id = psprintf("(sizeof(%s) == 8 ? KNOWN_TYPE_P_INT64 : KNOWN_TYPE_P_INT32)", canon_type_name);
+		}
+		else if (canon_type_kind == CXType_Int128)
+			*known_type_id = tpref(intype, INT128);
+	}
+	else if (canon_type_kind == CXType_Float)
+		*known_type_id = tpref(intype, FLOAT32);
+	else if (canon_type_kind == CXType_Double)
+		*known_type_id = tpref(intype, FLOAT64);
+	else if (canon_type_kind == CXType_Bool)
+		*known_type_id = tpref(intype, BOOL);
+	else if (strcmp(canon_type_name, "struct Bitmapset") == 0)
+		*known_type_id = tpref(intype, BITMAPSET); /* error if not pointer */
+	else if (strcmp(canon_type_name, "struct Node") == 0)
+	{
+		/*
+		 * struct Node itself currently is not recognized as a node type, so
+		 * there is no type id that could be assigned - but pointers to it
+		 * are used to reference a generic node.
+		 */
+
+		if (intype.kind != CXType_Pointer)
+		{
+			fprintf(stderr, "struct Node cannot be embedded\n");
+			exit(EXIT_FAILURE);
+		}
+		else
+		{
+			Assert(*type_id == TYPE_ID_UNKNOWN);
+			*type_id = TYPE_ID_UNKNOWN;
+			*known_type_id = "KNOWN_TYPE_P_NODE";
+		}
+	}
+	else if (strncmp(canon_type_name, "struct ArrayOf", sizeof("struct ArrayOf") - 1) == 0)
+	{
+		PgArrFieldsState state = {0};
+
+		if (elem_size == NULL)
+		{
+			fprintf(stderr, "recursive arrays are not supported\n");
+			exit(EXIT_FAILURE);
+		}
+
+		clang_Type_visitFields(
+			canon_type,
+			find_PgArrFields_vis,
+			&state);
+
+		/*
+		 * Without this check a struct that merely is named like a pgarr.h
+		 * array would be described using a zeroed element type.
+		 */
+		if (!state.valid)
+		{
+			fprintf(stderr, "%s does not have the layout of a pgarr.h array\n",
+					canon_type_name);
+			exit(EXIT_FAILURE);
+		}
+
+#if 0
+		fprintf(stderr, "pgarr: %s: %s: contains %s: %u %s\n",
+				clang_getCString(clang_getTypeKindSpelling(canon_type_kind)),
+				canon_type_name,
+				type_name + (sizeof("struct ArrayOf") - 1),
+				state.valid,
+				clang_getCString(clang_getTypeSpelling(state.tp))
+			);
+#endif
+
+		/*
+		 * Categorize the element type; its known type is stored as the
+		 * array's elem_known_type_id. Passing NULL for elem_size makes a
+		 * nested array fail above.
+		 */
+		categorize_type(collect_info, state.tp,
+						flags, type_id, elem_known_type_id, NULL, NULL);
+
+		*known_type_id = tpref(intype, PGARR);
+		if (clang_Type_getSizeOf(state.tp) >= 0)
+			*elem_size = psprintf("sizeof(%s)", clang_getCString(clang_getTypeSpelling(state.tp)));
+	}
+	else if (canon_type_kind == CXType_Record && strcmp(canon_type_name, "union ValUnion") == 0)
+	{
+		*known_type_id = tpref(intype, VALUE_UNION);
+	}
+}
+
+/*
+ * Visit elements of the NodeTag enum, to collect the names of all node types.
+ *
+ * The names are stored in interesting_node_typedefs, without their "T_"
+ * prefix. An enum member without that prefix is treated as a fatal error.
+ */
+static enum CXChildVisitResult
+find_NodeTagElems_vis(CXCursor cursor, CXCursor parent, CXClientData client_data)
+{
+	if (clang_getCursorKind(cursor) == CXCursor_EnumConstantDecl)
+	{
+		CollectInfo *collect_info = (CollectInfo *) client_data;
+		CXString spelling = clang_getCursorSpelling(cursor);
+		const char *name = clang_getCString(spelling);
+
+		if (strncmp(name, "T_", 2) != 0)
+		{
+			fprintf(stderr, "unexpected name: %s\n", name);
+			exit(EXIT_FAILURE);
+		}
+
+		pgarr_append(char *, &collect_info->interesting_node_typedefs, pstrdup(name + 2));
+
+		/* the array holds its own copy, the clang string can go */
+		clang_disposeString(spelling);
+	}
+
+	return CXChildVisit_Recurse;
+}
+
+/*
+ * Find the NodeTag enum, and collect its elements using
+ * find_NodeTagElems_vis. The traversal is stopped once the enum has been
+ * processed.
+ */
+static enum CXChildVisitResult
+find_NodeTag_vis(CXCursor cursor, CXCursor parent, CXClientData client_data)
+{
+	if (clang_getCursorKind(cursor) == CXCursor_EnumDecl)
+	{
+		CXString spelling = clang_getCursorSpelling(cursor);
+		bool is_nodetag = strcmp(clang_getCString(spelling), "NodeTag") == 0;
+
+		clang_disposeString(spelling);
+
+		/* some other enum, keep looking */
+		if (!is_nodetag)
+			return CXChildVisit_Recurse;
+
+		clang_visitChildren(
+			cursor,
+			find_NodeTagElems_vis,
+			client_data);
+
+		return CXChildVisit_Break;
+	}
+	return CXChildVisit_Recurse;
+}
+
+/*
+ * Collect information about the elements of Node style struct members.
+ *
+ * Field visitor, called for each field of collect_info->current_struct_type.
+ * Appends the text of one TIStructField initializer describing the field to
+ * collect_info->struct_field_strings, and advances collect_info->off.
+ *
+ * Besides the generic categorization done by categorize_type(), this is
+ * where flags for fields needing special treatment in specific structs are
+ * added.
+ */
+static enum CXVisitorResult
+find_StructFields_vis(CXCursor cursor, CXClientData client_data)
+{
+	CollectInfo *collect_info = (CollectInfo *) client_data;
+	const char *structname = clang_getCString(clang_getTypeSpelling(collect_info->current_struct_type));
+	const char *fieldname = clang_getCString(clang_getCursorSpelling(cursor));
+	CXType fieldtype = clang_getCanonicalType(clang_getCursorType(cursor));
+	const char *fieldtypename =
+		clang_getCString(clang_getTypeSpelling(fieldtype));
+	uint16 type_id = TYPE_ID_UNKNOWN;
+	char *known_type_id = "KNOWN_TYPE_UNKNOWN";
+	char *elem_known_type_id = "KNOWN_TYPE_UNKNOWN";
+	char *s;
+	StringInfoData flags;
+	char *elem_size = "TYPE_SIZE_UNKNOWN";
+	char *field_size;
+	char *type_id_s;
+
+	initStringInfo(&flags);
+
+	/*
+	 * Pass the type as declared rather than the canonical one,
+	 * categorize_type() recognizes some types by their typedef name.
+	 */
+	categorize_type(collect_info, clang_getCursorType(cursor), &flags, &type_id, &known_type_id, &elem_known_type_id, &elem_size);
+
+	/* can't measure size for incomplete types (e.g. variable length arrays at the end of a struct) */
+	if (clang_Type_getSizeOf(fieldtype) < 0)
+	{
+		flag_append(&flags, "TYPE_CAT_INCOMPLETE");
+
+		field_size = "TYPE_SIZE_UNKNOWN";
+	}
+	else
+	{
+		field_size = psprintf("sizeof(%s)", fieldtypename);
+	}
+
+
+	/* XXX: these probably ought to be moved into a different function */
+
+	/*
+	 * Note that this is one if / else if chain: at most one of the branches
+	 * below is taken for a field, and the opfuncid check at its end is only
+	 * reached for structs not named in one of the earlier branches.
+	 */
+
+	/* collect_info->off == 0 means this is the first field of the struct */
+	if (strcmp(known_type_id, "KNOWN_TYPE_NODE_TAG") == 0 && collect_info->off == 0)
+	{
+		/* no need to output the type itself, included otherwise in output */
+		flag_append(&flags, "TYPE_OUT_IGNORE");
+	}
+	else if (strcmp(structname, "struct PlaceHolderVar") == 0)
+	{
+		if (strcmp(fieldname, "phrels") == 0 ||
+			strcmp(fieldname, "phexpr") == 0)
+		{
+			/*
+			 * We intentionally do not compare phexpr.  Two PlaceHolderVars
+			 * with the same ID and levelsup should be considered equal even
+			 * if the contained expressions have managed to mutate to
+			 * different states.  This will happen during final plan
+			 * construction when there are nested PHVs, since the inner PHV
+			 * will get replaced by a Param in some copies of the outer PHV.
+			 * Another way in which it can happen is that initplan sublinks
+			 * could get replaced by differently-numbered Params when sublink
+			 * folding is done.  (The end result of such a situation would be
+			 * some unreferenced initplans, which is annoying but not really a
+			 * problem.) On the same reasoning, there is no need to examine
+			 * phrels.
+			 */
+			flag_append(&flags, "TYPE_EQUAL_IGNORE");
+		}
+	}
+	else if (strcmp(structname, "struct Query") == 0)
+	{
+		if (strcmp(fieldname, "queryId") == 0)
+		{
+			/* we intentionally ignore queryId, since it might not be set */
+			flag_append(&flags, "TYPE_EQUAL_IGNORE");
+		}
+	}
+	else if (strcmp(structname, "struct Aggref") == 0)
+	{
+		if (strcmp(fieldname, "aggtranstype") == 0)
+		{
+			/* ignore aggtranstype since it might not be set yet */
+			flag_append(&flags, "TYPE_EQUAL_IGNORE");
+		}
+	}
+	else if (strcmp(structname, "struct GroupingFunc") == 0)
+	{
+		if (strcmp(fieldname, "refs") == 0 ||
+			strcmp(fieldname, "cols") == 0)
+		{
+			/* We must not compare the refs or cols field */
+			flag_append(&flags, "TYPE_EQUAL_IGNORE");
+		}
+	}
+	else if (strcmp(structname, "struct RestrictInfo") == 0)
+
+	{
+		/* the fields listed here are the only ones that get compared */
+		if (strcmp(fieldname, "type") != 0 &&
+			strcmp(fieldname, "clause") != 0 &&
+			strcmp(fieldname, "is_pushed_down") != 0 &&
+			strcmp(fieldname, "outerjoin_delayed") != 0 &&
+			strcmp(fieldname, "security_level") != 0 &&
+			strcmp(fieldname, "required_relids") != 0 &&
+			strcmp(fieldname, "outer_relids") != 0 &&
+			strcmp(fieldname, "nullable_relids") != 0)
+		{
+			/*
+			 * We ignore all the other fields, since they may not be set yet, and
+			 * should be derivable from the clause anyway.
+			 */
+			flag_append(&flags, "TYPE_EQUAL_IGNORE");
+		}
+
+		if (strcmp(fieldname, "parent_ec") == 0 ||
+			strcmp(fieldname, "left_ec") == 0 ||
+			strcmp(fieldname, "right_ec") == 0 ||
+			strcmp(fieldname, "left_em") == 0 ||
+			strcmp(fieldname, "right_em") == 0)
+		{
+			/* EquivalenceClasses are never copied, so shallow-copy the pointers */
+			flag_append(&flags, "TYPE_COPY_FORCE_SCALAR");
+		}
+
+		if (strcmp(fieldname, "scansel_cache") == 0)
+		{
+			/* MergeScanSelCache isn't a Node, so hard to copy; just reset cache */
+			flag_append(&flags, "TYPE_COPY_IGNORE");
+		}
+	}
+	else if (strcmp(structname, "struct PathKey") == 0)
+	{
+		if (strcmp(fieldname, "pk_eclass") == 0)
+		{
+			/* We assume pointer equality is sufficient to compare the eclasses */
+			flag_append(&flags, "TYPE_EQUAL_FORCE_SCALAR");
+			flag_append(&flags, "TYPE_COPY_FORCE_SCALAR");
+		}
+	}
+	else if (strcmp(fieldname, "opfuncid") == 0)
+	{
+		/* recognized by field name alone, replacing the generic categorization */
+		known_type_id = "KNOWN_TYPE_OPFUNCID";
+	}
+
+	/* an unknown type id is emitted by name rather than by value */
+	if (type_id == TYPE_ID_UNKNOWN)
+		type_id_s = "TYPE_ID_UNKNOWN";
+	else
+		type_id_s = psprintf("%u", type_id);
+
+	/* .flags needs some expression, even if no flag applies */
+	if (flags.len == 0)
+		appendStringInfoChar(&flags, '0');
+
+	s = psprintf("{.name = %u /* %s */, .type = %u /* %s */, .offset = offsetof(%s, %s), .size = %s, .flags = %s, .type_id = %s, .known_type_id = %s, .elem_known_type_id = %s, .elem_size = %s}",
+				 intern_string(collect_info, fieldname), fieldname,
+				 intern_string(collect_info, fieldtypename), fieldtypename,
+				 structname, /* offsetof */
+				 fieldname, /* offsetof */
+				 field_size,
+				 flags.data,
+				 type_id_s,
+				 known_type_id,
+				 elem_known_type_id,
+				 elem_size);
+
+	pgarr_append(char *, &collect_info->struct_field_strings, s);
+
+	collect_info->off++;
+
+	/*
+	 * NOTE(review): flags.data was allocated by initStringInfo(); releasing
+	 * it with free() presumably is fine in frontend code - confirm, or use
+	 * pfree().
+	 */
+	free(flags.data);
+
+	return CXVisit_Continue;
+}
+
+/*
+ * Collect the names of all the structs that "implement" node types (those
+ * names have previously been collected with find_NodeTag_vis). As we
+ * sometimes have forward declarations, we need to use a canonicalized name,
+ * as it's far easier to always use the underlying struct names, than somehow
+ * go the other way.
+ *
+ * The canonical name is stored in interesting_node_types, at the position
+ * the typedef's name has in interesting_node_typedefs.
+ */
+static enum CXChildVisitResult
+find_NodeStructs_vis(CXCursor cursor, CXCursor parent, CXClientData client_data)
+{
+	/*
+	 * We'll reach each struct type twice - once for the typedef, and once for
+	 * the struct itself. We only check typedef, including its name, because
+	 * that's what needs to correspond to the NodeTag names.
+	 */
+	if (clang_getCursorKind(cursor) == CXCursor_TypedefDecl)
+	{
+		CollectInfo *collect_info = (CollectInfo *) client_data;
+		CXType type = clang_getCursorType(cursor);
+		CXString spelling = clang_getTypeSpelling(type);
+		int type_pos = string_in_arr(&collect_info->interesting_node_typedefs,
+									 clang_getCString(spelling));
+
+		clang_disposeString(spelling);
+
+		if (type_pos != -1)
+		{
+			CXString canon_spelling =
+				clang_getTypeSpelling(clang_getCanonicalType(type));
+
+			/*
+			 * Store a copy, rather than a pointer into a string owned by
+			 * libclang, so the array's contents do not depend on that
+			 * string never being disposed.
+			 */
+			*pgarr_at(&collect_info->interesting_node_types, type_pos) =
+				pstrdup(clang_getCString(canon_spelling));
+
+			clang_disposeString(canon_spelling);
+		}
+
+		return CXChildVisit_Continue;
+	}
+	return CXChildVisit_Recurse;
+}
+
+/*
+ * Collect the definition of all node structs. This is done separately from
+ * collecting the struct names (in find_NodeStructs_vis), because we need to
+ * identify whether struct members are node types themselves, for which we
+ * need their canonical names.
+ *
+ * For each node type found, the TIStructField initializers of its fields are
+ * appended to struct_field_strings, and the TINodeType initializer
+ * referencing them is stored in node_type_strings, at the position the type
+ * has in interesting_node_typedefs.
+ */
+static enum CXChildVisitResult
+find_NodeStructDefs_vis(CXCursor cursor, CXCursor parent, CXClientData client_data)
+{
+	/*
+	 * We'll reach each struct type twice - once for the typedef, and once for
+	 * the struct. Only the typedef is checked, as its name is what
+	 * corresponds to the NodeTag names.
+	 */
+	if (clang_getCursorKind(cursor) == CXCursor_TypedefDecl)
+	{
+		CollectInfo *collect_info = (CollectInfo *) client_data;
+		CXString spelling_str =
+			clang_getTypeSpelling(clang_getCursorType(cursor));
+		const char *spelling = clang_getCString(spelling_str);
+		int type_pos = string_in_arr(&collect_info->interesting_node_typedefs, spelling);
+		size_t fields_at_start;
+		char *size;
+		char *s;
+
+		if (type_pos == -1)
+		{
+			clang_disposeString(spelling_str);
+			return CXChildVisit_Continue;
+		}
+
+		/* set up the state find_StructFields_vis relies on */
+		collect_info->off = 0;
+		collect_info->current_struct_type = clang_getCanonicalType(clang_getCursorType(cursor));
+
+		fields_at_start = pgarr_size(&collect_info->struct_field_strings);
+
+		clang_Type_visitFields(
+			collect_info->current_struct_type,
+			find_StructFields_vis,
+			collect_info);
+
+		if (clang_Type_getSizeOf(collect_info->current_struct_type) == CXTypeLayoutError_Incomplete)
+			size = "TYPE_SIZE_UNKNOWN";
+		else
+			size = psprintf("sizeof(%s)", spelling);
+
+		s = psprintf("{.name = %u /* %s */, .first_field_at = %zu, .num_fields = %zu, .size = %s}",
+					 intern_string(collect_info, spelling), spelling,
+					 fields_at_start,
+					 pgarr_size(&collect_info->struct_field_strings) - fields_at_start,
+					 size);
+
+		*pgarr_at(&collect_info->node_type_strings, type_pos) = s;
+
+		/* spelling points into spelling_str, everything needed was copied */
+		clang_disposeString(spelling_str);
+
+		return CXChildVisit_Continue;
+	}
+	return CXChildVisit_Recurse;
+}
+
+/*
+ * Generate a C file containing metadata about all node types.
+ *
+ * Arguments up to a "--" are for this program:
+ *	--llvm-config <path>	required, passed to clang as its argv[0]
+ *	--output <file>			required, the file to generate
+ *	anything else			name of a header to analyze; an #include for it
+ *							is added to the file that is parsed
+ * All arguments following "--" are passed on to clang unchanged.
+ *
+ * The headers are parsed using libclang, the node type information is
+ * collected in three passes over the result, and finally the arrays
+ * ti_node_types, ti_struct_fields, ti_enums, ti_enum_fields and ti_strings
+ * are written to the output file. Exits with EXIT_FAILURE if the arguments
+ * are incomplete, parsing produces diagnostics, or the output file cannot be
+ * written.
+ */
+int main(int argc, char **argv)
+{
+	CXCursor cursor;
+	CollectInfo collect_info = {0};
+	CXIndex index;
+	enum CXErrorCode error;
+	CXTranslationUnit unit;
+	uint32 num_diagnostics;
+	const char *empty_filename = "empty_nodes.c";
+	struct CXUnsavedFile empty = {
+		.Filename = empty_filename};
+	PGARR(constcharstar) clang_args = {0};
+	bool first;
+	StringInfoData file_contents;
+	char *output_fname = NULL;
+	bool parsing_self = true;
+	FILE *output;
+
+	initStringInfo(&file_contents);
+
+	appendStringInfoString(&file_contents, "#include \"postgres.h\"\n\n");
+
+	/* to make space for path to llvm-config */
+	pgarr_append(const char *, &clang_args, NULL);
+
+	/* FIXME: proper argument parsing / passing */
+	for (int argno = 1; argno < argc; argno++)
+	{
+		const char *arg = argv[argno];
+
+		/*
+		 * Until "--" arguments are for this program, after that they're
+		 * passed to clang.
+		 */
+		if (parsing_self)
+		{
+			if (strcmp(arg, "--llvm-config") == 0)
+			{
+				argno++;
+				if (argno < argc)
+				{
+					arg = argv[argno];
+					*pgarr_at(&clang_args, 0) = arg;
+				}
+			}
+			else if (strcmp(arg, "--output") == 0)
+			{
+				argno++;
+				if (argno < argc)
+					output_fname = argv[argno];
+			}
+			else if (strcmp(arg, "--") == 0)
+				parsing_self = false;
+			else
+			{
+				appendStringInfo(&file_contents,
+								 "#include \"%s\"\n",
+								 arg);
+			}
+		}
+		else
+			pgarr_append(const char *, &clang_args, arg);
+	}
+
+	/* an option given without its value is caught here as well */
+	if (*pgarr_at(&clang_args, 0) == NULL)
+	{
+		fprintf(stderr, "require path to llvm\n");
+		exit(EXIT_FAILURE);
+	}
+	else if (output_fname == NULL)
+	{
+		fprintf(stderr, "require output_file\n");
+		exit(EXIT_FAILURE);
+	}
+
+	output = fopen(output_fname, PG_BINARY_W);
+	if (output == NULL)
+	{
+		fprintf(stderr, "could not open output file \"%s\": %s\n",
+				output_fname, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	/* the file to parse only exists in memory */
+	empty.Contents = file_contents.data;
+	empty.Length = file_contents.len;
+
+	index = clang_createIndex(
+		/* excludeDeclarationsFromPCH */ 0,
+		/* displayDiagnostics */ 0);
+
+	error = clang_parseTranslationUnit2FullArgv(
+		index,
+		/* source_filename */ empty_filename,
+		/* commandline_args */ pgarr_data(&clang_args),
+		/* num_commandline_args */ pgarr_size(&clang_args),
+		/* unsaved_files */ &empty,
+		/* num_unsaved_files */ 1,
+		CXTranslationUnit_SkipFunctionBodies,
+		&unit);
+
+	/* normally parsing succeeds, except if there's some internal errors */
+	if (error != CXError_Success)
+	{
+		fprintf(stderr, "failure while trying to parse %d\n", error);
+		exit(EXIT_FAILURE);
+	}
+
+	/* display diagnostics, and fail if there are any warnings */
+	if ((num_diagnostics = clang_getNumDiagnostics(unit)) != 0)
+	{
+		uint32 diag_display_opt = clang_defaultDiagnosticDisplayOptions();
+		bool has_error = false;
+
+		for (uint32 i = 0; i < num_diagnostics; i++)
+		{
+			CXDiagnostic diag = clang_getDiagnostic(unit, i);
+			CXString lstr;
+			const char *str;
+
+			/* fail on anything of at least note severity, so also on warnings */
+			if (clang_getDiagnosticSeverity(diag) >= CXDiagnostic_Note)
+				has_error = true;
+
+			lstr = clang_formatDiagnostic(diag, diag_display_opt);
+
+			str = clang_getCString(lstr);
+			fprintf(stderr, "%s\n", str);
+
+			clang_disposeString(lstr);
+			clang_disposeDiagnostic(diag);
+		}
+
+		if (has_error)
+		{
+			fprintf(stderr, "Unable to parse translation unit\n");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+
+	/*
+	 * Ok, finally ready to analyze.
+	 */
+	cursor = clang_getTranslationUnitCursor(unit);
+
+	/*
+	 * First collect elements of NodeTag, to determine for which struct types
+	 * to collect information about.
+	 */
+	clang_visitChildren(
+		cursor,
+		find_NodeTag_vis,
+		&collect_info);
+
+	/*
+	 * Find the underlying types for the NodeTag elements where
+	 * possible.
+	 *
+	 * There are a few node types where that's not possible, e.g. because
+	 * they're defined in a .c file.
+	 */
+	pgarr_set_all(&collect_info.interesting_node_types,
+				  pgarr_size(&collect_info.interesting_node_typedefs),
+				  0);
+	clang_visitChildren(
+		cursor,
+		find_NodeStructs_vis,
+		&collect_info);
+
+	/* then traverse again, to find the structs definitions for the types above */
+	pgarr_set_all(&collect_info.node_type_strings,
+				  pgarr_size(&collect_info.interesting_node_typedefs),
+				  0);
+	clang_visitChildren(
+		cursor,
+		find_NodeStructDefs_vis,
+		&collect_info);
+
+	/*
+	 * Collected all the necessary information, print it out to the output
+	 * file. The output starts with the same #includes that were parsed, so
+	 * that all the types referenced are declared.
+	 */
+	appendStringInfoString(&file_contents, "\n#include \"nodes/nodeinfo.h\"\n\n");
+	fwrite(file_contents.data, file_contents.len, 1, output);
+
+	first = true;
+	fprintf(output, "const TINodeType ti_node_types[]  = {\n");
+	for (size_t i = 0; i < pgarr_size(&collect_info.node_type_strings); i++)
+	{
+		const char *s = *pgarr_at(&collect_info.node_type_strings, i);
+
+		if (!first)
+			fprintf(output, ",\n");
+		else
+			first = false;
+
+		/* node types without a known definition get an empty entry */
+		if (s)
+			fprintf(output, "\t%s", s);
+		else
+			fprintf(output, "\t{0}");
+	}
+	fprintf(output, "\n};\n\n");
+
+	first = true;
+	fprintf(output, "const TIStructField ti_struct_fields[] = {\n");
+	for (size_t i = 0; i < pgarr_size(&collect_info.struct_field_strings); i++)
+	{
+		const char *s = *pgarr_at(&collect_info.struct_field_strings, i);
+
+		if (!first)
+			fprintf(output, ",\n");
+		else
+			first = false;
+
+		fprintf(output, "\t%s", s);
+	}
+	fprintf(output, "\n};\n\n");
+
+	first = true;
+	fprintf(output, "const TIEnum ti_enums[] = {\n");
+	for (size_t i = 0; i < pgarr_size(&collect_info.enum_strings); i++)
+	{
+		const char *s = *pgarr_at(&collect_info.enum_strings, i);
+
+		if (!first)
+			fprintf(output, ",\n");
+		else
+			first = false;
+
+		fprintf(output, "\t%s", s);
+	}
+	fprintf(output, "\n};\n\n");
+
+	first = true;
+	fprintf(output, "const TIEnumField ti_enum_fields[] = {\n");
+	for (size_t i = 0; i < pgarr_size(&collect_info.enum_field_strings); i++)
+	{
+		const char *s = *pgarr_at(&collect_info.enum_field_strings, i);
+
+		if (!first)
+			fprintf(output, ",\n");
+		else
+			first = false;
+
+		fprintf(output, "\t%s", s);
+	}
+	fprintf(output, "\n};\n\n");
+
+	first = true;
+	fprintf(output, "const TIString ti_strings[] = {\n");
+	for (size_t i = 0; i < pgarr_size(&collect_info.strtab); i++)
+	{
+		const char *s = *pgarr_at(&collect_info.strtab, i);
+
+		if (!first)
+			fprintf(output, ",\n");
+		else
+			first = false;
+
+		fprintf(output, "\t{.length = sizeof(\"%s\") - 1, .string = \"%s\"}", s, s);
+	}
+	fprintf(output, "\n};\n");
+
+	/* don't leave a truncated file behind without anybody noticing */
+	if (ferror(output) || fclose(output) != 0)
+	{
+		fprintf(stderr, "could not write output file \"%s\": %s\n",
+				output_fname, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	clang_disposeTranslationUnit(unit);
+	clang_disposeIndex(index);
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 08f3491cba..a51ee0b47d 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -4347,7 +4347,7 @@ outNode(StringInfo str, const void *obj)
  *	   returns the ascii representation of the Node as a palloc'd string
  */
 char *
-nodeToString(const void *obj)
+nodeToStringOld(const void *obj)
 {
 	StringInfoData str;
 
diff --git a/src/backend/nodes/outfuncs_new.c b/src/backend/nodes/outfuncs_new.c
new file mode 100644
index 0000000000..5d3541ba31
--- /dev/null
+++ b/src/backend/nodes/outfuncs_new.c
@@ -0,0 +1,513 @@
+#include "postgres.h"
+
+#include "common/shortest_dec.h"
+#include "lib/stringinfo.h"
+#include "miscadmin.h"
+#include "nodes/nodes.h"
+#include "nodes/nodeinfo.h"
+#include "nodes/pg_list.h"
+#include "nodes/primnodes.h"
+#include "nodes/value.h"
+#include "utils/datum.h"
+
+
+typedef struct NodeOutContext
+{
+	StringInfoData str;			/* buffer the node's text form is built in */
+} NodeOutContext;
+
+
+static void nodeout_new_rec(NodeOutContext *context, const Node *obj);
+static void nodeout_fields(NodeOutContext *context, const Node *src, const TINodeType *type_info);
+static void nodeout_list(NodeOutContext *context, const List *obj, NodeTag tag);
+static void nodeout_field(NodeOutContext *context, const Node *obj,
+						  const TINodeType *type_info, const TIStructField *field_info,
+						  uint16 known_type_id, uint16 size, const void *ptr_src);
+static void nodeout_value_union(NodeOutContext *context, const Value *src, NodeTag tag);
+static void nodeout_bitmapset(NodeOutContext *context, const Bitmapset *bms);
+static void nodeout_token(NodeOutContext *context, const char *s);
+
+
+/*
+ * nodeToString - returns the ascii representation of the Node as a palloc'd
+ *	   string; USE_NEW_NODE_FUNCS selects the new or the old implementation
+ */
+char *
+nodeToString(const void *obj)
+{
+#ifdef USE_NEW_NODE_FUNCS
+	return nodeToStringNew(obj);
+#else
+	return nodeToStringOld(obj);
+#endif							/* USE_NEW_NODE_FUNCS */
+}
+
+/* Metadata-driven variant of nodeToString(); result is a palloc'd string. */
+char *
+nodeToStringNew(const void *obj)
+{
+	NodeOutContext out_ctx = {0};
+
+	/* set up the embedded buffer in place; stringinfo.h explains the idiom */
+	initStringInfo(&out_ctx.str);
+	nodeout_new_rec(&out_ctx, obj);
+
+	return out_ctx.str.data;
+}
+
+/*
+ * nodeout_new_rec -
+ *	   append the text form of one node (or "<>" for NULL) to context->str;
+ *	   lists get "( ... )", everything else "{TypeName tag :field value ...}"
+ */
+static void
+nodeout_new_rec(NodeOutContext *context, const Node *obj)
+{
+	StringInfo	buf = &context->str;
+	const TINodeType *node_meta;
+	const TIString *type_name;
+	NodeTag		node_tag;
+
+	/* a NULL pointer is represented by the empty token */
+	if (obj == NULL)
+	{
+		appendStringInfoString(buf, "<>");
+		return;
+	}
+
+	node_tag = nodeTag(obj);
+
+	/* recursion depth follows the tree's depth, so protect the stack */
+	check_stack_depth();
+
+	/* the three List flavors have a dedicated, more compact, format */
+	if (node_tag == T_List || node_tag == T_OidList || node_tag == T_IntList)
+	{
+		nodeout_list(context, (const List *) obj, node_tag);
+		return;
+	}
+
+	/* all other nodes are driven by the generated type metadata */
+	node_meta = &ti_node_types[node_tag];
+	Assert(node_meta->size > 0);
+	type_name = &ti_strings[node_meta->name];
+
+	appendStringInfoChar(buf, '{');
+	appendBinaryStringInfo(buf, type_name->string, type_name->length);
+	appendStringInfoChar(buf, ' ');
+	appendStringInfoInt32(buf, (int) node_tag);
+	nodeout_fields(context, obj, node_meta);
+	appendStringInfoChar(buf, '}');
+}
+
+/*
+ * nodeout_field -
+ *	   append the text form of one value of kind known_type_id, stored at
+ *	   ptr_src; obj is the node containing it (NULL for array elements) and
+ *	   field_info/type_info describe the field for lookups and error reports
+ */
+static void
+nodeout_field(NodeOutContext *context, const Node *obj,
+			  const TINodeType *type_info, const TIStructField *field_info,
+			  uint16 known_type_id, uint16 size, const void *ptr_src)
+{
+	Assert(known_type_id != TYPE_ID_UNKNOWN);
+	Assert(size != TYPE_SIZE_UNKNOWN);
+
+	switch (known_type_id)
+	{
+		case KNOWN_TYPE_UINT16:
+			appendStringInfoUInt32(&context->str, *(const uint16 *) ptr_src);
+			break;
+		case KNOWN_TYPE_OPFUNCID:
+		case KNOWN_TYPE_OID:
+		case KNOWN_TYPE_UINT32:
+			appendStringInfoUInt32(&context->str, *(const uint32 *) ptr_src);
+			break;
+		case KNOWN_TYPE_UINT64:
+			appendStringInfoUInt64(&context->str, *(const uint64 *) ptr_src);
+			break;
+
+		case KNOWN_TYPE_INT16:
+			appendStringInfoInt32(&context->str, *(const int16 *) ptr_src);
+			break;
+		case KNOWN_TYPE_LOCATION:
+		case KNOWN_TYPE_INT32:
+			appendStringInfoInt32(&context->str, *(const int32 *) ptr_src);
+			break;
+		case KNOWN_TYPE_INT64:
+			appendStringInfoInt64(&context->str, *(const int64 *) ptr_src);
+			break;
+
+		case KNOWN_TYPE_FLOAT32:
+			appendStringInfoFloat(&context->str, *(const float *) ptr_src);
+			break;
+		case KNOWN_TYPE_FLOAT64:
+			appendStringInfoDouble(&context->str, *(const double *) ptr_src);
+			break;
+
+		case KNOWN_TYPE_BOOL:
+			appendStringInfoString(&context->str, *(const bool *) ptr_src ? "true" : "false");
+			break;
+
+		case KNOWN_TYPE_CHAR:
+			{
+				char c = *(const char *) ptr_src;
+
+				if (c == 0)
+					appendStringInfoString(&context->str, "<>");
+				else if (!isalnum((unsigned char) c))
+				{
+					appendStringInfoChar(&context->str, '\\');
+					appendStringInfoChar(&context->str, c);
+				}
+				else
+					appendStringInfoChar(&context->str, c);
+				break;
+			}
+
+		case KNOWN_TYPE_ENUM:
+		case KNOWN_TYPE_COERCIONFORM:
+		case KNOWN_TYPE_NODE_TAG:
+			{
+				const TIEnum *enum_info = &ti_enums[field_info->type_id];
+				uint32 val = *(const uint32 *) ptr_src;
+				const TIString *sval = NULL;
+				int num_fields = enum_info->first_field_at + enum_info->num_fields;
+
+				Assert(size == sizeof(uint32));	/* val was read as 4 bytes */
+
+				for (int i = enum_info->first_field_at; i < num_fields; i++)
+				{
+					const TIEnumField *enum_field_info = &ti_enum_fields[i];
+
+					if (enum_field_info->value == val)
+					{
+						sval = &ti_strings[enum_field_info->name];
+						break;
+					}
+				}
+
+				if (sval == NULL)
+					elog(ERROR, "unknown enum %s val %u",
+						 ti_strings[enum_info->name].string,
+						 val);
+
+				/* enum name won't need escaping */
+				appendBinaryStringInfo(&context->str,
+									   sval->string,
+									   sval->length);
+				break;
+			}
+
+		case KNOWN_TYPE_DATUM:
+			{
+				const Const *csrc = castNode(Const, (Node *) obj);
+
+				if (csrc->constisnull)
+					appendStringInfoString(&context->str, "<>");
+				else
+					outDatum(&context->str, csrc->constvalue, csrc->constlen, csrc->constbyval);
+
+				break;
+			}
+
+		case KNOWN_TYPE_VALUE_UNION:
+			{
+				const Value *vsrc = (const Value *) obj;
+
+				Assert(IsAValue(vsrc));
+
+				nodeout_value_union(context, vsrc, nodeTag(vsrc));
+				break;
+			}
+
+		case KNOWN_TYPE_NODE:
+			{
+				const TINodeType *sub_type_info;
+				NodeTag sub_tag;
+
+				Assert(field_info->type_id != TYPE_ID_UNKNOWN);
+
+				/*
+				 * If at offset 0, this shares the NodeTag field with the
+				 * parent class. Therefore we have to rely on the declared
+				 * type.
+				 */
+				if (field_info->offset == 0)
+					sub_tag = field_info->type_id;
+				else
+				{
+					sub_tag = nodeTag(ptr_src);
+					Assert(ti_node_types[sub_tag].size ==
+						   ti_node_types[field_info->type_id].size);
+				}
+
+				sub_type_info = &ti_node_types[sub_tag];
+
+				appendStringInfoChar(&context->str, '{');
+				appendBinaryStringInfo(&context->str,
+									   ti_strings[sub_type_info->name].string,
+									   ti_strings[sub_type_info->name].length);
+				appendStringInfoChar(&context->str, ' ');
+				appendStringInfoInt32(&context->str, (int) sub_tag);
+
+				nodeout_fields(context,
+							   (const Node *) ptr_src,
+							   sub_type_info);
+
+				appendStringInfoChar(&context->str, '}');
+
+				break;
+			}
+
+		case KNOWN_TYPE_P_CHAR:
+			if (*(const char **) ptr_src == NULL)
+				appendStringInfoString(&context->str, "<>");
+			else
+			{
+				const char* s_src = *(const char **) ptr_src;
+
+				/*
+				 * Need to quote to allow distinguishing a NULL string and a
+				 * zero length string (i.e. starting with '\0').  We use
+				 * nodeout_token() to provide escaping of the string's
+				 * content, but we don't want it to do anything with an empty
+				 * string, as it'd output <>.
+				 */
+				appendStringInfoChar(&context->str, '"');
+				if (s_src[0] != '\0')
+					nodeout_token(context, s_src);
+				appendStringInfoChar(&context->str, '"');
+			}
+			break;
+
+		case KNOWN_TYPE_P_PGARR:
+			if (*(const PgArrBase **) ptr_src == NULL)
+				appendStringInfoString(&context->str, "<>");
+			else
+			{
+				const PgArrBase *arr_src = *(const PgArrBase **) ptr_src;
+
+				Assert(field_info->elem_size > 0);
+
+				appendStringInfoUInt32(&context->str, pgarr_size(arr_src));
+				appendStringInfoChar(&context->str, ' ');
+				for (int i = 0; i < pgarr_size(arr_src); i++)
+				{
+					nodeout_field(context, NULL, type_info, field_info,
+								  field_info->elem_known_type_id, field_info->elem_size,
+								  ((char *) arr_src->elementsp) + field_info->elem_size * i);
+					appendStringInfoChar(&context->str, ' ');
+				}
+			}
+
+			break;
+
+		case KNOWN_TYPE_P_NODE:
+			if (*(const Node **) ptr_src == NULL)
+				appendStringInfoString(&context->str, "<>");
+			else
+				nodeout_new_rec(context, *(const Node **) ptr_src);
+			break;
+
+		case KNOWN_TYPE_P_BITMAPSET:
+			if (*(const Bitmapset **) ptr_src == NULL)
+				appendStringInfoString(&context->str, "<>");
+			else
+				nodeout_bitmapset(context, *(const Bitmapset **) ptr_src);
+			break;
+
+		default:
+			elog(ERROR, "don't know how to output field %s %s->%s",
+				 ti_strings[field_info->type].string,
+				 ti_strings[type_info->name].string,
+				 ti_strings[field_info->name].string);
+			break;
+	}
+}
+
+/* Append " :name value" for every field of *src that is not TYPE_OUT_IGNORE. */
+static void
+nodeout_fields(NodeOutContext *context, const Node *src, const TINodeType *type_info)
+{
+	const TIStructField *fields = &ti_struct_fields[type_info->first_field_at];
+
+	/* FIXME: ExtensibleNode needs to call callbacks, or be reimplemented */
+	for (int fieldno = 0; fieldno < type_info->num_fields; fieldno++)
+	{
+		const TIStructField *cur = &fields[fieldno];
+		const TIString *label;
+
+		if ((cur->flags & TYPE_OUT_IGNORE) != 0)
+			continue;
+		label = &ti_strings[cur->name];
+
+		appendStringInfoString(&context->str, " :");
+		appendBinaryStringInfo(&context->str, label->string, label->length);
+		appendStringInfoChar(&context->str, ' ');
+		nodeout_field(context, src, type_info, cur, cur->known_type_id,
+					  cur->size, (char *) src + cur->offset);
+	}
+}
+
+/* nodeout_list - append a List in its "( ...)", "(o ...)" or "(i ...)" form */
+static void
+nodeout_list(NodeOutContext *context, const List *src, NodeTag tag)
+{
+	StringInfo	buf = &context->str;
+
+	appendStringInfoChar(buf, '(');
+
+	/*
+	 * Note that we always output the separator, even in the first loop
+	 * iteration. The read routines rely on the output starting with "i ", "o
+	 * ", or " {node data}", which is achieved by always outputting space.
+	 */
+	switch (tag)
+	{
+		case T_List:
+			for (int i = 0; i < src->length; i++)
+			{
+				appendStringInfoChar(buf, ' ');
+				nodeout_new_rec(context, lfirst(&src->elements[i]));
+			}
+			break;
+
+		case T_OidList:
+			appendStringInfoChar(buf, 'o');
+			for (int i = 0; i < src->length; i++)
+			{
+				appendStringInfoChar(buf, ' ');
+				appendStringInfoUInt32(buf, lfirst_oid(&src->elements[i]));
+			}
+			break;
+
+		case T_IntList:
+			appendStringInfoChar(buf, 'i');
+			for (int i = 0; i < src->length; i++)
+			{
+				appendStringInfoChar(buf, ' ');
+
+				/* elements are signed ints, so don't print them as unsigned */
+				appendStringInfoInt32(buf, lfirst_int(&src->elements[i]));
+			}
+			break;
+
+		default:
+			pg_unreachable();
+	}
+
+	appendStringInfoChar(buf, ')');
+}
+
+/*
+ * nodeout_value_union -
+ *	   append the payload of a Value node; tag says which member is in use
+ */
+static void
+nodeout_value_union(NodeOutContext *context, const Value *src, NodeTag tag)
+{
+	StringInfo	buf = &context->str;
+
+	if (tag == T_Integer)
+		appendStringInfoInt32(buf, src->val.ival);
+	else if (tag == T_Float || tag == T_BitString)
+	{
+		/*
+		 * Emitted verbatim: a T_Float is assumed to hold a valid numeric
+		 * literal that needs no quoting, and a T_BitString's internal
+		 * representation already starts with 'b'.
+		 */
+		appendStringInfoString(buf, src->val.str);
+	}
+	else if (tag == T_String)
+	{
+		const char *text = src->val.str;
+
+		/*
+		 * Quote the string so that a zero-length string stays distinct from
+		 * a NULL one.  nodeout_token() escapes the content, but must be
+		 * skipped for the empty string, which it would turn into <>.
+		 */
+		appendStringInfoChar(buf, '"');
+		if (text[0] != '\0')
+			nodeout_token(context, text);
+		appendStringInfoChar(buf, '"');
+	}
+	else if (tag == T_Null)
+	{
+		/* this is seen only within A_Const, not in transformed trees */
+		appendStringInfoString(buf, "<>");
+	}
+	else
+	{
+		/* no other tag denotes a Value */
+		Assert(false);
+		pg_unreachable();
+	}
+}
+
+/*
+ * nodeout_bitmapset -
+ *	   append a bitmap set as "(b int int ...)", similar to an integer List;
+ *	   the members appear in the order bms_next_member() returns them
+ */
+static void
+nodeout_bitmapset(NodeOutContext *context, const Bitmapset *bms)
+{
+	StringInfo	buf = &context->str;
+	int			member;
+
+	appendStringInfoString(buf, "(b");
+	for (member = bms_next_member(bms, -1);
+		 member >= 0;
+		 member = bms_next_member(bms, member))
+	{
+		appendStringInfoChar(buf, ' ');
+		appendStringInfoInt32(buf, member);
+	}
+	appendStringInfoChar(buf, ')');
+}
+
+
+/*
+ * nodeout_token
+ *	  Append an ordinary string (eg, an identifier) to context->str in a form
+ *	  that the reader scans as a single token and can debackslash again.
+ *
+ *	  If a null or empty string is given, it is encoded as "<>".
+ */
+static void
+nodeout_token(NodeOutContext *context, const char *s)
+{
+	if (s == NULL || *s == '\0')
+	{
+		appendStringInfoString(&context->str, "<>");
+		return;
+	}
+
+	/*
+	 * Look for characters that are treated specially by the tokenizer
+	 * (nodein_strtok() in the new reader, pg_strtok() in read.c), and
+	 * therefore need a protective backslash.
+	 */
+#ifdef NOT_ANYMORE
+	/* Compiled only if NOT_ANYMORE is defined: start-of-string quoting */
+	if (*s == '<' ||
+		*s == '"' ||
+		isdigit((unsigned char) *s) ||
+		((*s == '+' || *s == '-') &&
+		 (isdigit((unsigned char) s[1]) || s[1] == '.')))
+		appendStringInfoChar(&context->str, '\\');
+#endif							/* NOT_ANYMORE */
+	while (*s)
+	{
+		/* These chars must be backslashed anywhere in the string */
+		if (*s == ' ' || *s == '\n' || *s == '\t' ||
+			*s == '(' || *s == ')' || *s == '{' || *s == '}' ||
+			*s == '\\')
+			appendStringInfoChar(&context->str, '\\');
+		appendStringInfoChar(&context->str, *s++);
+	}
+}
diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c
index fdf68fdcae..81013a9337 100644
--- a/src/backend/nodes/read.c
+++ b/src/backend/nodes/read.c
@@ -45,7 +45,7 @@ bool		restore_location_fields = false;
  * in builds with the WRITE_READ_PARSE_PLAN_TREES debugging flag set.
  */
 static void *
-stringToNodeInternal(const char *str, bool restore_loc_fields)
+stringToNodeInternalOld(const char *str, bool restore_loc_fields)
 {
 	void	   *retval;
 	const char *save_strtok;
@@ -86,17 +86,17 @@ stringToNodeInternal(const char *str, bool restore_loc_fields)
  * Externally visible entry points
  */
 void *
-stringToNode(const char *str)
+stringToNodeOld(const char *str)
 {
-	return stringToNodeInternal(str, false);
+	return stringToNodeInternalOld(str, false);
 }
 
 #ifdef WRITE_READ_PARSE_PLAN_TREES
 
 void *
-stringToNodeWithLocations(const char *str)
+stringToNodeWithLocationsOld(const char *str)
 {
-	return stringToNodeInternal(str, true);
+	return stringToNodeInternalOld(str, true);
 }
 
 #endif
diff --git a/src/backend/nodes/readfuncs_new.c b/src/backend/nodes/readfuncs_new.c
new file mode 100644
index 0000000000..da0ac91247
--- /dev/null
+++ b/src/backend/nodes/readfuncs_new.c
@@ -0,0 +1,749 @@
+#include "postgres.h"
+
+#include "common/shortest_dec.h"
+#include "common/string.h"
+#include "lib/stringinfo.h"
+#include "miscadmin.h"
+#include "nodes/nodes.h"
+#include "nodes/nodeinfo.h"
+#include "nodes/pg_list.h"
+#include "nodes/primnodes.h"
+#include "nodes/value.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/int8.h"
+
+
+typedef struct NodeInContext
+{
+	bool restore_locations;		/* restore location fields instead of -1? */
+
+	const char *str;			/* the complete input string */
+	const char *cur;			/* current scan position within str */
+} NodeInContext;
+
+static void *nodein_read(NodeInContext *context, const char *token, int tok_len);
+static const char *nodein_strtok(NodeInContext *context, int *token_length);
+static Node *nodein_read_node(NodeInContext *context);
+
+static void nodein_fields(NodeInContext *context, const TINodeType  *type_info, Node* dst);
+static void nodein_field(NodeInContext *context, Node *obj,
+						 const TINodeType *type_info, const TIStructField *field_info,
+						 uint16 known_type_id, uint16 size, void *ptr_dst);
+static List *nodein_list(NodeInContext *context, const char *token, int token_length);
+static char * nodein_debackslash(NodeInContext *context, const char *token, int token_length);
+static Datum nodein_datum(NodeInContext *context, bool typbyval, const char *token, int token_length);
+static Bitmapset *nodein_bitmapset(NodeInContext *context, const char *token, int token_length);
+static void nodein_value_union(NodeInContext *context, Value *dst, const char *token, int token_length);
+static void nodein_enum(NodeInContext *context, uint16 type_id, void *ptr_dst, const char *token, int token_length);
+
+/* stringToNode - build-time switch between the new and the old reader */
+void *
+stringToNode(const char *str)
+{
+#ifdef USE_NEW_NODE_FUNCS
+	return stringToNodeNew(str);
+#else
+	return stringToNodeOld(str);
+#endif							/* USE_NEW_NODE_FUNCS */
+}
+
+#ifdef WRITE_READ_PARSE_PLAN_TREES
+void *
+stringToNodeWithLocations(const char *str)
+{
+#ifdef USE_NEW_NODE_FUNCS
+	return stringToNodeWithLocationsNew(str);	/* new, metadata-driven reader */
+#else
+	return stringToNodeWithLocationsOld(str);	/* reader from read.c */
+#endif							/* USE_NEW_NODE_FUNCS */
+}
+#endif							/* WRITE_READ_PARSE_PLAN_TREES */
+
+/* Common worker for the entry points below: parse str from its beginning. */
+static void *
+stringToNodeNewInternal(const char *str, bool restore_locations)
+{
+	NodeInContext parse_state = {.str = str, .cur = str};
+
+	parse_state.restore_locations = restore_locations;
+	return nodein_read(&parse_state, NULL, 0);
+}
+
+void *
+stringToNodeNew(const char *str)
+{
+	return stringToNodeNewInternal(str, false);	/* locations are set to -1 */
+}
+
+#ifdef WRITE_READ_PARSE_PLAN_TREES
+void *
+stringToNodeWithLocationsNew(const char *str)
+{
+	return stringToNodeNewInternal(str, true);	/* keep stored locations */
+}
+#endif							/* WRITE_READ_PARSE_PLAN_TREES */
+
+/*
+ * nodein_read - parse one node or List; NULL at end of input and for "<>".
+ *	   A non-NULL token is an already scanned token to start from.
+ */
+static void *
+nodein_read(NodeInContext *context, const char *token, int token_length)
+{
+	/* Guard against stack overflow due to overly complex expressions */
+	check_stack_depth();
+
+	if (token == NULL)
+		token = nodein_strtok(context, &token_length);
+
+	/* input exhausted, or the empty token "<>" */
+	if (token == NULL || token_length == 0)
+		return NULL;
+
+	if (token[0] == '(')
+	{
+		Assert(token_length == 1);	/* cf nodein_strtok */
+		return (Node *) nodein_list(context, token, token_length);
+	}
+
+	if (token[0] == '{')
+	{
+		Node	   *parsed;
+
+		Assert(token_length == 1);	/* cf nodein_strtok */
+		parsed = nodein_read_node(context);
+
+		/* nodein_read_node() leaves the closing brace for us to consume */
+		token = nodein_strtok(context, &token_length);
+		if (token_length != 1 || token[0] != '}')
+			elog(ERROR, "did not find '}' at end of input node");
+		return parsed;
+	}
+
+	/*
+	 * XXX: We used to accept strings (starting with "), integers (parsable
+	 * integer), float (other numbers) and bitstrings (starting with b) here,
+	 * mapping them to T_Value sub-types.
+	 *
+	 * That seemed awkward, especially issues like floating points being
+	 * recognized as integers after a roundtrip, plain C strings not being
+	 * discernible from Value nodes, and the overhead of a more complex
+	 * determination of the token type.
+	 *
+	 * If we want to re-introduce that, this'd probably be the best place to
+	 * check for that, not going through the faster paths above.
+	 */
+	elog(ERROR, "unrecognized token: \"%.*s\"", token_length, token);
+
+	return NULL;				/* keep compiler quiet */
+}
+
+/*
+ * nodein_strtok - return the next token and advance context->cur past it.
+ *	   Returns NULL at end of input; *token_length is the token's length,
+ *	   but 0 for the "empty" token <>.  Backslashes are left in place.
+ */
+static const char *
+nodein_strtok(NodeInContext *context, int *token_length)
+{
+	const char *scan = context->cur;	/* current scan position */
+	const char *start;			/* first character of the token */
+	int			len;
+
+	/* skip leading whitespace */
+	while (*scan == ' ' || *scan == '\n' || *scan == '\t')
+		scan++;
+	start = scan;
+
+	if (*scan == '\0')
+	{
+		/* no more tokens */
+		context->cur = scan;
+		*token_length = 0;
+		return NULL;
+	}
+
+	if (*scan == '(' || *scan == ')' || *scan == '{' || *scan == '}')
+		scan++;					/* special 1-character token */
+	else
+	{
+		/* normal token: ends at whitespace, a paren/brace or end of input */
+		for (;;)
+		{
+			char		ch = *scan;
+
+			if (ch == '\0' || ch == ' ' || ch == '\n' || ch == '\t' ||
+				ch == '(' || ch == ')' || ch == '{' || ch == '}')
+				break;
+
+			/* a backslash takes the character following it along */
+			scan += (ch == '\\' && scan[1] != '\0') ? 2 : 1;
+		}
+	}
+
+	len = scan - start;
+
+	/* Recognize special case for "empty" token */
+	if (len == 2 && start[0] == '<' && start[1] == '>')
+		len = 0;
+
+	*token_length = len;
+	context->cur = scan;
+	return start;
+}
+
+/*
+ * nodein_read_node - parse "TypeName numeric-type-id :field value ..." into
+ *	   a newly palloc0'd node.  The caller handles the enclosing braces.
+ */
+static Node*
+nodein_read_node(NodeInContext *context)
+{
+	const char *node_type;
+	const char *node_type_id_s;
+	NodeTag node_type_id;
+	int			type_token_length;
+	int			id_token_length;
+	const TINodeType  *type_info;
+	const TIString *type_name;
+	Node *dst;
+
+	node_type = nodein_strtok(context, &type_token_length);
+	if (unlikely(type_token_length == 0))
+		elog(ERROR, "unexpected zero length token");
+
+	/* must test the id's own length: the token is NULL at end of input */
+	node_type_id_s = nodein_strtok(context, &id_token_length);
+	if (unlikely(id_token_length == 0))
+		elog(ERROR, "unexpected zero length token");
+
+	node_type_id = atoi(node_type_id_s);
+	// FIXME: check ti_* boundaries
+	type_info = &ti_node_types[node_type_id];
+	type_name = &ti_strings[type_info->name];
+
+	/* compare lengths too, so that a mere prefix of the name is rejected */
+	if (type_token_length != type_name->length ||
+		memcmp(node_type, type_name->string, type_token_length) != 0)
+		elog(ERROR, "unrecognized: %s vs %s",
+			 pnstrdup(node_type, type_token_length), type_name->string);
+
+	dst = palloc0(type_info->size);
+	dst->type = node_type_id;
+	nodein_fields(context, type_info, dst);
+
+	return dst;
+}
+
+/* nodein_fields - read ":name value" for each field of type_info into *dst */
+static void
+nodein_fields(NodeInContext *context, const TINodeType  *type_info, Node* dst)
+{
+	const TIStructField *fields = &ti_struct_fields[type_info->first_field_at];
+
+	/* Guard against stack overflow due to overly complex expressions */
+	check_stack_depth();
+
+	/* FIXME: ExtensibleNode needs to call callbacks, or be reimplemented */
+	for (int fieldno = 0; fieldno < type_info->num_fields; fieldno++)
+	{
+		const TIStructField *cur = &fields[fieldno];
+		const char *label;
+		int			label_length;
+
+		if ((cur->flags & (TYPE_IN_IGNORE | TYPE_OUT_IGNORE)) != 0)
+			continue;
+
+		/*
+		 * Read the field name, which is prefixed with ':'.  XXX: verifying
+		 * it adds robustness the old code lacked, but only in assert builds.
+		 */
+		label = nodein_strtok(context, &label_length);
+		Assert(label_length > 1);
+		Assert(label_length - 1 == ti_strings[cur->name].length);
+		Assert(memcmp(label + 1, ti_strings[cur->name].string, label_length - 1) == 0);
+
+		nodein_field(context, dst, type_info, cur, cur->known_type_id,
+					 cur->size, (char *) dst + cur->offset);
+	}
+}
+
+static List *
+nodein_list(NodeInContext *context, const char *token, int token_length)
+{
+	List	   *l = NIL;		/* result; stays NIL for an empty list */
+
+	/*----------
+	 * Could be an integer list:	(i int int ...)
+	 * or an OID list:				(o int int ...)
+	 * or a list of nodes:			(node node ...)
+	 *----------
+	 */
+	token = nodein_strtok(context, &token_length);	/* look past the '(' */
+	if (token == NULL)
+		elog(ERROR, "unterminated List structure");
+	if (token_length == 1 && token[0] == 'i')
+	{
+		/* List of integers; every token up to ')' must be one */
+		for (;;)
+		{
+			int			val;
+			char	   *endptr;
+
+			token = nodein_strtok(context, &token_length);
+			if (token == NULL)
+				elog(ERROR, "unterminated List structure");
+			if (token[0] == ')')
+				break;
+			val = (int) strtol(token, &endptr, 10);
+			if (endptr != token + token_length)	/* whole token consumed? */
+				elog(ERROR, "unrecognized integer: \"%.*s\"",
+					 token_length, token);
+			l = lappend_int(l, val);
+		}
+	}
+	else if (token_length == 1 && token[0] == 'o')
+	{
+		/* List of OIDs; every token up to ')' must be one */
+		for (;;)
+		{
+			Oid			val;
+			char	   *endptr;
+
+			token = nodein_strtok(context, &token_length);
+			if (token == NULL)
+				elog(ERROR, "unterminated List structure");
+			if (token[0] == ')')
+				break;
+			val = (Oid) strtoul(token, &endptr, 10);
+			if (endptr != token + token_length)	/* whole token consumed? */
+				elog(ERROR, "unrecognized OID: \"%.*s\"",
+					 token_length, token);
+			l = lappend_oid(l, val);
+		}
+	}
+	else
+	{
+		/* List of nodes; "<>" tokens become NULL elements */
+		for (;;)
+		{
+			/* We have already scanned next token... */
+			if (token[0] == ')')
+				break;
+			l = lappend(l, nodein_read(context, token, token_length));
+			token = nodein_strtok(context, &token_length);
+			if (token == NULL)
+				elog(ERROR, "unterminated List structure");
+		}
+	}
+
+	return l;
+}
+
+/*
+ * nodein_field -
+ *	   read the next token(s) as a value of kind known_type_id and store it
+ *	   at ptr_dst; obj is the node being filled (NULL for array elements),
+ *	   field_info/type_info describe the field the value belongs to
+ */
+static void
+nodein_field(NodeInContext *context, Node *obj,
+			 const TINodeType *type_info, const TIStructField *field_info,
+			 uint16 known_type_id, uint16 size, void *ptr_dst)
+{
+	const char *token;
+	int			token_length;
+
+	Assert(known_type_id != TYPE_ID_UNKNOWN);
+	Assert(size != TYPE_SIZE_UNKNOWN);
+
+	token = nodein_strtok(context, &token_length);		/* get field value */
+	if (token == NULL)			/* input ended in the middle of a node */
+		elog(ERROR, "unexpected end of input while reading field value");
+
+	switch (known_type_id)
+	{
+		case KNOWN_TYPE_UINT16:
+			*(uint16 *) ptr_dst = (uint16) strtoul(token, NULL, 10);
+			break;
+		case KNOWN_TYPE_OPFUNCID:
+		case KNOWN_TYPE_OID:
+		case KNOWN_TYPE_UINT32:
+			*(uint32 *) ptr_dst = (uint32) strtoul(token, NULL, 10);
+			break;
+		case KNOWN_TYPE_UINT64:
+			// FIXME: pnstrdup
+			*(uint64 *) ptr_dst = (uint64) pg_strtouint64(pnstrdup(token, token_length), NULL, 10);
+			break;
+
+		case KNOWN_TYPE_LOCATION:
+			/*
+			 * Parse location fields are written out by outfuncs.c, but only
+			 * for debugging use.  When reading a location field, we normally
+			 * discard the stored value and set the location field to -1 (ie,
+			 * "unknown").  This is because nodes coming from a stored rule
+			 * should not be thought to have a known location in the current
+			 * query's text.  However, if restore_location_fields is true, we
+			 * do restore location fields from the string.  This is currently
+			 * intended only for use by the WRITE_READ_PARSE_PLAN_TREES test
+			 * code, which doesn't want to cause any change in the node
+			 * contents.
+			 */
+#ifdef WRITE_READ_PARSE_PLAN_TREES
+			if (context->restore_locations)
+				*(int32 *) ptr_dst = atoi(token);
+			else
+#endif
+			{
+				*(int32 *) ptr_dst = -1;
+			}
+			break;
+
+		case KNOWN_TYPE_INT16:
+			*(int16 *) ptr_dst = (int16) atoi(token);
+			break;
+		case KNOWN_TYPE_INT32:
+			*(int32 *) ptr_dst = atoi(token);
+			break;
+		case KNOWN_TYPE_INT64:
+			// FIXME: pnstrdup
+			scanint8(pnstrdup(token, token_length), false, (int64 *) ptr_dst);
+			break;
+
+		case KNOWN_TYPE_FLOAT32:
+			*(float *) ptr_dst = strtof(token, NULL);
+			break;
+		case KNOWN_TYPE_FLOAT64:
+			*(double *) ptr_dst = strtod(token, NULL);
+			break;
+
+		case KNOWN_TYPE_BOOL:
+			if (token[0] == 't')
+			{
+				Assert(strncmp(token, "true", token_length) == 0);
+				*(bool *) ptr_dst = true;
+			}
+			else
+			{
+				Assert(strncmp(token, "false", token_length) == 0);
+				*(bool *) ptr_dst = false;
+			}
+			break;
+
+		case KNOWN_TYPE_CHAR:
+			/* avoid overhead of calling debackslash() for one char */
+			if (token_length == 0)
+				*(char *) ptr_dst = '\0';
+			else if (token_length == 2)
+			{
+				if (token[0] != '\\')
+					elog(ERROR, "invalid escape %c", token[0]);
+				*(char *) ptr_dst = token[1];
+			}
+			else if (token_length == 1)
+				*(char *) ptr_dst = token[0];
+			else
+				elog(ERROR, "invalid char length %d", token_length);
+			break;
+
+		case KNOWN_TYPE_DATUM:
+			{
+				Const *cobj = castNode(Const, (Node *) obj);
+
+				Assert(&cobj->constvalue == ptr_dst);
+
+				/* NOTE(review): needs constisnull/constbyval read before this */
+				if (cobj->constisnull)
+				{
+					/* skip "<>" */
+					if (token == NULL || token_length != 0)
+						elog(ERROR, "expected <>");
+				}
+				else
+					cobj->constvalue = nodein_datum(context, cobj->constbyval, token, token_length);
+				break;
+			}
+
+		case KNOWN_TYPE_VALUE_UNION:
+			{
+				Value *vobj = (Value *) obj;
+
+				Assert(IsAValue(vobj));
+
+				nodein_value_union(context, vobj, token, token_length);
+				break;
+			}
+
+		case KNOWN_TYPE_ENUM:
+		case KNOWN_TYPE_COERCIONFORM:
+		case KNOWN_TYPE_NODE_TAG:
+			Assert(size == sizeof(int));
+			nodein_enum(context, field_info->type_id, ptr_dst, token, token_length);
+			break;
+
+		case KNOWN_TYPE_NODE:
+			{
+				const TINodeType *sub_type_info;
+				NodeTag sub_tag;
+
+				Assert(field_info->type_id != TYPE_ID_UNKNOWN);
+
+				/* sub-types are always enclosed in {TypeName numeric-type-id ... } */
+				if (token_length != 1 || token[0] != '{')
+					elog(ERROR, "did not find '{' at the start of embedded node");
+
+				/* read TypeName */
+				token = nodein_strtok(context, &token_length);
+				/* read numeric-type-id */
+				token = nodein_strtok(context, &token_length);
+
+				/*
+				 * If at offset 0, this shares the NodeTag field with the
+				 * parent class. Therefore we have to rely on the declared
+				 * type.
+				 */
+				if (field_info->offset != 0)
+				{
+					sub_tag = atoi(token);
+					((Node *) ptr_dst)->type = sub_tag;
+				}
+				else
+					sub_tag = field_info->type_id;
+
+				sub_type_info = &ti_node_types[sub_tag];
+
+				nodein_fields(context,
+							  sub_type_info,
+							  (Node *) ptr_dst);
+
+				/* read } */
+				token = nodein_strtok(context, &token_length);
+				if (token_length != 1 || token[0] != '}')
+					elog(ERROR, "did not find '}' at the end of embedded node");
+				break;
+			}
+
+		case KNOWN_TYPE_P_PGARR:
+			{
+				PgArrBase *arr;
+				size_t arr_length;
+
+				if (token_length == 0)
+					break;
+
+				Assert(field_info->elem_size > 0);
+
+				arr_length = (uint32) strtoul(token, NULL, 10);
+				arr = pgarr_helper_alloc(field_info->elem_size,
+										 arr_length);
+				arr->size = arr_length;
+
+				for (int i = 0; i < arr_length; i++)
+				{
+					nodein_field(context, NULL, type_info, field_info,
+								 field_info->elem_known_type_id, field_info->elem_size,
+								 (char *) arr->elementsp + field_info->elem_size * i);
+				}
+
+				*(PgArrBase **) ptr_dst = arr;
+
+				break;
+			}
+
+		case KNOWN_TYPE_P_NODE:
+			if (token_length == 0)
+				break;
+
+			*(Node **) ptr_dst = nodein_read(context, token, token_length);
+			break;
+
+		case KNOWN_TYPE_P_CHAR:
+			if (token_length == 0)
+				break;
+
+			if (token_length < 2 || token[0] != '"' || token[token_length - 1] != '"')
+				elog(ERROR, "missing quotes");
+			*(char **) ptr_dst = nodein_debackslash(context, token + 1, token_length - 2);
+			break;
+
+		case KNOWN_TYPE_P_BITMAPSET:
+			if (token_length == 0)
+				break;
+			*(Bitmapset **) ptr_dst = nodein_bitmapset(context, token, token_length);
+			break;
+
+		default:
+			elog(ERROR, "don't know how to read type %d", (int) known_type_id);
+	}
+}
+
+/*
+ * nodein_datum
+ *
+ * Given a string representation of a Datum, recreate the appropriate Datum.
+ * The representation embeds length info, but not byValue, so we must be told
+ * that.  token is the already scanned length; everything else is read here.
+ */
+static Datum
+nodein_datum(NodeInContext *context, bool typbyval, const char *token, int token_length)
+{
+	Size		length;
+	Datum		res;
+	char	   *s;
+
+	length = (unsigned int) strtoul(token, NULL, 10);	/* the value's length */
+
+	token = nodein_strtok(context, &token_length);	/* read the '[' */
+	if (token_length != 1 || token[0] != '[')
+		elog(ERROR, "expected \"[\" to start datum, but got \"%s\"; length = %zu",
+			 token ? pnstrdup(token, token_length) : "[NULL]", length);
+
+	if (typbyval)
+	{
+		if (length > (Size) sizeof(Datum))
+			elog(ERROR, "byval datum but length = %zu", length);
+		res = (Datum) 0;
+		s = (char *) (&res);
+		for (Size i = 0; i < (Size) sizeof(Datum); i++)
+		{
+			token = nodein_strtok(context, &token_length);
+			if (token == NULL)	/* atoi() must not see NULL */
+				elog(ERROR, "unterminated datum");
+			s[i] = (char) atoi(token);
+		}
+	}
+	else if (length <= 0)
+		res = (Datum) NULL;
+	else
+	{
+		s = (char *) palloc(length);
+		for (Size i = 0; i < length; i++)
+		{
+			token = nodein_strtok(context, &token_length);
+			if (token == NULL)	/* atoi() must not see NULL */
+				elog(ERROR, "unterminated datum");
+			s[i] = (char) atoi(token);
+		}
+		res = PointerGetDatum(s);
+	}
+
+	token = nodein_strtok(context, &token_length);	/* read the ']' */
+	if (token_length != 1 || token[0] != ']')
+		elog(ERROR, "expected \"]\" to end datum, but got \"%s\"; length = %zu",
+			 token ? pnstrdup(token, token_length) : "[NULL]", length);
+
+	return res;
+}
+
+/* nodein_bitmapset - parse "(b int int ...)", token being the scanned '(' */
+static Bitmapset *
+nodein_bitmapset(NodeInContext *context, const char *token, int token_length)
+{
+	Bitmapset  *members = NULL;
+	const char *expected = "(b";
+
+	/* the given token must be '(', the one following it 'b' */
+	for (int i = 0; i < 2; i++)
+	{
+		if (i > 0)
+			token = nodein_strtok(context, &token_length);
+		if (token == NULL)
+			elog(ERROR, "incomplete Bitmapset structure");
+		if (token_length != 1 || token[0] != expected[i])
+			elog(ERROR, "unrecognized token: \"%.*s\"", token_length, token);
+	}
+
+	while ((token = nodein_strtok(context, &token_length)) != NULL)
+	{
+		char	   *after_digits;
+		long		parsed;
+
+		if (token_length == 1 && token[0] == ')')
+			return members;
+		parsed = strtol(token, &after_digits, 10);
+		if (after_digits != token + token_length)
+			elog(ERROR, "unrecognized integer: \"%.*s\"", token_length, token);
+		members = bms_add_member(members, (int) parsed);
+	}
+
+	elog(ERROR, "unterminated Bitmapset structure");
+	return NULL;				/* keep compiler quiet */
+}
+
+static void
+nodein_value_union(NodeInContext *context, Value *dst, const char *token, int token_length)
+{
+	switch (dst->type)			/* the tag selects the union member to fill */
+	{
+		case T_Null:
+			/* nothing to store; the caller has already scanned the token */
+			break;
+
+		case T_Integer:
+			dst->val.ival = atoi(token);
+			break;
+
+		case T_Float:
+			dst->val.str = pnstrdup(token, token_length);	/* kept as text */
+			break;
+
+		case T_String:
+			/* need to remove leading and trailing quotes, and backslashes */
+			if (unlikely(token_length < 2 ||
+						 token[0] != '"' ||
+						 token[token_length - 1] != '"'))
+				elog(ERROR, "invalid string");
+			dst->val.str = nodein_debackslash(context, token + 1, token_length - 2);
+			break;
+
+		case T_BitString:
+			/* the whole token is copied, including its leading 'b' */
+			dst->val.str = pnstrdup(token, token_length);
+			break;
+
+		default:
+			Assert(false);
+			pg_unreachable();
+	}
+}
+
+/* nodein_enum - set *ptr_dst to the value of type_id's member named by token */
+static void
+nodein_enum(NodeInContext *context, uint16 type_id, void *ptr_dst, const char *token, int token_length)
+{
+	const TIEnum *enum_info = &ti_enums[type_id];
+	const TIEnumField *member = &ti_enum_fields[enum_info->first_field_at];
+
+	for (int left = enum_info->num_fields; left > 0; left--, member++)
+	{
+		const TIString *member_name = &ti_strings[member->name];
+
+		if (member_name->length != token_length ||
+			strncmp(member_name->string, token, token_length) != 0)
+			continue;
+
+		memcpy(ptr_dst, &member->value, sizeof(int));
+		return;
+	}
+
+	elog(ERROR, "unknown enum %s val %s",
+		 ti_strings[enum_info->name].string, pnstrdup(token, token_length));
+}
+
+/* nodein_debackslash - palloc'd, null-terminated copy of token sans escapes */
+static char *
+nodein_debackslash(NodeInContext *context, const char *token, int token_length)
+{
+	char	   *copy = palloc(token_length + 1);
+	int			used = 0;
+
+	for (int pos = 0; pos < token_length; pos++)
+	{
+		if (token[pos] == '\\' && pos + 1 < token_length)
+			pos++;				/* drop the backslash, keep what follows */
+		copy[used++] = token[pos];
+	}
+	copy[used] = '\0';
+	return copy;
+}
diff --git a/src/include/nodes/nodeinfo.h b/src/include/nodes/nodeinfo.h
new file mode 100644
index 0000000000..b6f051ca9e
--- /dev/null
+++ b/src/include/nodes/nodeinfo.h
@@ -0,0 +1,128 @@
+#ifndef PG_NODEINFO_H
+
+#define PG_NODEINFO_H
+
+#define TYPE_CAT_SCALAR (1U << 0)	/* flag bits: every macro down to TYPE_IN_IGNORE owns one distinct bit */
+#define TYPE_CAT_POINTER (1U << 1)
+#define TYPE_CAT_INCOMPLETE (1U << 2)
+#define TYPE_EQUAL_IGNORE (1U << 3)	/* NOTE(review): EQUAL/COPY/OUT/IN presumably name the node function a flag affects -- confirm */
+#define TYPE_EQUAL_FORCE_SCALAR (1U << 4)
+#define TYPE_COPY_IGNORE (1U << 5)
+#define TYPE_COPY_FORCE_SCALAR (1U << 6)
+#define TYPE_OUT_IGNORE (1U << 7)
+#define TYPE_IN_IGNORE (1U << 8)	/* highest bit used; NOTE(review): presumably kept in the uint16 TIStructField.flags -- confirm */
+
+#define TYPE_ID_UNKNOWN PG_UINT16_MAX	/* sentinel: the largest uint16 value */
+#define TYPE_SIZE_UNKNOWN PG_UINT16_MAX	/* sentinel named by the "allocation size" comments on the uint16 size fields */
+
+typedef enum TIKnownTypes	/* type kinds given a dedicated id; values are implicit, counting up from 0 */
+{
+	KNOWN_TYPE_UNKNOWN,		/* first enumerator, hence value 0 */
+
+	/* scalar types */
+	KNOWN_TYPE_INT16,
+	KNOWN_TYPE_INT32,
+	KNOWN_TYPE_INT64,
+	KNOWN_TYPE_INT128,
+	KNOWN_TYPE_UINT16,
+	KNOWN_TYPE_OID,
+	KNOWN_TYPE_UINT32,
+	KNOWN_TYPE_UINT64,
+	KNOWN_TYPE_UINT128,
+	KNOWN_TYPE_FLOAT32,
+	KNOWN_TYPE_FLOAT64,
+	KNOWN_TYPE_BOOL,
+	KNOWN_TYPE_CHAR,
+	KNOWN_TYPE_ENUM,
+	KNOWN_TYPE_NODE_TAG,
+	KNOWN_TYPE_NODE,
+	KNOWN_TYPE_LOCATION,
+	KNOWN_TYPE_DATUM,
+	KNOWN_TYPE_VALUE_UNION,	/* NOTE(review): presumably the val union inside Value -- confirm */
+	KNOWN_TYPE_COERCIONFORM,
+	KNOWN_TYPE_OPFUNCID,
+
+	/* pointer types; most KNOWN_TYPE_P_X names mirror a scalar X above */
+	KNOWN_TYPE_P_CHAR,
+	KNOWN_TYPE_P_NODE,
+	KNOWN_TYPE_P_BITMAPSET,
+
+	KNOWN_TYPE_P_INT16,
+	KNOWN_TYPE_P_INT32,
+	KNOWN_TYPE_P_INT64,
+	KNOWN_TYPE_P_INT128,
+	KNOWN_TYPE_P_UINT16,
+	KNOWN_TYPE_P_OID,
+	KNOWN_TYPE_P_UINT32,
+	KNOWN_TYPE_P_UINT64,
+	KNOWN_TYPE_P_UINT128,
+	KNOWN_TYPE_P_FLOAT32,
+	KNOWN_TYPE_P_FLOAT64,
+	KNOWN_TYPE_P_BOOL,
+	KNOWN_TYPE_P_ENUM,
+	KNOWN_TYPE_P_DATUM,
+
+	KNOWN_TYPE_P_PGARR		/* NOTE(review): presumably a pointer to a PGARR() array -- confirm */
+}  TIKnownTypes;
+
+typedef struct TINodeType	/* one entry of ti_node_types: metadata for a node struct type */
+{
+	/* struct name; NOTE(review): presumably an index into ti_strings -- confirm */
+	uint16 name;
+	uint16 first_field_at;	/* NOTE(review): presumably index of the first field entry in ti_struct_fields -- confirm */
+	uint16 num_fields;		/* NOTE(review): presumably the count of consecutive field entries -- confirm */
+	/* allocation size, or TYPE_SIZE_UNKNOWN */
+	uint16 size;
+} TINodeType;
+
+typedef struct TIStructField	/* one entry of ti_struct_fields: metadata for a struct member */
+{
+	/* struct field name; NOTE(review): presumably an index into ti_strings -- confirm */
+	uint16 name;
+	uint16 type;			/* NOTE(review): presumably identifies the field's declared type name -- confirm */
+	/* offset within the containing struct */
+	uint16 offset;
+	/* allocation size, or TYPE_SIZE_UNKNOWN */
+	uint16 size;
+	uint16 flags;			/* NOTE(review): presumably a mask of the TYPE_* bits -- confirm */
+	uint16 type_id;			/* NOTE(review): presumably a table index, or TYPE_ID_UNKNOWN -- confirm */
+	uint16 known_type_id;	/* NOTE(review): presumably a TIKnownTypes value -- confirm */
+	uint16 elem_known_type_id;	/* NOTE(review): presumably the TIKnownTypes value of the pointed-to element -- confirm */
+	/* allocation size, or TYPE_SIZE_UNKNOWN */
+	uint16 elem_size;
+} TIStructField;
+
+typedef struct TIEnum	/* one entry of ti_enums: describes an enum type and where its enumerators are listed */
+{
+	/* name of enum, as an index into ti_strings */
+	uint16 name;
+	uint16 first_field_at;	/* index of this enum's first entry in ti_enum_fields */
+	uint16 num_fields;		/* number of consecutive ti_enum_fields entries belonging to this enum */
+	uint16 size;			/* NOTE(review): presumably sizeof the enum type -- confirm */
+} TIEnum;
+
+typedef struct TIEnumField	/* one entry of ti_enum_fields: a single enumerator */
+{
+	uint16 name;			/* enumerator name, as an index into ti_strings */
+	uint32 value;			/* enumerator value; nodein_enum copies sizeof(int) bytes of it to the destination */
+} TIEnumField;
+
+/*
+ * One string plus its length.  XXX: wastes space on padding and the pointer;
+ * all strings could instead live in one buffer, addressed by offset.
+ */
+typedef struct TIString
+{
+	uint16 length;			/* compared with a token's length before strncmp; presumably excludes the NUL -- confirm */
+	const char *const string;	/* the characters themselves */
+} TIString;
+
+extern const TINodeType ti_node_types[];	/* defined elsewhere; NOTE(review): presumably build-time generated -- confirm */
+extern const TIStructField ti_struct_fields[];
+extern const TIEnum ti_enums[];				/* indexed by the type_id passed to nodein_enum */
+extern const TIEnumField ti_enum_fields[];	/* sliced per enum via TIEnum.first_field_at / num_fields */
+extern const TIString ti_strings[];			/* indexed by the name members of TIEnum and TIEnumField */
+
+#define USE_NEW_NODE_FUNCS	/* NOTE(review): presumably selects the metadata-driven node functions -- confirm */
+
+#endif /* PG_NODEINFO_H */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index c734b8d29c..e60a0f06a7 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -92,6 +92,7 @@ typedef enum NodeTag
 	T_PlanRowMark,
 	T_PartitionPruneInfo,
 	T_PartitionedRelPruneInfo,
+	T_PartitionPruneStep,
 	T_PartitionPruneStepOp,
 	T_PartitionPruneStepCombine,
 	T_PlanInvalItem,
@@ -616,14 +617,20 @@ extern void outBitmapset(struct StringInfoData *str,
 extern void outDatum(struct StringInfoData *str, uintptr_t value,
 					 int typlen, bool typbyval);
 extern char *nodeToString(const void *obj);
+extern char *nodeToStringOld(const void *obj);	/* NOTE(review): presumably the pre-existing implementation, kept alongside -- confirm */
+extern char *nodeToStringNew(const void *obj);	/* NOTE(review): presumably the node-metadata-driven implementation -- confirm */
 extern char *bmsToString(const struct Bitmapset *bms);
 
 /*
  * nodes/{readfuncs.c,read.c}
  */
 extern void *stringToNode(const char *str);
+extern void *stringToNodeNew(const char *str);	/* NOTE(review): presumably the node-metadata-driven reader -- confirm */
+extern void *stringToNodeOld(const char *str);	/* NOTE(review): presumably the pre-existing reader, kept alongside -- confirm */
 #ifdef WRITE_READ_PARSE_PLAN_TREES
 extern void *stringToNodeWithLocations(const char *str);
+extern void *stringToNodeWithLocationsOld(const char *str);	/* only declared under WRITE_READ_PARSE_PLAN_TREES, like the unsuffixed one */
+extern void *stringToNodeWithLocationsNew(const char *str);	/* NOTE(review): Old/New presumably name the two implementations -- confirm */
 #endif
 extern struct Bitmapset *readBitmapset(void);
 extern uintptr_t readDatum(bool typbyval);
@@ -636,6 +643,8 @@ extern PGARR(AttrNumber) *readAttrNumberCols(void);
  * nodes/copyfuncs.c
  */
 extern void *copyObjectImpl(const void *obj);
+extern void *copyObjectImplOld(const void *obj);	/* NOTE(review): presumably the pre-existing copy implementation -- confirm */
+extern void *copyObjectImplNew(const void *obj);	/* NOTE(review): presumably the node-metadata-driven copy -- confirm */
 
 /* cast result back to argument type, if supported by compiler */
 #ifdef HAVE_TYPEOF
@@ -648,6 +657,8 @@ extern void *copyObjectImpl(const void *obj);
  * nodes/equalfuncs.c
  */
 extern bool equal(const void *a, const void *b);
+extern bool nodes_equal_new(const void *a, const void *b);	/* same signature as equal(); NOTE(review): presumably the metadata-driven variant -- confirm */
+extern bool nodes_equal_old(const void *a, const void *b);	/* same signature as equal(); NOTE(review): presumably the pre-existing variant -- confirm */
 
 
 /*
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 409d840e79..666e20bdde 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -58,6 +58,9 @@ typedef struct List
 	/* If elements == initial_elements, it's not a separate allocation */
 } List;
 
+typedef List OidList;	/* plain alias of List; NOTE(review): presumably marks lists meant to hold OIDs -- confirm */
+typedef List IntList;	/* plain alias of List; NOTE(review): presumably marks lists meant to hold ints -- confirm */
+
 /*
  * The *only* valid representation of an empty list is NIL; in other
  * words, a non-NIL list is guaranteed to have length >= 1.
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index a4ebd09521..6a6c0aabb2 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -199,13 +199,13 @@ typedef struct Const
 	int32		consttypmod;	/* typmod value, if any */
 	Oid			constcollid;	/* OID of collation, or InvalidOid if none */
 	int			constlen;		/* typlen of the constant's datatype */
-	Datum		constvalue;		/* the constant's value */
 	bool		constisnull;	/* whether the constant is null (if true,
 								 * constvalue is undefined) */
 	bool		constbyval;		/* whether this datatype is passed by value.
 								 * If true, then all the information is stored
 								 * in the Datum. If false, then the Datum
 								 * contains a pointer to the information. */
+	Datum		constvalue;		/* the constant's value */
 	Location	location;		/* token location, or -1 if unknown */
 } Const;
 
diff --git a/src/include/nodes/value.h b/src/include/nodes/value.h
index 871ffa8fa9..d3d580cec5 100644
--- a/src/include/nodes/value.h
+++ b/src/include/nodes/value.h
@@ -49,6 +49,12 @@ typedef struct Value
 	}			val;
 } Value;
 
+typedef Value Integer;	/* these five are plain aliases of Value; each name matches a T_<name> node tag */
+typedef Value Float;
+typedef Value String;
+typedef Value BitString;
+typedef Value Null;	/* NOTE(review): aliases presumably exist so each tag has a same-named struct type -- confirm */
+
 #define intVal(v)		(((Value *)(v))->val.ival)
 #define floatVal(v)		atof(((Value *)(v))->val.str)
 #define strVal(v)		(((Value *)(v))->val.str)
@@ -58,4 +64,12 @@ extern Value *makeFloat(char *numericStr);
 extern Value *makeString(char *str);
 extern Value *makeBitString(char *str);
 
+static inline bool
+IsAValue(const void *ptr)
+{
+	const NodeTag kind = nodeTag(ptr);
+
+	/* true exactly when the node's tag is one of the five Value flavors */
+	return kind == T_Null || kind == T_BitString || kind == T_String || kind == T_Float || kind == T_Integer;
+}
 #endif							/* VALUE_H */
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index d01769299e..5cc85f03db 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -3455,7 +3455,7 @@ CREATE TABLE coll_t (c) AS VALUES ('bar'::text);
 CREATE POLICY coll_p ON coll_t USING (c < ('foo'::text COLLATE "C"));
 ALTER TABLE coll_t ENABLE ROW LEVEL SECURITY;
 GRANT SELECT ON coll_t TO regress_rls_alice;
-SELECT (string_to_array(polqual, ':'))[7] AS inputcollid FROM pg_policy WHERE polrelid = 'coll_t'::regclass;
+SELECT (regexp_match(polqual, ':(inputcollid[^:]*)'))[1] AS inputcollid FROM pg_policy WHERE polrelid = 'coll_t'::regclass;
    inputcollid    
 ------------------
  inputcollid 950 
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql
index d7a5a36cf8..d93b67de5b 100644
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -1386,7 +1386,7 @@ CREATE TABLE coll_t (c) AS VALUES ('bar'::text);
 CREATE POLICY coll_p ON coll_t USING (c < ('foo'::text COLLATE "C"));
 ALTER TABLE coll_t ENABLE ROW LEVEL SECURITY;
 GRANT SELECT ON coll_t TO regress_rls_alice;
-SELECT (string_to_array(polqual, ':'))[7] AS inputcollid FROM pg_policy WHERE polrelid = 'coll_t'::regclass;
+SELECT (regexp_match(polqual, ':(inputcollid[^:]*)'))[1] AS inputcollid FROM pg_policy WHERE polrelid = 'coll_t'::regclass;
 SET SESSION AUTHORIZATION regress_rls_alice;
 SELECT * FROM coll_t;
 ROLLBACK;