From: Svetlana Derevyanko <svetlo_nika@mail.ru>
Date: Fri, 20 Aug 2021 10:04:10 +0000 (+0300)
Subject: Adding numeric type support
X-Git-Tag: REL_14_1~9
X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=20b8aaf75de2a30e7536a1917dfb3e545f250da4;p=pg_filedump.git

Adding numeric type support

Added numeric type support.
Moved data decompression, detoasting and aligning into a separate function
to avoid redundancy.
---

diff --git a/README.pg_filedump b/README.pg_filedump
index 3654e5a..d10bfb3 100644
--- a/README.pg_filedump
+++ b/README.pg_filedump
@@ -85,6 +85,7 @@ The following options are valid for heap and index files:
         * json
         * macaddr
         * name
+        * numeric
         * oid
         * real
         * serial
diff --git a/decode.c b/decode.c
index 027459e..ad5f1c6 100644
--- a/decode.c
+++ b/decode.c
@@ -29,7 +29,8 @@
 static int
 ReadStringFromToast(const char *buffer,
 		unsigned int buff_size,
-		unsigned int* out_size);
+		unsigned int* out_size,
+		int (*parse_value)(const char *, int));
 
 /*
  * Utilities for manipulation of header information for compressed
@@ -104,6 +105,12 @@ decode_char(const char *buffer, unsigned int buff_size, unsigned int *out_size);
 static int
 decode_name(const char *buffer, unsigned int buff_size, unsigned int *out_size);
 
+static int
+decode_numeric(const char *buffer, unsigned int buff_size, unsigned int *out_size);
+
+static int
+extract_data(const char *buffer, unsigned int buff_size, unsigned int *out_size, int (*parse_value)(const char *, int));
+
 static int
 decode_ignore(const char *buffer, unsigned int buff_size, unsigned int *out_size);
 
@@ -181,6 +188,9 @@ static ParseCallbackTableItem callback_table[] =
 	{
 		"name", &decode_name
 	},
+	{
+		"numeric", &decode_numeric
+	},
 	{
 		"char", &decode_char
 	},
@@ -263,7 +273,7 @@ CopyAppend(const char *str)
  * Append given string to current COPY line and encode special symbols
  * like \r, \n, \t and \\.
  */
-static void
+static int
 CopyAppendEncode(const char *str, int orig_len)
 {
 	/*
@@ -339,6 +349,7 @@ CopyAppendEncode(const char *str, int orig_len)
 
 	tmp_buff[curr_offset] = '\0';
 	CopyAppend(tmp_buff);
+	return 0;
 }
 
 /* CopyAppend version with format string support */
@@ -348,6 +359,144 @@ CopyAppendEncode(const char *str, int orig_len)
 	  CopyAppend(__copy_format_buff); \
   } while(0)
 
+/*
+ * Decode a numeric value and append its text form to the current COPY line.
+ * buffer points at the numeric header (varlena header already stripped) and
+ * num_size is the payload length in bytes.  Returns 0 on success, -2 if the
+ * payload is shorter than its header or has an unknown special header.
+ */
+static int
+CopyAppendNumeric(const char *buffer, int num_size)
+{
+       struct NumericData num;
+       int				hdr_size;
+       int				i;
+
+       /* Need the 16-bit header word; copy no more bytes than the payload holds. */
+       if (num_size < (int) sizeof(uint16))
+               return -2;
+       for (i = 0; i < num_size && i < (int) sizeof(num); i++)
+               ((char *) &num)[i] = buffer[i];
+       /* A long-format header takes 4 bytes; reject a truncated one. */
+       hdr_size = (int) NUMERIC_HEADER_SIZE(&num);
+       if (num_size < hdr_size)
+               return -2;
+
+       if (NUMERIC_IS_SPECIAL(&num))
+       {
+               if (NUMERIC_IS_NINF(&num))
+                       CopyAppend("-Infinity");
+               else if (NUMERIC_IS_PINF(&num))
+                       CopyAppend("Infinity");
+               else if (NUMERIC_IS_NAN(&num))
+                       CopyAppend("NaN");
+               else
+                       return -2;	/* unrecognized special-value header */
+               return 0;
+       }
+       else
+       {
+               int				sign;
+               int				weight;
+               int				dscale;
+               int				ndigits;
+               char			   *str;
+               char			   *cp;
+               char			   *endcp;
+               int				d;
+               bool				putit;
+               NumericDigit		d1;
+               NumericDigit		dig;
+               NumericDigit	   *digits;
+
+               sign = NUMERIC_SIGN(&num);
+               weight = NUMERIC_WEIGHT(&num);
+               dscale = NUMERIC_DSCALE(&num);
+
+               if (num_size == hdr_size)
+               {
+                       /* No digits - compressed zero. */
+                       CopyAppendFmt("%d", 0);
+                       return 0;
+               }
+               else
+               {
+                       /* Only the bytes after the numeric header are digits. */
+                       ndigits = (num_size - hdr_size) / (int) sizeof(NumericDigit);
+                       digits = (NumericDigit *)(buffer + hdr_size);
+                       i = (weight + 1) * DEC_DIGITS;
+                       if (i <= 0)
+                               i = 1;
+
+                       str = palloc(i + dscale + DEC_DIGITS + 2);
+                       cp = str;
+
+                       /* Output a dash for negative values */
+                       if (sign == NUMERIC_NEG)
+                               *cp++ = '-';
+
+                       /* Output all digits before the decimal point */
+                       if (weight < 0)
+                       {
+                               d = weight + 1;
+                               *cp++ = '0';
+                       }
+                       else
+                       {
+                               for (d = 0; d <= weight; d++)
+                               {
+                                       dig = (d < ndigits) ? digits[d] : 0;
+                                       /* In the first digit, suppress extra leading decimal zeroes */
+                                       putit = (d > 0);
+                                       d1 = dig / 1000;
+                                       dig -= d1 * 1000;
+                                       putit |= (d1 > 0);
+                                       if (putit)
+                                               *cp++ = d1 + '0';
+                                       d1 = dig / 100;
+                                       dig -= d1 * 100;
+                                       putit |= (d1 > 0);
+                                       if (putit)
+                                               *cp++ = d1 + '0';
+                                       d1 = dig / 10;
+                                       dig -= d1 * 10;
+                                       putit |= (d1 > 0);
+                                       if (putit)
+                                               *cp++ = d1 + '0';
+                                       *cp++ = dig + '0';
+                               }
+                       }
+
+                       /* If requested, output a decimal point and the digits after it:
+                        * whole groups of DEC_DIGITS first, truncated to dscale afterwards. */
+                       if (dscale > 0)
+                       {
+                               *cp++ = '.';
+                               endcp = cp + dscale;
+                               for (i = 0; i < dscale; d++, i += DEC_DIGITS)
+                               {
+                                       dig = (d >= 0 && d < ndigits) ? digits[d] : 0;
+                                       d1 = dig / 1000;
+                                       dig -= d1 * 1000;
+                                       *cp++ = d1 + '0';
+                                       d1 = dig / 100;
+                                       dig -= d1 * 100;
+                                       *cp++ = d1 + '0';
+                                       d1 = dig / 10;
+                                       dig -= d1 * 10;
+                                       *cp++ = d1 + '0';
+                                       *cp++ = dig + '0';
+                               }
+                               cp = endcp;
+                       }
+                       *cp = '\0';
+                       CopyAppend(str);
+                       pfree(str);
+                       return 0;
+               }
+       }
+}
+
 /* Discard accumulated COPY line */
 static void
 CopyClear(void)
@@ -811,6 +960,16 @@ decode_name(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 	return 0;
 }
 
+/*
+ * Decode a numeric attribute: extract_data() unwraps the stored value and
+ * hands it to CopyAppendNumeric() for conversion to text.
+ */
+static int
+decode_numeric(const char *buffer, unsigned int buff_size, unsigned int *out_size)
+{
+       return extract_data(buffer, buff_size, out_size, &CopyAppendNumeric);
+}
+
 /* Decode a char type */
 static int
 decode_char(const char *buffer, unsigned int buff_size, unsigned int *out_size)
@@ -834,8 +993,20 @@ decode_ignore(const char *buffer, unsigned int buff_size, unsigned int *out_size
 /* Decode char(N), varchar(N), text, json or xml types */
 static int
 decode_string(const char *buffer, unsigned int buff_size, unsigned int *out_size)
+{
+       /* The unwrapped bytes are appended to the COPY line by CopyAppendEncode(). */
+       return extract_data(buffer, buff_size, out_size, &CopyAppendEncode);
+}
+
+/*
+ * Align data, parse varlena header, detoast and decompress.
+ * The last parameter does the actual parsing according to type.
+ */
+static int
+extract_data(const char *buffer, unsigned int buff_size, unsigned int *out_size, int (*parse_value)(const char *, int))
 {
 	int			padding = 0;
+	int			result	= 0;
 
 	/* Skip padding bytes. */
 	while (*buffer == 0x00)
@@ -854,14 +1025,13 @@ decode_string(const char *buffer, unsigned int buff_size, unsigned int *out_size
 		 * 00000001 1-byte length word, unaligned, TOAST pointer
 		 */
 		uint32		len = VARSIZE_EXTERNAL(buffer);
-		int			result = 0;
 
 		if (len > buff_size)
 			return -1;
 
 		if (blockOptions & BLOCK_DECODE_TOAST)
 		{
-			result = ReadStringFromToast(buffer, buff_size, out_size);
+			result = ReadStringFromToast(buffer, buff_size, out_size, parse_value);
 		}
 		else
 		{
@@ -883,9 +1053,9 @@ decode_string(const char *buffer, unsigned int buff_size, unsigned int *out_size
 		if (len > buff_size)
 			return -1;
 
-		CopyAppendEncode(buffer + 1, len - 1);
+		result = parse_value(buffer + 1, len - 1);
 		*out_size = padding + len;
-		return 0;
+		return result;
 	}
 
 	if (VARATT_IS_4B_U(buffer) && buff_size >= 4)
@@ -898,9 +1068,9 @@ decode_string(const char *buffer, unsigned int buff_size, unsigned int *out_size
 		if (len > buff_size)
 			return -1;
 
-		CopyAppendEncode(buffer + 4, len - 4);
+		result = parse_value(buffer + 4, len - 4);
 		*out_size = padding + len;
-		return 0;
+		return result;
 	}
 
 	if (VARATT_IS_4B_C(buffer) && buff_size >= 8)
@@ -911,7 +1081,9 @@ decode_string(const char *buffer, unsigned int buff_size, unsigned int *out_size
 		int						decompress_ret;
 		uint32					len = VARSIZE_4B(buffer);
 		uint32					decompressed_len = 0;
+#if PG_VERSION_NUM >= 140000
 		ToastCompressionId		cmid;
+#endif
 
 #if PG_VERSION_NUM >= 140000
 		decompressed_len = VARDATA_COMPRESSED_GET_EXTSIZE(buffer);
@@ -934,31 +1106,32 @@ decode_string(const char *buffer, unsigned int buff_size, unsigned int *out_size
 			return 0;
 		}
 
+#if PG_VERSION_NUM >= 140000
 		cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(buffer);
 		switch(cmid)
 		{
 			case TOAST_PGLZ_COMPRESSION_ID:
 				decompress_ret = pglz_decompress(VARDATA_4B_C(buffer), len - 2 * sizeof(uint32),
-												 decompress_tmp_buff, decompressed_len
-#if PG_VERSION_NUM >= 120000
-												 , true
-#endif
-												 );
+												 decompress_tmp_buff, decompressed_len, true);
 				break;
-			case TOAST_LZ4_COMPRESSION_ID:
 #ifdef USE_LZ4
+			case TOAST_LZ4_COMPRESSION_ID:
 				decompress_ret = LZ4_decompress_safe(VARDATA_4B_C(buffer), decompress_tmp_buff,
 													 len - 2 * sizeof(uint32), decompressed_len);
 				break;
-#else
-				printf("Error: compression method lz4 not supported.\n");
-				printf("Try to rebuild pg_filedump for PostgreSQL server of version 14+ with --with-lz4 option.\n");
-				return -2;
 #endif
 			default:
 				decompress_ret = -1;
 				break;
 		}
+#else /* PG_VERSION_NUM < 140000 */
+		decompress_ret = pglz_decompress(VARDATA_4B_C(buffer), len - 2 * sizeof(uint32),
+										 decompress_tmp_buff, decompressed_len
+#if PG_VERSION_NUM >= 120000
+										 , true
+#endif
+										 );
+#endif /* PG_VERSION_NUM >= 140000 */
 
 		if ((decompress_ret != decompressed_len) || (decompress_ret < 0))
 		{
@@ -968,9 +1141,9 @@ decode_string(const char *buffer, unsigned int buff_size, unsigned int *out_size
 			return 0;
 		}
 
-		CopyAppendEncode(decompress_tmp_buff, decompressed_len);
+		result = parse_value(decompress_tmp_buff, decompressed_len);
 		*out_size = padding + len;
-		return 0;
+		return result;
 	}
 
 	return -9;
@@ -1033,7 +1206,7 @@ FormatDecode(const char *tupleData, unsigned int tupleSize)
 	CopyFlush();
 }
 
-static int DumpCompressedString(const char *data, int32 compressed_size)
+static int DumpCompressedString(const char *data, int32 compressed_size, int (*parse_value)(const char *, int))
 {
 	int						decompress_ret;
 	char				   *decompress_tmp_buff = malloc(TOAST_COMPRESS_RAWSIZE(data));
@@ -1087,7 +1260,8 @@ static int DumpCompressedString(const char *data, int32 compressed_size)
 static int
 ReadStringFromToast(const char *buffer,
 		unsigned int buff_size,
-		unsigned int* out_size)
+		unsigned int* out_size,
+		int (*parse_value)(const char *, int))
 {
 	int		result = 0;
 
@@ -1160,9 +1334,9 @@ ReadStringFromToast(const char *buffer,
 			if (result == 0)
 			{
 				if (VARATT_EXTERNAL_IS_COMPRESSED(toast_ptr))
-					result = DumpCompressedString(toast_data, toast_ext_size);
+					result = DumpCompressedString(toast_data, toast_ext_size, parse_value);
 				else
-					CopyAppendEncode(toast_data, toast_ext_size);
+					result = parse_value(toast_data, toast_ext_size);
 			}
 			else
 			{
diff --git a/decode.h b/decode.h
index 4d151a4..24ba2e6 100644
--- a/decode.h
+++ b/decode.h
@@ -1,6 +1,14 @@
 #ifndef _PG_FILEDUMP_DECODE_H_
 #define _PG_FILEDUMP_DECODE_H_
 
+#define NBASE          10000
+#define HALF_NBASE     5000
+#define DEC_DIGITS     4                       /* decimal digits per NBASE digit */
+#define MUL_GUARD_DIGITS       2       /* these are measured in NBASE digits */
+#define DIV_GUARD_DIGITS       4
+
+typedef int16 NumericDigit;
+
 int
 ParseAttributeTypesString(const char *str);
 
@@ -15,4 +23,122 @@ ToastChunkDecode(const char* tuple_data,
 		char *chunk_data,
 		unsigned int *chunk_data_size);
 
+struct NumericShort
+{
+       uint16          n_header;               /* Flag bits + sign + display scale + weight, packed in one word */
+       NumericDigit n_data[FLEXIBLE_ARRAY_MEMBER]; /* Base-NBASE digits */
+};
+
+struct NumericLong
+{
+       uint16          n_sign_dscale;  /* Flag/sign bits + display scale */
+       int16           n_weight;               /* Weight of 1st digit, counted in NBASE digits */
+       NumericDigit n_data[FLEXIBLE_ARRAY_MEMBER]; /* Base-NBASE digits */
+};
+
+union NumericChoice
+{
+       uint16          n_header;               /* Header word; its high bits select the format */
+       struct NumericLong n_long;      /* Long form (4-byte header) */
+       struct NumericShort n_short;    /* Short form (2-byte header) */
+};
+
+struct NumericData
+{
+       union NumericChoice choice; /* Short or long layout; no varlena length word is included here */
+};
+
+/*
+ * Interpretation of high bits.
+ */
+
+#define NUMERIC_SIGN_MASK      0xC000
+#define NUMERIC_POS                    0x0000
+#define NUMERIC_NEG                    0x4000
+#define NUMERIC_SHORT          0x8000
+#define NUMERIC_SPECIAL                0xC000
+
+#define NUMERIC_FLAGBITS(n) ((n)->choice.n_header & NUMERIC_SIGN_MASK)
+#define NUMERIC_IS_SHORT(n)            (NUMERIC_FLAGBITS(n) == NUMERIC_SHORT)
+#define NUMERIC_IS_SPECIAL(n)  (NUMERIC_FLAGBITS(n) == NUMERIC_SPECIAL)
+
+#define NUMERIC_HDRSZ  (VARHDRSZ + sizeof(uint16) + sizeof(int16))
+#define NUMERIC_HDRSZ_SHORT (VARHDRSZ + sizeof(uint16))
+
+/*
+ * Flag bits NUMERIC_SHORT or NUMERIC_SPECIAL mean a short (2-byte) header,
+ * anything else a long (4-byte) one; the high bit alone tells them apart.
+ * Unlike NUMERIC_HDRSZ above, this size does not include VARHDRSZ.
+ */
+#define NUMERIC_HEADER_IS_SHORT(n)     (((n)->choice.n_header & 0x8000) != 0)
+#define NUMERIC_HEADER_SIZE(n) \
+       (sizeof(uint16) + \
+        (NUMERIC_HEADER_IS_SHORT(n) ? 0 : sizeof(int16)))
+
+/*
+ * Definitions for special values (NaN, positive infinity, negative infinity).
+ *
+ * The two bits after the NUMERIC_SPECIAL bits are 00 for NaN, 01 for positive
+ * infinity, 11 for negative infinity.  (This makes the sign bit match where
+ * it is in a short-format value, though we make no use of that at present.)
+ * We could mask off the remaining bits before testing the active bits, but
+ * currently those bits must be zeroes, so masking would just add cycles.
+ */
+#define NUMERIC_EXT_SIGN_MASK  0xF000  /* high bits plus NaN/Inf flag bits */
+#define NUMERIC_NAN                            0xC000
+#define NUMERIC_PINF                   0xD000
+#define NUMERIC_NINF                   0xF000
+#define NUMERIC_INF_SIGN_MASK  0x2000
+
+#define NUMERIC_EXT_FLAGBITS(n)        ((n)->choice.n_header & NUMERIC_EXT_SIGN_MASK)
+#define NUMERIC_IS_NAN(n)              ((n)->choice.n_header == NUMERIC_NAN)
+#define NUMERIC_IS_PINF(n)             ((n)->choice.n_header == NUMERIC_PINF)
+#define NUMERIC_IS_NINF(n)             ((n)->choice.n_header == NUMERIC_NINF)
+#define NUMERIC_IS_INF(n) \
+       (((n)->choice.n_header & ~NUMERIC_INF_SIGN_MASK) == NUMERIC_PINF)
+
+/*
+ * Short format definitions.
+ */
+
+#define NUMERIC_SHORT_SIGN_MASK                        0x2000
+#define NUMERIC_SHORT_DSCALE_MASK              0x1F80
+#define NUMERIC_SHORT_DSCALE_SHIFT             7
+#define NUMERIC_SHORT_DSCALE_MAX               \
+       (NUMERIC_SHORT_DSCALE_MASK >> NUMERIC_SHORT_DSCALE_SHIFT)
+#define NUMERIC_SHORT_WEIGHT_SIGN_MASK 0x0040
+#define NUMERIC_SHORT_WEIGHT_MASK              0x003F
+#define NUMERIC_SHORT_WEIGHT_MAX               NUMERIC_SHORT_WEIGHT_MASK
+#define NUMERIC_SHORT_WEIGHT_MIN               (-(NUMERIC_SHORT_WEIGHT_MASK+1))
+
+/*
+ * Extract sign, display scale, weight.  These macros pull the field values
+ * out of the Numeric (on-disk) header, in either short or long format.
+ *
+ * Note that we don't trouble to ensure that dscale and weight read as zero
+ * for an infinity; however, that doesn't matter since "special" numerics
+ * (NaN and the infinities) are printed by name and never have their dscale
+ * or weight examined.
+ */
+
+#define NUMERIC_DSCALE_MASK                    0x3FFF
+#define NUMERIC_DSCALE_MAX                     NUMERIC_DSCALE_MASK
+
+#define NUMERIC_SIGN(n) \
+       (NUMERIC_IS_SHORT(n) ? \
+               (((n)->choice.n_short.n_header & NUMERIC_SHORT_SIGN_MASK) ? \
+                NUMERIC_NEG : NUMERIC_POS) : \
+               (NUMERIC_IS_SPECIAL(n) ? \
+                NUMERIC_EXT_FLAGBITS(n) : NUMERIC_FLAGBITS(n)))
+#define NUMERIC_DSCALE(n)      (NUMERIC_HEADER_IS_SHORT((n)) ? \
+       ((n)->choice.n_short.n_header & NUMERIC_SHORT_DSCALE_MASK) \
+               >> NUMERIC_SHORT_DSCALE_SHIFT \
+       : ((n)->choice.n_long.n_sign_dscale & NUMERIC_DSCALE_MASK))
+#define NUMERIC_WEIGHT(n)      (NUMERIC_HEADER_IS_SHORT((n)) ? \
+       (((n)->choice.n_short.n_header & NUMERIC_SHORT_WEIGHT_SIGN_MASK ? \
+               ~NUMERIC_SHORT_WEIGHT_MASK : 0) \
+        | ((n)->choice.n_short.n_header & NUMERIC_SHORT_WEIGHT_MASK)) \
+       : ((n)->choice.n_long.n_weight))
+
+
 #endif
diff --git a/pg_filedump.c b/pg_filedump.c
index ba43550..bdd9266 100644
--- a/pg_filedump.c
+++ b/pg_filedump.c
@@ -177,7 +177,7 @@ DisplayOptions(unsigned int validOptions)
 		 "  -D  Decode tuples using given comma separated list of types\n"
 		 "      Supported types:\n"
 		 "        bigint bigserial bool char charN date float float4 float8 int\n"
-		 "        json macaddr name oid real serial smallint smallserial text\n"
+		 "        json macaddr name numeric oid real serial smallint smallserial text\n"
 		 "        time timestamp timetz uuid varchar varcharN xid xml\n"
 		 "      ~ ignores all attributes left in a tuple\n"
 		 "  -f  Display formatted block content dump along with interpretation\n"