For jsonb_hash_ops, hash less
author: Peter Geoghegan <pg@heroku.com>
Wed, 19 Mar 2014 09:02:16 +0000 (02:02 -0700)
committer: Peter Geoghegan <pg@heroku.com>
Wed, 19 Mar 2014 09:02:16 +0000 (02:02 -0700)
By limiting the GIN entries to the least-nested level, the delicious.com
sample JSON dataset index shrinks in size from 382MB to 255MB without
any apparent downside.

src/backend/utils/adt/jsonb_gin.c

index 034f3935e6bebad581c56685ae55f283ee06af17..c7cec4ffc9968155226dda6b4dfbd35f950f6703 100644 (file)
@@ -22,7 +22,7 @@
 
 typedef struct PathHashStack
 {
-   uint32  hash_state;
+   uint32  hash;
    struct PathHashStack *next;
 }  PathHashStack;
 
@@ -493,12 +493,12 @@ gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
    it = JsonbIteratorInit(VARDATA(jb));
 
    tail.next = NULL;
-   tail.hash_state = 0;
+   tail.hash = 0;
+
    stack = &tail;
 
    while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
    {
-       uint32          temphash;
        PathHashStack  *tmp;
 
        if (i >= total)
@@ -526,15 +526,23 @@ gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
                break;
            case WJB_KEY:
                /* Calc hash of key and separated into preserved stack item */
-               stack->hash_state = stack->next->hash_state;
-               hash_scalar_value(&v, &stack->hash_state);
+               stack->hash = stack->next->hash;
+               hash_scalar_value(&v, &stack->hash);
                break;
            case WJB_VALUE:
            case WJB_ELEM:
-               stack->hash_state = stack->next->hash_state;
-               hash_scalar_value(&v, &stack->hash_state);
-               temphash = stack->hash_state;
-               entries[i++] = temphash;
+               /*
+                * Since jsonb_hash_ops does not support existence operator,
+                * it's okay to lump together pair values and elements.  For a
+                * key/value pair, the hash value has both values mixed in
+                * together.  For deeply nested key/value pairs (and elements)
+                * the container hash value is mixed in too.
+                */
+               stack->hash = stack->next->hash;
+               hash_scalar_value(&v, &stack->hash);
+               /* Only store entries at the least nested level */
+               if (!it->parent)
+                   entries[i++] = stack->hash;
                break;
            case WJB_END_ARRAY:
            case WJB_END_OBJECT: