Skip to content

Commit

Permalink
Fix shameful bug with maximum item size in entry tree
Browse files Browse the repository at this point in the history
RUM posting trees received right-bound keys, and RumMaxItemSize was
accidentally adjusted to account for them.  However, RumMaxItemSize relates
only to the entry tree, which wasn't changed.  That caused a long-standing bug
where long lexemes couldn't fit into the entry tree.  The bug is fixed in this
commit.  Also, regression tests were added for the longest lexemes with the
longest position lists.
  • Loading branch information
Alexander Korotkov committed Dec 9, 2016
1 parent 5da261a commit 58fee28
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ DATA = rum--1.0.sql
PGFILEDESC = "RUM index access method"

REGRESS = rum rum_hash ruminv timestamp orderby orderby_hash altorder \
altorder_hash
altorder_hash limits

LDFLAGS_SL += $(filter -lm, $(LIBS))

Expand Down
162 changes: 162 additions & 0 deletions expected/limits.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
-- Check we can put and query lexemes of maximum size 2046 bytes
-- with maximum posting list size.
CREATE TABLE limits_test (v tsvector);
INSERT INTO limits_test (SELECT (SELECT (repeat(chr(65 + num % 26), 2046) || ':' || string_agg(i::text, ','))::tsvector FROM generate_series(1,1024) i) FROM generate_series(1,1000) num);
CREATE INDEX limits_test_idx ON limits_test USING rum (v);
SET enable_seqscan = off;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('A', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('B', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('C', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('D', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('E', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('F', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('G', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('H', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('I', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('J', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('K', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('L', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('M', 2046)::tsquery;
count
-------
39
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('N', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('O', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('P', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Q', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('R', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('S', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('T', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('U', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('V', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('W', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('X', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Y', 2046)::tsquery;
count
-------
38
(1 row)

SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Z', 2046)::tsquery;
count
-------
38
(1 row)

33 changes: 33 additions & 0 deletions sql/limits.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
-- Check we can put and query lexemes of maximum size 2046 bytes
-- with maximum posting list size.
CREATE TABLE limits_test (v tsvector);
INSERT INTO limits_test (SELECT (SELECT (repeat(chr(65 + num % 26), 2046) || ':' || string_agg(i::text, ','))::tsvector FROM generate_series(1,1024) i) FROM generate_series(1,1000) num);
CREATE INDEX limits_test_idx ON limits_test USING rum (v);

SET enable_seqscan = off;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('A', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('B', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('C', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('D', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('E', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('F', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('G', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('H', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('I', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('J', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('K', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('L', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('M', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('N', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('O', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('P', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Q', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('R', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('S', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('T', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('U', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('V', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('W', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('X', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Y', 2046)::tsquery;
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Z', 2046)::tsquery;
15 changes: 12 additions & 3 deletions src/rum.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,19 @@ typedef signed char RumNullCategory;
#define RumSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,n)
#define RumGetPosting(itup) ((Pointer) ((char*)(itup) + RumGetPostingOffset(itup)))

/*
* Maximum size of an item on entry tree page. Make sure that we fit at least
* three items on each page. (On regular B-tree indexes, we must fit at least
* three items: two data items and the "high key". In RUM entry tree, we don't
* currently store the high key explicitly, we just use the rightmost item on
* the page, so it would actually be enough to fit two items.)
*/
#define RumMaxItemSize \
MAXALIGN_DOWN(((BLCKSZ - SizeOfPageHeaderData - \
MAXALIGN(sizeof(RumPageOpaqueData))) / 6 - \
sizeof(RumKey) /* right bound */))
Min(INDEX_SIZE_MASK, \
MAXALIGN_DOWN(((BLCKSZ - \
MAXALIGN(SizeOfPageHeaderData + 3 * sizeof(ItemIdData)) - \
MAXALIGN(sizeof(RumPageOpaqueData))) / 3)))


/*
* Access macros for non-leaf entry tuples
Expand Down

0 comments on commit 58fee28

Please sign in to comment.