commit
eeb0e488d8
|
@ -462,6 +462,7 @@ static BitmapOr* _copyBitmapOr(const BitmapOr* from)
|
|||
/*
|
||||
* copy remainder of node
|
||||
*/
|
||||
COPY_SCALAR_FIELD(isshared);
|
||||
COPY_NODE_FIELD(bitmapplans);
|
||||
|
||||
return newnode;
|
||||
|
@ -703,6 +704,7 @@ static BitmapIndexScan* _copyBitmapIndexScan(const BitmapIndexScan* from)
|
|||
* copy remainder of node
|
||||
*/
|
||||
COPY_SCALAR_FIELD(indexid);
|
||||
COPY_SCALAR_FIELD(isshared);
|
||||
COPY_NODE_FIELD(indexqual);
|
||||
COPY_NODE_FIELD(indexqualorig);
|
||||
|
||||
|
|
|
@ -873,7 +873,7 @@ static void _outBitmapOr(StringInfo str, BitmapOr* node)
|
|||
WRITE_NODE_TYPE("BITMAPOR");
|
||||
|
||||
_outPlanInfo(str, (Plan*)node);
|
||||
|
||||
WRITE_BOOL_FIELD(isshared);
|
||||
WRITE_NODE_FIELD(bitmapplans);
|
||||
}
|
||||
static void _outCStoreIndexOr(StringInfo str, CStoreIndexOr* node)
|
||||
|
@ -1176,6 +1176,7 @@ static void _outBitmapIndexScan(StringInfo str, BitmapIndexScan* node)
|
|||
_outScanInfo(str, (Scan*)node);
|
||||
|
||||
WRITE_OID_FIELD(indexid);
|
||||
WRITE_BOOL_FIELD(isshared);
|
||||
WRITE_NODE_FIELD(indexqual);
|
||||
WRITE_NODE_FIELD(indexqualorig);
|
||||
#ifdef STREAMPLAN
|
||||
|
|
|
@ -2838,6 +2838,7 @@ static BitmapOr* _readBitmapOr(BitmapOr* local_node)
|
|||
READ_TEMP_LOCALS();
|
||||
|
||||
_readPlan(&local_node->plan);
|
||||
READ_BOOL_FIELD(isshared);
|
||||
READ_NODE_FIELD(bitmapplans);
|
||||
|
||||
READ_DONE();
|
||||
|
@ -3059,6 +3060,7 @@ static BitmapIndexScan* _readBitmapIndexScan(BitmapIndexScan* local_node)
|
|||
_readScan(&local_node->scan);
|
||||
|
||||
READ_OID_FIELD(indexid);
|
||||
READ_BOOL_FIELD(isshared);
|
||||
READ_NODE_FIELD(indexqual);
|
||||
READ_NODE_FIELD(indexqualorig);
|
||||
#ifdef STREAMPLAN
|
||||
|
|
|
@ -45,6 +45,7 @@
|
|||
#include "nodes/bitmapset.h"
|
||||
#include "nodes/tidbitmap.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "storage/lwlock.h"
|
||||
|
||||
/*
|
||||
* The maximum number of tuples per page is not large (typically 256 with
|
||||
|
@ -117,6 +118,7 @@ typedef struct PagetableEntry {
|
|||
bool recheck; /* should the tuples be rechecked? */
|
||||
bitmapword words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)];
|
||||
} PagetableEntry;
|
||||
|
||||
/*
|
||||
* dynahash.c is optimized for relatively large, long-lived hash tables.
|
||||
* This is not ideal for TIDBitMap, particularly when we are using a bitmap
|
||||
|
@ -134,6 +136,15 @@ typedef enum {
|
|||
TBM_HASH /* pagetable is valid, entry1 is not */
|
||||
} TBMStatus;
|
||||
|
||||
/*
|
||||
* Current iterating state of the TBM.
|
||||
*/
|
||||
typedef enum {
|
||||
TBM_NOT_ITERATING, /* not yet converted to page and chunk array */
|
||||
TBM_ITERATING_PRIVATE, /* converted to local page and chunk array */
|
||||
TBM_ITERATING_SHARED /* converted to shared page and chunk array */
|
||||
} TBMIteratingState;
|
||||
|
||||
/*
|
||||
* Here is the representation for a whole TIDBitMap:
|
||||
*/
|
||||
|
@ -146,8 +157,9 @@ struct TIDBitmap {
|
|||
int maxentries; /* limit on same to meet maxbytes */
|
||||
int npages; /* number of exact entries in pagetable */
|
||||
int nchunks; /* number of lossy entries in pagetable */
|
||||
bool iterating; /* tbm_begin_iterate called? */
|
||||
TBMIteratingState iterating; /* tbm_begin_iterate called? */
|
||||
bool isGlobalPart; /* represent global partition index tbm */
|
||||
bool isShared; /* is shared pagetable? */
|
||||
PagetableEntry entry1; /* used when status == TBM_ONE_PAGE */
|
||||
/* these are valid when iterating is true: */
|
||||
PagetableEntry** spages; /* sorted exact-page list, or NULL */
|
||||
|
@ -168,8 +180,40 @@ struct TBMIterator {
|
|||
TBMIterateResult output; /* MUST BE LAST (because variable-size) */
|
||||
};
|
||||
|
||||
/*
|
||||
* Holds the shared members of the iterator so that multiple processes
|
||||
* can jointly iterate.
|
||||
*/
|
||||
struct TBMSharedIteratorState {
|
||||
int nentries; /* number of entries in pagetable */
|
||||
int maxentries; /* limit on same to meet maxbytes */
|
||||
int npages; /* number of exact entries in pagetable */
|
||||
int nchunks; /* number of lossy entries in pagetable */
|
||||
TBMStatus status; /* see codes above */
|
||||
PagetableEntry entry1; /* used when status == TBM_ONE_PAGE */
|
||||
HTAB* pagetable; /* hash table of PagetableEntry's */
|
||||
PagetableEntry** spages; /* sorted exact-page list, or NULL */
|
||||
PagetableEntry** schunks; /* sorted lossy-chunk list, or NULL */
|
||||
pg_atomic_uint32 pagetableRefcount; /* ref count for pagetable */
|
||||
pg_atomic_uint32 pagesRefcount; /* ref count for spages */
|
||||
pg_atomic_uint32 chunksRefcount; /* ref count for schunks */
|
||||
LWLock lock; /* lock to protect below members */
|
||||
int spageptr; /* next spages index */
|
||||
int schunkptr; /* next schunks index */
|
||||
int schunkbit; /* next bit to check in current schunk */
|
||||
};
|
||||
|
||||
/*
|
||||
* same as TBMIterator, but it is used for joint iteration, therefore this
|
||||
* also holds a reference to the shared state.
|
||||
*/
|
||||
struct TBMSharedIterator {
|
||||
TBMSharedIteratorState *state; /* shared state */
|
||||
TBMIterateResult output; /* MUST BE LAST (because variable-size) */
|
||||
};
|
||||
|
||||
/* Local function prototypes */
|
||||
static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage);
|
||||
static void tbm_union_page(TIDBitmap *a, const PagetableEntry *bpage);
|
||||
static bool tbm_intersect_page(TIDBitmap* a, PagetableEntry* apage, const TIDBitmap* b);
|
||||
static const PagetableEntry* tbm_find_pageentry(const TIDBitmap* tbm, PagetableEntryNode pageNode);
|
||||
static PagetableEntry* tbm_get_pageentry(TIDBitmap* tbm, PagetableEntryNode pageNode);
|
||||
|
@ -177,15 +221,18 @@ static bool tbm_page_is_lossy(const TIDBitmap* tbm, PagetableEntryNode pageNode)
|
|||
static void tbm_mark_page_lossy(TIDBitmap* tbm, PagetableEntryNode pageNode);
|
||||
static void tbm_lossify(TIDBitmap* tbm);
|
||||
static int tbm_comparator(const void* left, const void* right);
|
||||
static void tbm_sort_pages(TIDBitmap* tbm);
|
||||
|
||||
/*
|
||||
* tbm_create - create an initially-empty bitmap
|
||||
*
|
||||
* The bitmap will live in the memory context that is CurrentMemoryContext
|
||||
* at the time of this call. It will be limited to (approximately) maxbytes
|
||||
* total memory consumption.
|
||||
* total memory consumption. If the DSA passed to this function is not NULL
|
||||
* then the memory for storing elements of the underlying page table will
|
||||
* be allocated from the DSA.
|
||||
*/
|
||||
TIDBitmap* tbm_create(long maxbytes)
|
||||
TIDBitmap* tbm_create(long maxbytes, MemoryContext dsa)
|
||||
{
|
||||
TIDBitmap* tbm = NULL;
|
||||
long nbuckets;
|
||||
|
@ -193,7 +240,13 @@ TIDBitmap* tbm_create(long maxbytes)
|
|||
/* Create the TIDBitmap struct and zero all its fields */
|
||||
tbm = makeNode(TIDBitmap);
|
||||
|
||||
tbm->mcxt = CurrentMemoryContext;
|
||||
if (dsa == NULL) {
|
||||
tbm->mcxt = CurrentMemoryContext;
|
||||
tbm->isShared = false;
|
||||
} else {
|
||||
tbm->mcxt = dsa;
|
||||
tbm->isShared = true;
|
||||
}
|
||||
tbm->status = TBM_EMPTY;
|
||||
tbm->isGlobalPart = false;
|
||||
/*
|
||||
|
@ -255,18 +308,39 @@ static void tbm_create_pagetable(TIDBitmap* tbm)
|
|||
*/
|
||||
void tbm_free(TIDBitmap* tbm)
|
||||
{
|
||||
if (tbm->pagetable != NULL) {
|
||||
/*
|
||||
* Don't call hash_destroy when it's shared, cause the memcxt for hash table is already deleted
|
||||
* when calling dsm_detach. Same for spages and schunks, cause they are alloc by the same memcxt.
|
||||
*/
|
||||
if (!tbm->isShared) {
|
||||
hash_destroy(tbm->pagetable);
|
||||
}
|
||||
if (tbm->spages != NULL) {
|
||||
pfree_ext(tbm->spages);
|
||||
}
|
||||
if (tbm->schunks != NULL) {
|
||||
pfree_ext(tbm->schunks);
|
||||
}
|
||||
pfree_ext(tbm);
|
||||
}
|
||||
|
||||
/*
|
||||
* tbm_free_shared_area - free shared state
|
||||
*
|
||||
* Free shared iterator state, Also free shared pagetable and iterator arrays
|
||||
* memory if they are not referred by any of the shared iterator i.e recount
|
||||
* is becomes 0.
|
||||
*/
|
||||
void tbm_free_shared_area(TBMSharedIteratorState *istate)
|
||||
{
|
||||
if (pg_atomic_sub_fetch_u32(&istate->pagetableRefcount, 1) == 0) {
|
||||
hash_destroy(istate->pagetable);
|
||||
}
|
||||
if (pg_atomic_sub_fetch_u32(&istate->pagesRefcount, 1) == 0) {
|
||||
pfree_ext(istate->spages);
|
||||
}
|
||||
if (pg_atomic_sub_fetch_u32(&istate->chunksRefcount, 1) == 0) {
|
||||
pfree_ext(istate->schunks);
|
||||
}
|
||||
pfree_ext(istate);
|
||||
}
|
||||
|
||||
/*
|
||||
* tbm_add_tuples - add some tuple IDs to a TIDBitmap
|
||||
*
|
||||
|
@ -277,7 +351,7 @@ void tbm_add_tuples(TIDBitmap* tbm, const ItemPointer tids, int ntids, bool rech
|
|||
{
|
||||
int i;
|
||||
|
||||
Assert(!tbm->iterating);
|
||||
Assert(tbm->iterating == TBM_NOT_ITERATING);
|
||||
for (i = 0; i < ntids; i++) {
|
||||
BlockNumber blk = ItemPointerGetBlockNumber(tids + i);
|
||||
OffsetNumber off = ItemPointerGetOffsetNumber(tids + i);
|
||||
|
@ -340,7 +414,7 @@ void tbm_add_page(TIDBitmap* tbm, BlockNumber pageno, Oid partitionOid)
|
|||
*/
|
||||
void tbm_union(TIDBitmap* a, const TIDBitmap* b)
|
||||
{
|
||||
Assert(!a->iterating);
|
||||
Assert(a->iterating == TBM_NOT_ITERATING);
|
||||
/* Nothing to do if b is empty */
|
||||
if (b->nentries == 0) {
|
||||
return;
|
||||
|
@ -414,7 +488,7 @@ static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage)
|
|||
*/
|
||||
void tbm_intersect(TIDBitmap* a, const TIDBitmap* b)
|
||||
{
|
||||
Assert(!a->iterating);
|
||||
Assert(a->iterating == TBM_NOT_ITERATING);
|
||||
/* Nothing to do if a is empty */
|
||||
if (a->nentries == 0) {
|
||||
return;
|
||||
|
@ -533,6 +607,39 @@ bool tbm_is_empty(const TIDBitmap* tbm)
|
|||
return (tbm->nentries == 0);
|
||||
}
|
||||
|
||||
static void tbm_sort_pages(TIDBitmap* tbm)
|
||||
{
|
||||
HASH_SEQ_STATUS status;
|
||||
PagetableEntry* page = NULL;
|
||||
int npages;
|
||||
int nchunks;
|
||||
|
||||
if (tbm->spages == NULL && tbm->npages > 0) {
|
||||
tbm->spages = (PagetableEntry**)MemoryContextAlloc(tbm->mcxt, tbm->npages * sizeof(PagetableEntry*));
|
||||
}
|
||||
if ((tbm->schunks == NULL) && tbm->nchunks > 0) {
|
||||
tbm->schunks = (PagetableEntry**)MemoryContextAlloc(tbm->mcxt, tbm->nchunks * sizeof(PagetableEntry*));
|
||||
}
|
||||
|
||||
hash_seq_init(&status, tbm->pagetable);
|
||||
npages = nchunks = 0;
|
||||
while ((page = (PagetableEntry*)hash_seq_search(&status)) != NULL) {
|
||||
if (page->ischunk) {
|
||||
tbm->schunks[nchunks++] = page;
|
||||
} else {
|
||||
tbm->spages[npages++] = page;
|
||||
}
|
||||
}
|
||||
Assert(npages == tbm->npages);
|
||||
Assert(nchunks == tbm->nchunks);
|
||||
if (npages > 1) {
|
||||
qsort(tbm->spages, npages, sizeof(PagetableEntry*), tbm_comparator);
|
||||
}
|
||||
if (nchunks > 1) {
|
||||
qsort(tbm->schunks, nchunks, sizeof(PagetableEntry*), tbm_comparator);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* tbm_begin_iterate - prepare to iterate through a TIDBitmap
|
||||
*
|
||||
|
@ -548,6 +655,7 @@ bool tbm_is_empty(const TIDBitmap* tbm)
|
|||
*/
|
||||
TBMIterator* tbm_begin_iterate(TIDBitmap* tbm)
|
||||
{
|
||||
Assert(tbm->iterating != TBM_ITERATING_SHARED);
|
||||
TBMIterator* iterator = NULL;
|
||||
|
||||
/*
|
||||
|
@ -570,43 +678,134 @@ TBMIterator* tbm_begin_iterate(TIDBitmap* tbm)
|
|||
* attached to the bitmap not the iterator, so they can be used by more
|
||||
* than one iterator.
|
||||
*/
|
||||
if (tbm->status == TBM_HASH && !tbm->iterating) {
|
||||
HASH_SEQ_STATUS status;
|
||||
PagetableEntry* page = NULL;
|
||||
int npages;
|
||||
int nchunks;
|
||||
|
||||
if (tbm->spages == NULL && tbm->npages > 0) {
|
||||
tbm->spages = (PagetableEntry**)MemoryContextAlloc(tbm->mcxt, tbm->npages * sizeof(PagetableEntry*));
|
||||
}
|
||||
if ((tbm->schunks == NULL) && tbm->nchunks > 0) {
|
||||
tbm->schunks = (PagetableEntry**)MemoryContextAlloc(tbm->mcxt, tbm->nchunks * sizeof(PagetableEntry*));
|
||||
}
|
||||
|
||||
hash_seq_init(&status, tbm->pagetable);
|
||||
npages = nchunks = 0;
|
||||
while ((page = (PagetableEntry*)hash_seq_search(&status)) != NULL) {
|
||||
if (page->ischunk) {
|
||||
tbm->schunks[nchunks++] = page;
|
||||
} else {
|
||||
tbm->spages[npages++] = page;
|
||||
}
|
||||
}
|
||||
Assert(npages == tbm->npages);
|
||||
Assert(nchunks == tbm->nchunks);
|
||||
if (npages > 1) {
|
||||
qsort(tbm->spages, npages, sizeof(PagetableEntry*), tbm_comparator);
|
||||
}
|
||||
if (nchunks > 1) {
|
||||
qsort(tbm->schunks, nchunks, sizeof(PagetableEntry*), tbm_comparator);
|
||||
}
|
||||
if (tbm->status == TBM_HASH && tbm->iterating == TBM_NOT_ITERATING) {
|
||||
tbm_sort_pages(tbm);
|
||||
}
|
||||
|
||||
tbm->iterating = true;
|
||||
tbm->iterating = TBM_ITERATING_PRIVATE;
|
||||
|
||||
return iterator;
|
||||
}
|
||||
|
||||
/*
|
||||
* tbm_prepare_shared_iterate - prepare shared iteration state for a TIDBitmap.
|
||||
*
|
||||
* The necessary shared state will be allocated from the DSA passed to
|
||||
* tbm_create, so that multiple processes can attach to it and iterate jointly.
|
||||
*
|
||||
* This will convert the pagetable hash into page and chunk array of the index
|
||||
* into pagetable array.
|
||||
*/
|
||||
TBMSharedIteratorState* tbm_prepare_shared_iterate(TIDBitmap *tbm)
|
||||
{
|
||||
Assert(tbm->iterating != TBM_ITERATING_PRIVATE);
|
||||
|
||||
/*
|
||||
* Allocate TBMSharedIteratorState from DSA to hold the shared members and
|
||||
* lock, this will also be used by multiple worker for shared iterate.
|
||||
*/
|
||||
TBMSharedIteratorState *istate = (TBMSharedIteratorState*)MemoryContextAllocZero(tbm->mcxt,
|
||||
sizeof(TBMSharedIteratorState));
|
||||
|
||||
/*
|
||||
* If we have a hashtable, create and fill the sorted page lists, unless
|
||||
* we already did that for a previous iterator. Note that the lists are
|
||||
* attached to the bitmap not the iterator, so they can be used by more
|
||||
* than one iterator.
|
||||
*/
|
||||
if (tbm->iterating == TBM_NOT_ITERATING) {
|
||||
if (tbm->status == TBM_HASH) {
|
||||
tbm_sort_pages(tbm);
|
||||
}
|
||||
pg_atomic_init_u32(&istate->pagetableRefcount, 0);
|
||||
pg_atomic_init_u32(&istate->pagesRefcount, 0);
|
||||
pg_atomic_init_u32(&istate->chunksRefcount, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Store the TBM members in the shared state so that we can share them
|
||||
* across multiple processes.
|
||||
*/
|
||||
istate->nentries = tbm->nentries;
|
||||
istate->maxentries = tbm->maxentries;
|
||||
istate->npages = tbm->npages;
|
||||
istate->nchunks = tbm->nchunks;
|
||||
istate->pagetable = tbm->pagetable;
|
||||
istate->spages = tbm->spages;
|
||||
istate->schunks = tbm->schunks;
|
||||
istate->status = tbm->status;
|
||||
int rc = memcpy_s(&istate->entry1, sizeof(PagetableEntry), &tbm->entry1, sizeof(PagetableEntry));
|
||||
securec_check(rc, "", "");
|
||||
/*
|
||||
* For every shared iterator, referring to pagetable and iterator array,
|
||||
* increase the refcount by 1 so that while freeing the shared iterator we
|
||||
* don't free pagetable and iterator array until its refcount becomes 0.
|
||||
*/
|
||||
(void)pg_atomic_add_fetch_u32(&istate->pagetableRefcount, 1);
|
||||
(void)pg_atomic_add_fetch_u32(&istate->pagesRefcount, 1);
|
||||
(void)pg_atomic_add_fetch_u32(&istate->chunksRefcount, 1);
|
||||
/* Initialize the iterator lock */
|
||||
LWLockInitialize(&istate->lock, LWTRANCHE_TBM);
|
||||
|
||||
/* Initialize the shared iterator state */
|
||||
istate->schunkbit = 0;
|
||||
istate->schunkptr = 0;
|
||||
istate->spageptr = 0;
|
||||
|
||||
tbm->iterating = TBM_ITERATING_SHARED;
|
||||
|
||||
return istate;
|
||||
}
|
||||
|
||||
/*
|
||||
* tbm_extract_page_tuple - extract the tuple offsets from a page
|
||||
*
|
||||
* The extracted offsets are stored into TBMIterateResult.
|
||||
*/
|
||||
static inline int tbm_extract_page_tuple(const PagetableEntry *page, TBMIterateResult *output)
|
||||
{
|
||||
int ntuples = 0;
|
||||
|
||||
for (int wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++) {
|
||||
bitmapword w = page->words[wordnum];
|
||||
|
||||
if (w != 0) {
|
||||
int off = wordnum * BITS_PER_BITMAPWORD + 1;
|
||||
|
||||
while (w != 0) {
|
||||
if (w & 1) {
|
||||
output->offsets[ntuples++] = (OffsetNumber)off;
|
||||
}
|
||||
off++;
|
||||
w >>= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ntuples;
|
||||
}
|
||||
|
||||
/*
|
||||
* tbm_advance_schunkbit - Advance the schunkbit
|
||||
*/
|
||||
static inline void tbm_advance_schunkbit(const PagetableEntry *chunk, int *schunkbitp)
|
||||
{
|
||||
int schunkbit = *schunkbitp;
|
||||
|
||||
while (schunkbit < PAGES_PER_CHUNK) {
|
||||
int wordnum = WORDNUM(schunkbit);
|
||||
int bitnum = BITNUM(schunkbit);
|
||||
|
||||
if ((chunk->words[wordnum] & ((bitmapword)1 << (unsigned int)bitnum)) != 0) {
|
||||
break;
|
||||
}
|
||||
schunkbit++;
|
||||
}
|
||||
|
||||
*schunkbitp = schunkbit;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* tbm_iterate - scan through next page of a TIDBitmap
|
||||
*
|
||||
|
@ -624,7 +823,7 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator)
|
|||
TIDBitmap* tbm = iterator->tbm;
|
||||
TBMIterateResult* output = &(iterator->output);
|
||||
|
||||
Assert(tbm->iterating);
|
||||
Assert(tbm->iterating == TBM_ITERATING_PRIVATE);
|
||||
|
||||
/*
|
||||
* If lossy chunk pages remain, make sure we've advanced schunkptr/
|
||||
|
@ -634,15 +833,7 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator)
|
|||
PagetableEntry* chunk = tbm->schunks[iterator->schunkptr];
|
||||
int schunkbit = iterator->schunkbit;
|
||||
|
||||
while (schunkbit < PAGES_PER_CHUNK) {
|
||||
int wordnum = WORDNUM(schunkbit);
|
||||
int bitnum = BITNUM(schunkbit);
|
||||
|
||||
if ((chunk->words[wordnum] & ((bitmapword)1 << (unsigned int)bitnum)) != 0) {
|
||||
break;
|
||||
}
|
||||
schunkbit++;
|
||||
}
|
||||
tbm_advance_schunkbit(chunk, &schunkbit);
|
||||
if (schunkbit < PAGES_PER_CHUNK) {
|
||||
iterator->schunkbit = schunkbit;
|
||||
break;
|
||||
|
@ -676,7 +867,6 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator)
|
|||
if (iterator->spageptr < tbm->npages) {
|
||||
PagetableEntry* page = NULL;
|
||||
int ntuples;
|
||||
int wordnum;
|
||||
|
||||
/* In ONE_PAGE state, we don't allocate an spages[] array */
|
||||
if (tbm->status == TBM_ONE_PAGE) {
|
||||
|
@ -686,22 +876,7 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator)
|
|||
}
|
||||
|
||||
/* scan bitmap to extract individual offset numbers */
|
||||
ntuples = 0;
|
||||
for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++) {
|
||||
bitmapword w = page->words[wordnum];
|
||||
|
||||
if (w != 0) {
|
||||
int off = wordnum * BITS_PER_BITMAPWORD + 1;
|
||||
|
||||
while (w != 0) {
|
||||
if (w & 1) {
|
||||
output->offsets[ntuples++] = (OffsetNumber)off;
|
||||
}
|
||||
off++;
|
||||
w >>= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
ntuples = tbm_extract_page_tuple(page, output);
|
||||
output->blockno = page->entryNode.blockNo;
|
||||
output->partitionOid = page->entryNode.partitionOid;
|
||||
output->ntuples = ntuples;
|
||||
|
@ -714,6 +889,91 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* tbm_shared_iterate - scan through next page of a TIDBitmap
|
||||
*
|
||||
* As above, but this will iterate using an iterator which is shared
|
||||
* across multiple processes. We need to acquire the iterator LWLock,
|
||||
* before accessing the shared members.
|
||||
*/
|
||||
TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator)
|
||||
{
|
||||
TBMIterateResult *output = &iterator->output;
|
||||
TBMSharedIteratorState *istate = iterator->state;
|
||||
|
||||
/* Acquire the LWLock before accessing the shared members */
|
||||
(void)LWLockAcquire(&istate->lock, LW_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* If lossy chunk pages remain, make sure we've advanced schunkptr/
|
||||
* schunkbit to the next set bit.
|
||||
*/
|
||||
while (istate->schunkptr < istate->nchunks) {
|
||||
PagetableEntry *chunk = istate->schunks[istate->schunkptr];
|
||||
int schunkbit = istate->schunkbit;
|
||||
|
||||
tbm_advance_schunkbit(chunk, &schunkbit);
|
||||
if (schunkbit < PAGES_PER_CHUNK) {
|
||||
istate->schunkbit = schunkbit;
|
||||
break;
|
||||
}
|
||||
/* advance to next chunk */
|
||||
istate->schunkptr++;
|
||||
istate->schunkbit = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If both chunk and per-page data remain, must output the numerically
|
||||
* earlier page.
|
||||
*/
|
||||
if (istate->schunkptr < istate->nchunks) {
|
||||
PagetableEntry *chunk = istate->schunks[istate->schunkptr];
|
||||
PagetableEntryNode pnode;
|
||||
pnode.blockNo = chunk->entryNode.blockNo + istate->schunkbit;
|
||||
pnode.partitionOid = chunk->entryNode.partitionOid;
|
||||
|
||||
if (istate->spageptr >= istate->npages ||
|
||||
IS_CHUNK_BEFORE_PAGE(pnode, istate->spages[istate->spageptr]->entryNode)) {
|
||||
/* Return a lossy page indicator from the chunk */
|
||||
output->blockno = pnode.blockNo;
|
||||
output->partitionOid = pnode.partitionOid;
|
||||
output->ntuples = -1;
|
||||
output->recheck = true;
|
||||
istate->schunkbit++;
|
||||
|
||||
LWLockRelease(&istate->lock);
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
||||
if (istate->spageptr < istate->npages) {
|
||||
PagetableEntry *page = NULL;
|
||||
|
||||
/* In ONE_PAGE state, we don't allocate an spages[] array */
|
||||
if (istate->status == TBM_ONE_PAGE) {
|
||||
page = &istate->entry1;
|
||||
} else {
|
||||
page = istate->spages[istate->spageptr];
|
||||
}
|
||||
|
||||
/* scan bitmap to extract individual offset numbers */
|
||||
int ntuples = tbm_extract_page_tuple(page, output);
|
||||
output->blockno = page->entryNode.blockNo;
|
||||
output->partitionOid = page->entryNode.partitionOid;
|
||||
output->ntuples = ntuples;
|
||||
output->recheck = page->recheck;
|
||||
istate->spageptr++;
|
||||
|
||||
LWLockRelease(&istate->lock);
|
||||
return output;
|
||||
}
|
||||
|
||||
LWLockRelease(&istate->lock);
|
||||
|
||||
/* Nothing more in the bitmap */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* tbm_end_iterate - finish an iteration over a TIDBitmap
|
||||
*
|
||||
|
@ -726,6 +986,17 @@ void tbm_end_iterate(TBMIterator* iterator)
|
|||
pfree_ext(iterator);
|
||||
}
|
||||
|
||||
/*
|
||||
* tbm_end_shared_iterate - finish a shared iteration over a TIDBitmap
|
||||
*
|
||||
* This doesn't free any of the shared state associated with the iterator,
|
||||
* just our backend-private state.
|
||||
*/
|
||||
void tbm_end_shared_iterate(TBMSharedIterator *iterator)
|
||||
{
|
||||
pfree_ext(iterator);
|
||||
}
|
||||
|
||||
/*
|
||||
* tbm_find_pageentry - find a PagetableEntry for the pageno
|
||||
*
|
||||
|
@ -919,7 +1190,7 @@ static void tbm_lossify(TIDBitmap* tbm)
|
|||
* push nentries down to significantly less than maxentries, or else we'll
|
||||
* just end up doing this again very soon. We shoot for maxentries/2.
|
||||
*/
|
||||
Assert(!tbm->iterating);
|
||||
Assert(tbm->iterating == TBM_NOT_ITERATING);
|
||||
Assert(tbm->status == TBM_HASH);
|
||||
|
||||
hash_seq_init(&status, tbm->pagetable);
|
||||
|
@ -986,7 +1257,29 @@ static int tbm_comparator(const void* left, const void* right)
|
|||
return 0;
|
||||
}
|
||||
|
||||
bool tbm_is_global(const TIDBitmap* tbm)
|
||||
/*
|
||||
* tbm_attach_shared_iterate
|
||||
*
|
||||
* Allocate a backend-private iterator and attach the shared iterator state
|
||||
* to it so that multiple processed can iterate jointly.
|
||||
*
|
||||
* We also converts the DSA pointers to local pointers and store them into
|
||||
* our private iterator.
|
||||
*/
|
||||
TBMSharedIterator *tbm_attach_shared_iterate(TBMSharedIteratorState *istate)
|
||||
{
|
||||
/*
|
||||
* Create the TBMSharedIterator struct, with enough trailing space to
|
||||
* serve the needs of the TBMIterateResult sub-struct.
|
||||
*/
|
||||
TBMSharedIterator *iterator = (TBMSharedIterator *)palloc0(sizeof(TBMSharedIterator) +
|
||||
MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));
|
||||
|
||||
iterator->state = istate;
|
||||
return iterator;
|
||||
}
|
||||
|
||||
bool tbm_is_global(const TIDBitmap *tbm)
|
||||
{
|
||||
return tbm->isGlobalPart;
|
||||
}
|
||||
|
|
|
@ -1072,10 +1072,10 @@ static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, RangeT
|
|||
break;
|
||||
|
||||
case RTE_VALUES:
|
||||
/*
|
||||
* The data for a VALUES clause is stored in the plan tree itself,
|
||||
* so scanning it in a worker is fine.
|
||||
*/
|
||||
/* Check for parallel-restricted functions. */
|
||||
if (has_parallel_hazard((Node *)rte->values_lists, false)) {
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case RTE_CTE:
|
||||
|
@ -1102,6 +1102,14 @@ static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, RangeT
|
|||
if (has_parallel_hazard((Node *)rel->baserestrictinfo, false))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Likewise, if the relation's outputs are not parallel-safe, give up.
|
||||
* (Usually, they're just Vars, but sometimes they're not.)
|
||||
*/
|
||||
if (has_parallel_hazard((Node *)rel->reltargetlist, false)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* We have a winner. */
|
||||
rel->consider_parallel = true;
|
||||
}
|
||||
|
@ -2918,6 +2926,26 @@ static void recurse_push_qual(Node* setOp, Query* topquery, RangeTblEntry* rte,
|
|||
(int)nodeTag(setOp))));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* create_partial_bitmap_paths
|
||||
* Build partial bitmap heap path for the relation
|
||||
*/
|
||||
void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual)
|
||||
{
|
||||
/* Compute heap pages for bitmap heap scan */
|
||||
double pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0, NULL, NULL, rel->isPartitionedTable);
|
||||
int parallel_workers = compute_parallel_worker(rel, pages_fetched, -1,
|
||||
u_sess->attr.attr_sql.max_parallel_workers_per_gather);
|
||||
|
||||
if (parallel_workers <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
add_partial_path(rel,
|
||||
(Path *)create_bitmap_heap_path(root, rel, bitmapqual, NULL, 1.0, parallel_workers));
|
||||
}
|
||||
|
||||
/*
|
||||
* partIterator tries to inherit pathkeys from scan path
|
||||
*
|
||||
|
|
|
@ -1444,10 +1444,10 @@ void cost_bitmap_heap_scan(
|
|||
Cost startup_cost = 0;
|
||||
Cost run_cost = 0;
|
||||
Cost indexTotalCost;
|
||||
Selectivity indexSelectivity;
|
||||
QualCost qpqual_cost;
|
||||
Cost cpu_per_tuple = 0.0;
|
||||
Cost cost_per_page;
|
||||
Cost cpu_run_cost;
|
||||
double tuples_fetched;
|
||||
double pages_fetched;
|
||||
double spc_seq_page_cost, spc_random_page_cost;
|
||||
|
@ -1493,52 +1493,15 @@ void cost_bitmap_heap_scan(
|
|||
startup_cost += g_instance.cost_cxt.disable_cost;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch total cost of obtaining the bitmap, as well as its total
|
||||
* selectivity.
|
||||
*/
|
||||
cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
|
||||
pages_fetched = compute_bitmap_pages(root, baserel, bitmapqual, loop_count,
|
||||
&indexTotalCost, &tuples_fetched, ispartitionedindex);
|
||||
|
||||
startup_cost += indexTotalCost;
|
||||
|
||||
/* Fetch estimated page costs for tablespace containing table. */
|
||||
get_tablespace_page_costs(baserel->reltablespace, &spc_random_page_cost, &spc_seq_page_cost);
|
||||
|
||||
/*
|
||||
* Estimate number of main-table pages fetched.
|
||||
*/
|
||||
tuples_fetched = clamp_row_est(indexSelectivity * RELOPTINFO_LOCAL_FIELD(root, baserel, tuples));
|
||||
|
||||
T = (baserel->pages > 1) ? (double)baserel->pages : 1.0;
|
||||
|
||||
if (loop_count > 1) {
|
||||
/*
|
||||
* For repeated bitmap scans, scale up the number of tuples fetched in
|
||||
* the Mackert and Lohman formula by the number of scans, so that we
|
||||
* estimate the number of pages fetched by all the scans. Then
|
||||
* pro-rate for one scan.
|
||||
*/
|
||||
pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
|
||||
(BlockNumber)baserel->pages,
|
||||
get_indexpath_pages(bitmapqual),
|
||||
root,
|
||||
ispartitionedindex);
|
||||
|
||||
pages_fetched /= loop_count;
|
||||
} else {
|
||||
/*
|
||||
* For a single scan, the number of heap pages that need to be fetched
|
||||
* is the same as the Mackert and Lohman formula for the case T <= b
|
||||
* (ie, no re-reads needed).
|
||||
*/
|
||||
pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
|
||||
}
|
||||
if (pages_fetched >= T) {
|
||||
pages_fetched = T;
|
||||
} else {
|
||||
pages_fetched = ceil(pages_fetched);
|
||||
}
|
||||
|
||||
/*
|
||||
* For small numbers of pages we should charge spc_random_page_cost
|
||||
* apiece, while if nearly all the table's pages are being read, it's more
|
||||
|
@ -1566,8 +1529,19 @@ void cost_bitmap_heap_scan(
|
|||
|
||||
startup_cost += qpqual_cost.startup;
|
||||
cpu_per_tuple = u_sess->attr.attr_sql.cpu_tuple_cost + qpqual_cost.per_tuple;
|
||||
cpu_run_cost = cpu_per_tuple * tuples_fetched;
|
||||
|
||||
run_cost += cpu_per_tuple * tuples_fetched;
|
||||
/* Adjust costing for parallelism, if used. */
|
||||
if (path->parallel_workers > 0) {
|
||||
double parallel_divisor = get_parallel_divisor(path);
|
||||
|
||||
/* The CPU cost is divided among all the workers. */
|
||||
cpu_run_cost /= parallel_divisor;
|
||||
|
||||
path->rows = clamp_row_est(path->rows / parallel_divisor);
|
||||
}
|
||||
|
||||
run_cost += cpu_run_cost;
|
||||
|
||||
path->startup_cost = startup_cost;
|
||||
path->total_cost = startup_cost + run_cost;
|
||||
|
@ -5932,6 +5906,66 @@ static double get_parallel_divisor(Path* path)
|
|||
return parallel_divisor;
|
||||
}
|
||||
|
||||
/*
|
||||
* compute_bitmap_pages
|
||||
*
|
||||
* compute number of pages fetched from heap in bitmap heap scan.
|
||||
*/
|
||||
double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
|
||||
double loop_count, Cost *cost, double *tuple, bool ispartitionedindex)
|
||||
{
|
||||
Cost indexTotalCost;
|
||||
Selectivity indexSelectivity;
|
||||
double pages_fetched;
|
||||
double T = (baserel->pages > 1) ? (double)baserel->pages : 1.0;
|
||||
|
||||
/*
|
||||
* Fetch total cost of obtaining the bitmap, as well as its total
|
||||
* selectivity.
|
||||
*/
|
||||
cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
|
||||
/*
|
||||
* Estimate number of main-table pages fetched.
|
||||
*/
|
||||
double tuples_fetched = clamp_row_est(indexSelectivity * RELOPTINFO_LOCAL_FIELD(root, baserel, tuples));
|
||||
|
||||
if (loop_count > 1) {
|
||||
/*
|
||||
* For repeated bitmap scans, scale up the number of tuples fetched in
|
||||
* the Mackert and Lohman formula by the number of scans, so that we
|
||||
* estimate the number of pages fetched by all the scans. Then
|
||||
* pro-rate for one scan.
|
||||
*/
|
||||
pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
|
||||
(BlockNumber)baserel->pages,
|
||||
get_indexpath_pages(bitmapqual),
|
||||
root,
|
||||
ispartitionedindex);
|
||||
|
||||
pages_fetched /= loop_count;
|
||||
} else {
|
||||
/*
|
||||
* For a single scan, the number of heap pages that need to be fetched
|
||||
* is the same as the Mackert and Lohman formula for the case T <= b
|
||||
* (ie, no re-reads needed).
|
||||
*/
|
||||
pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
|
||||
}
|
||||
if (pages_fetched >= T) {
|
||||
pages_fetched = T;
|
||||
} else {
|
||||
pages_fetched = ceil(pages_fetched);
|
||||
}
|
||||
|
||||
if (cost != NULL) {
|
||||
*cost = indexTotalCost;
|
||||
}
|
||||
if (tuple != NULL) {
|
||||
*tuple = tuples_fetched;
|
||||
}
|
||||
|
||||
return pages_fetched;
|
||||
}
|
||||
/* it used to compute page_size in createplan.cpp */
|
||||
double cost_page_size(double tuples, int width)
|
||||
{
|
||||
|
|
|
@ -279,8 +279,13 @@ void create_index_paths(PlannerInfo* root, RelOptInfo* rel)
|
|||
BitmapHeapPath* bpath = NULL;
|
||||
|
||||
bitmapqual = choose_bitmap_and(root, rel, bitindexpaths);
|
||||
bpath = create_bitmap_heap_path(root, rel, bitmapqual, NULL, 1.0);
|
||||
bpath = create_bitmap_heap_path(root, rel, bitmapqual, NULL, 1.0, 0);
|
||||
add_path(root, rel, (Path*)bpath);
|
||||
|
||||
/* create a partial bitmap heap path */
|
||||
if (rel->consider_parallel) {
|
||||
create_partial_bitmap_paths(root, rel, bitmapqual);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -348,7 +353,7 @@ void create_index_paths(PlannerInfo* root, RelOptInfo* rel)
|
|||
/* And push that path into the mix */
|
||||
required_outer = get_bitmap_tree_required_outer(bitmapqual);
|
||||
loop_count = get_loop_count(root, required_outer);
|
||||
bpath = create_bitmap_heap_path(root, rel, bitmapqual, required_outer, loop_count);
|
||||
bpath = create_bitmap_heap_path(root, rel, bitmapqual, required_outer, loop_count, 0);
|
||||
add_path(root, rel, (Path*)bpath);
|
||||
}
|
||||
}
|
||||
|
@ -1650,6 +1655,11 @@ static Cost bitmap_scan_cost_est(PlannerInfo* root, RelOptInfo* rel, Path* ipath
|
|||
bpath.path.pathkeys = NIL;
|
||||
bpath.bitmapqual = ipath;
|
||||
|
||||
/*
|
||||
* Check the cost of temporary path without considering parallelism.
|
||||
* Parallel bitmap heap path will be considered at later stage.
|
||||
*/
|
||||
bpath.path.parallel_workers = 0;
|
||||
cost_bitmap_heap_scan(&bpath.path, root, rel, bpath.path.param_info, ipath, get_loop_count(root, required_outer));
|
||||
|
||||
return bpath.path.total_cost;
|
||||
|
@ -1685,6 +1695,11 @@ static Cost bitmap_and_cost_est(PlannerInfo* root, RelOptInfo* rel, List* paths)
|
|||
bpath.path.pathkeys = NIL;
|
||||
bpath.bitmapqual = (Path*)&apath;
|
||||
|
||||
/*
|
||||
* Check the cost of temporary path without considering parallelism.
|
||||
* Parallel bitmap heap path will be considered at later stage.
|
||||
*/
|
||||
bpath.path.parallel_workers = 0;
|
||||
/* Now we can do cost_bitmap_heap_scan */
|
||||
cost_bitmap_heap_scan(
|
||||
&bpath.path, root, rel, bpath.path.param_info, (Path*)&apath, get_loop_count(root, required_outer));
|
||||
|
|
|
@ -43,6 +43,10 @@
|
|||
#include "pgxc/pgxc.h"
|
||||
#include "parser/parsetree.h"
|
||||
|
||||
#define IS_JOIN_TYPE_PARALLEL_SAFE(jointype) ((jointype) != JOIN_UNIQUE_OUTER && (jointype) != JOIN_FULL && \
|
||||
(jointype) != JOIN_RIGHT && (jointype) != JOIN_RIGHT_SEMI && (jointype) != JOIN_RIGHT_ANTI && \
|
||||
(jointype) != JOIN_RIGHT_ANTI_FULL)
|
||||
|
||||
static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
|
||||
SpecialJoinInfo *sjinfo, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys,
|
||||
List *mergeclauses, List *outersortkeys, List *innersortkeys);
|
||||
|
@ -1098,8 +1102,8 @@ static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
|||
* JOIN_RIGHT, because they can produce false null extended rows. Also,
|
||||
* the resulting path must not be parameterized.
|
||||
*/
|
||||
if (joinrel->consider_parallel && save_jointype != JOIN_UNIQUE_OUTER && save_jointype != JOIN_FULL &&
|
||||
save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL) {
|
||||
if (joinrel->consider_parallel && IS_JOIN_TYPE_PARALLEL_SAFE(save_jointype)
|
||||
&& outerrel->partial_pathlist != NIL) {
|
||||
cheapest_partial_outer = (Path *)linitial(outerrel->partial_pathlist);
|
||||
|
||||
if (inner_path->parallel_safe) {
|
||||
|
@ -1446,8 +1450,8 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
|||
* parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
|
||||
* because they can produce false null extended rows.
|
||||
*/
|
||||
if (joinrel->consider_parallel && save_jointype != JOIN_UNIQUE_OUTER && save_jointype != JOIN_FULL &&
|
||||
save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL) {
|
||||
if (joinrel->consider_parallel && IS_JOIN_TYPE_PARALLEL_SAFE(save_jointype) &&
|
||||
outerrel->partial_pathlist != NIL) {
|
||||
if (nestjoinOK) {
|
||||
consider_parallel_nestloop(root, joinrel, outerrel, innerrel, save_jointype, extra);
|
||||
}
|
||||
|
@ -1525,8 +1529,13 @@ static void consider_parallel_mergejoin(PlannerInfo *root, RelOptInfo *joinrel,
|
|||
static void consider_parallel_nestloop(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel,
|
||||
RelOptInfo* innerrel, JoinType jointype, JoinPathExtraData* extra)
|
||||
{
|
||||
JoinType saveJointype = jointype;
|
||||
ListCell* lc1 = NULL;
|
||||
|
||||
if (jointype == JOIN_UNIQUE_INNER) {
|
||||
jointype = JOIN_INNER;
|
||||
}
|
||||
|
||||
foreach (lc1, outerrel->partial_pathlist) {
|
||||
Path* outerpath = (Path*)lfirst(lc1);
|
||||
List* pathkeys;
|
||||
|
@ -1549,12 +1558,13 @@ static void consider_parallel_nestloop(PlannerInfo* root, RelOptInfo* joinrel, R
|
|||
continue;
|
||||
|
||||
/*
|
||||
* Like match_unsorted_outer, we only consider a single nestloop
|
||||
* path when the jointype is JOIN_UNIQUE_INNER. But we have to scan
|
||||
* cheapest_parameterized_paths to find the one we want to consider,
|
||||
* because cheapest_total_path might not be parallel-safe.
|
||||
* If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
|
||||
* cheapest_total_path, and we have to unique-ify it. (We might
|
||||
* be able to relax this to allow other safe, unparameterized
|
||||
* inner paths, but right now create_unique_path is not on board
|
||||
* with that.)
|
||||
*/
|
||||
if (jointype == JOIN_UNIQUE_INNER) {
|
||||
if (saveJointype == JOIN_UNIQUE_INNER) {
|
||||
if (!bms_is_empty(PATH_REQ_OUTER(innerpath)))
|
||||
continue;
|
||||
innerpath = (Path*)create_unique_path(root, innerrel, innerpath, extra->sjinfo);
|
||||
|
@ -1795,8 +1805,8 @@ static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
|||
* single set of match bits for each batch, but that will require
|
||||
* figuring out a deadlock-free way to wait for the probe to finish.
|
||||
*/
|
||||
if (joinrel->consider_parallel && jointype != JOIN_UNIQUE_OUTER && jointype != JOIN_FULL &&
|
||||
jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL) {
|
||||
if (joinrel->consider_parallel && IS_JOIN_TYPE_PARALLEL_SAFE(save_jointype) &&
|
||||
outerrel->partial_pathlist != NIL) {
|
||||
Path* cheapest_partial_outer = NULL;
|
||||
Path* cheapest_partial_inner = NULL;
|
||||
Path* cheapest_safe_inner = NULL;
|
||||
|
@ -1805,9 +1815,11 @@ static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
|||
|
||||
/*
|
||||
* Can we use a partial inner plan too, so that we can build a
|
||||
* shared hash table in parallel?
|
||||
* shared hash table in parallel? We can't handle
|
||||
* JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
|
||||
*/
|
||||
if (innerrel->partial_pathlist != NIL && u_sess->attr.attr_sql.enable_parallel_hash) {
|
||||
if (innerrel->partial_pathlist != NIL && save_jointype != JOIN_UNIQUE_INNER &&
|
||||
u_sess->attr.attr_sql.enable_parallel_hash) {
|
||||
cheapest_partial_inner = (Path*)linitial(innerrel->partial_pathlist);
|
||||
try_partial_hashjoin_path(root,
|
||||
joinrel,
|
||||
|
@ -1822,22 +1834,14 @@ static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
|||
/*
|
||||
* Normally, given that the joinrel is parallel-safe, the cheapest
|
||||
* total inner path will also be parallel-safe, but if not, we'll
|
||||
* have to search cheapest_parameterized_paths for the cheapest
|
||||
* unparameterized inner path.
|
||||
* have to search for the cheapest safe, unparameterized inner
|
||||
* path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
|
||||
* inner path.
|
||||
*/
|
||||
if (cheapest_total_inner->parallel_safe) {
|
||||
cheapest_safe_inner = cheapest_total_inner;
|
||||
} else {
|
||||
ListCell* lc;
|
||||
|
||||
foreach (lc, innerrel->cheapest_parameterized_paths) {
|
||||
Path* innerpath = (Path*)lfirst(lc);
|
||||
|
||||
if (innerpath->parallel_safe && bms_is_empty(PATH_REQ_OUTER(innerpath))) {
|
||||
cheapest_safe_inner = innerpath;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (save_jointype != JOIN_UNIQUE_INNER) {
|
||||
cheapest_safe_inner = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
|
||||
}
|
||||
|
||||
if (cheapest_safe_inner != NULL) {
|
||||
|
|
|
@ -100,6 +100,7 @@ static Scan* create_indexscan_plan(
|
|||
static BitmapHeapScan* create_bitmap_scan_plan(
|
||||
PlannerInfo* root, BitmapHeapPath* best_path, List* tlist, List* scan_clauses);
|
||||
static Plan* create_bitmap_subplan(PlannerInfo* root, Path* bitmapqual, List** qual, List** indexqual, List** indexECs);
|
||||
static void bitmap_subplan_mark_shared(Plan *plan);
|
||||
static TidScan* create_tidscan_plan(PlannerInfo* root, TidPath* best_path, List* tlist, List* scan_clauses);
|
||||
static SubqueryScan* create_subqueryscan_plan(PlannerInfo* root, Path* best_path, List* tlist, List* scan_clauses);
|
||||
static FunctionScan* create_functionscan_plan(PlannerInfo* root, Path* best_path, List* tlist, List* scan_clauses);
|
||||
|
@ -1837,7 +1838,12 @@ static Gather* create_gather_plan(PlannerInfo* root, GatherPath* best_path)
|
|||
|
||||
disuse_physical_tlist(subplan, best_path->subpath);
|
||||
|
||||
Gather* gather_plan = make_gather(subplan->targetlist,
|
||||
/*
|
||||
* Copy a new target list for gather, since in merge join case, it will change the targetlist.
|
||||
* If we just use a pointer to subplan->targetlist, then it will change the subplan's targetlist
|
||||
* at same time, which we don't want. Check prepare_sort_from_pathkeys for the targetlist.
|
||||
*/
|
||||
Gather* gather_plan = make_gather(list_copy(subplan->targetlist),
|
||||
NIL,
|
||||
best_path->path.parallel_workers,
|
||||
SS_assign_special_param(root),
|
||||
|
@ -1854,6 +1860,7 @@ static Gather* create_gather_plan(PlannerInfo* root, GatherPath* best_path)
|
|||
case T_HashJoin:
|
||||
case T_MergeJoin:
|
||||
case T_NestLoop:
|
||||
case T_BitmapHeapScan:
|
||||
inherit_plan_locator_info(&gather_plan->plan, subplan);
|
||||
break;
|
||||
default:
|
||||
|
@ -2621,20 +2628,27 @@ static BitmapHeapScan* create_bitmap_scan_plan(
|
|||
PlannerInfo* root, BitmapHeapPath* best_path, List* tlist, List* scan_clauses)
|
||||
{
|
||||
Index baserelid = best_path->path.parent->relid;
|
||||
Plan* bitmapqualplan = NULL;
|
||||
List* bitmapqualorig = NIL;
|
||||
List* indexquals = NIL;
|
||||
List* indexECs = NIL;
|
||||
List* qpqual = NIL;
|
||||
ListCell* l = NULL;
|
||||
BitmapHeapScan* scan_plan = NULL;
|
||||
bool isGlobal = false;
|
||||
|
||||
/* it should be a base rel... */
|
||||
Assert(baserelid > 0);
|
||||
Assert(best_path->path.parent->rtekind == RTE_RELATION);
|
||||
|
||||
/* Process the bitmapqual tree into a Plan tree and qual lists */
|
||||
bitmapqualplan = create_bitmap_subplan(root, best_path->bitmapqual, &bitmapqualorig, &indexquals, &indexECs);
|
||||
Plan *bitmapqualplan = create_bitmap_subplan(root, best_path->bitmapqual, &bitmapqualorig, &indexquals, &indexECs);
|
||||
|
||||
if (IsA(best_path->bitmapqual, IndexPath)) {
|
||||
isGlobal = CheckIndexPathUseGPI((IndexPath*)best_path->bitmapqual);
|
||||
}
|
||||
/* Don't support parallel bitmap scan for global partition index or adio is enabled */
|
||||
if (!g_instance.attr.attr_storage.enable_adio_function && !isGlobal && best_path->path.parallel_aware) {
|
||||
bitmap_subplan_mark_shared(bitmapqualplan);
|
||||
}
|
||||
|
||||
/*
|
||||
* The qpqual list must contain all restrictions not automatically handled
|
||||
|
@ -2661,7 +2675,7 @@ static BitmapHeapScan* create_bitmap_scan_plan(
|
|||
* to do it that way because predicate conditions need to be rechecked if
|
||||
* the scan becomes lossy, so they have to be included in bitmapqualorig.
|
||||
*/
|
||||
qpqual = NIL;
|
||||
List *qpqual = NIL;
|
||||
foreach (l, scan_clauses) {
|
||||
RestrictInfo* rinfo = (RestrictInfo*)lfirst(l);
|
||||
Node* clause = (Node*)rinfo->clause;
|
||||
|
@ -5293,13 +5307,32 @@ void copy_plan_costsize(Plan* dest, Plan* src)
|
|||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
/*
|
||||
* bitmap_subplan_mark_shared
|
||||
* Set isshared flag in bitmap subplan so that it will be created in
|
||||
* shared memory.
|
||||
*/
|
||||
static void bitmap_subplan_mark_shared(Plan *plan)
|
||||
{
|
||||
if (IsA(plan, BitmapAnd)) {
|
||||
bitmap_subplan_mark_shared((Plan*)linitial(((BitmapAnd *)plan)->bitmapplans));
|
||||
} else if (IsA(plan, BitmapOr)) {
|
||||
((BitmapOr *)plan)->isshared = true;
|
||||
bitmap_subplan_mark_shared((Plan*)linitial(((BitmapOr *)plan)->bitmapplans));
|
||||
} else if (IsA(plan, BitmapIndexScan)) {
|
||||
((BitmapIndexScan *)plan)->isshared = true;
|
||||
} else {
|
||||
/* Maybe CStore index scan(T_CStoreIndexCtidScan), don't support parallel */
|
||||
}
|
||||
}
|
||||
|
||||
/* ****************************************************************************
|
||||
*
|
||||
* PLAN NODE BUILDING ROUTINES
|
||||
* PLAN NODE BUILDING ROUTINES
|
||||
*
|
||||
* Some of these are exported because they are called to build plan nodes
|
||||
* in contexts where we're not deriving the plan node from a path node.
|
||||
*****************************************************************************/
|
||||
* *************************************************************************** */
|
||||
static SeqScan* make_seqscan(List* qptlist, List* qpqual, Index scanrelid)
|
||||
{
|
||||
SeqScan* node = makeNode(SeqScan);
|
||||
|
|
|
@ -488,7 +488,7 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
|
|||
int save_parent_id = *parent_node_id;
|
||||
(*plan_node_id)++;
|
||||
(*num_plannodes)++;
|
||||
if (result_plan->initPlan && IS_STREAM_PLAN) {
|
||||
if (result_plan->initPlan && (IS_SINGLE_NODE || IS_STREAM_PLAN)) {
|
||||
List* cteLinkList = NIL;
|
||||
ListCell* lc = NULL;
|
||||
foreach (lc, result_plan->initPlan) {
|
||||
|
@ -496,6 +496,7 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
|
|||
if (plan->subLinkType == CTE_SUBLINK)
|
||||
cteLinkList = lappend(cteLinkList, plan);
|
||||
}
|
||||
#ifdef ENABLE_MULTIPLE_NODES
|
||||
if (cteLinkList != NIL) {
|
||||
if (IsA(result_plan, ValuesScan)) {
|
||||
sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
|
||||
|
@ -505,7 +506,8 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
|
|||
mark_stream_unsupport();
|
||||
}
|
||||
}
|
||||
if (IS_STREAM_PLAN)
|
||||
#endif
|
||||
if (IS_SINGLE_NODE || IS_STREAM_PLAN)
|
||||
*initplans = list_concat(*initplans, list_copy(result_plan->initPlan));
|
||||
}
|
||||
switch (nodeTag(result_plan)) {
|
||||
|
@ -876,7 +878,7 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
|
|||
}
|
||||
break;
|
||||
}
|
||||
if (IS_STREAM_PLAN) {
|
||||
if (IS_STREAM_PLAN || IS_SINGLE_NODE) {
|
||||
if (is_replicated_plan(result_plan) && is_execute_on_multinodes(result_plan)) {
|
||||
List* nodelist = check_random_expr(result_plan);
|
||||
if (list_length(nodelist) > 0) {
|
||||
|
@ -924,6 +926,7 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
|
|||
} else
|
||||
subplan_ids[subplan->plan_id] = subplan_ids[0];
|
||||
if (!has_finalized) {
|
||||
#ifdef ENABLE_MULTIPLE_NODES
|
||||
if (is_execute_on_coordinator(result_plan) ||
|
||||
(is_execute_on_allnodes(result_plan) && !is_data_node_exec)) {
|
||||
Plan* child_plan = NULL;
|
||||
|
@ -973,6 +976,7 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
|
|||
}
|
||||
pushdown_execnodes(plan, result_plan->exec_nodes, false, true);
|
||||
}
|
||||
#endif
|
||||
if (check_stream_support()) {
|
||||
PlannerInfo* subroot = NULL;
|
||||
Plan* child_root = NULL;
|
||||
|
|
|
@ -91,6 +91,7 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
bool allow_restricted;
|
||||
List *safe_param_ids; /* PARAM_EXEC Param IDs to treat as safe */
|
||||
} has_parallel_hazard_arg;
|
||||
|
||||
|
||||
|
@ -1168,6 +1169,7 @@ bool has_parallel_hazard(Node *node, bool allow_restricted)
|
|||
has_parallel_hazard_arg context;
|
||||
|
||||
context.allow_restricted = allow_restricted;
|
||||
context.safe_param_ids = NIL;
|
||||
return has_parallel_hazard_walker(node, &context);
|
||||
}
|
||||
|
||||
|
@ -1196,8 +1198,53 @@ static bool has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *cont
|
|||
|
||||
/* Recurse into subselects */
|
||||
return query_tree_walker(query, (bool (*)())has_parallel_hazard_walker, context, 0);
|
||||
} else if (IsA(node, SubPlan) || IsA(node, SubLink) || IsA(node, AlternativeSubPlan) || IsA(node, Param)) {
|
||||
return true;
|
||||
} else if (IsA(node, SubLink) || IsA(node, AlternativeSubPlan)) {
|
||||
if (!context->allow_restricted) {
|
||||
return true;
|
||||
}
|
||||
} else if (IsA(node, SubPlan)) {
|
||||
/*
|
||||
* Only parallel-safe SubPlans can be sent to workers. Within the
|
||||
* testexpr of the SubPlan, Params representing the output columns of the
|
||||
* subplan can be treated as parallel-safe, so temporarily add their IDs
|
||||
* to the safe_param_ids list while examining the testexpr.
|
||||
*/
|
||||
SubPlan *subplan = (SubPlan *)node;
|
||||
|
||||
if (!subplan->parallel_safe && !context->allow_restricted) {
|
||||
return true;
|
||||
}
|
||||
List *saveSafeParamIds = context->safe_param_ids;
|
||||
context->safe_param_ids = list_concat(list_copy(context->safe_param_ids), list_copy(subplan->paramIds));
|
||||
if (has_parallel_hazard_walker(subplan->testexpr, context)) {
|
||||
return true; /* no need to restore safe_param_ids */
|
||||
}
|
||||
list_free(context->safe_param_ids);
|
||||
context->safe_param_ids = saveSafeParamIds;
|
||||
/* we must also check args, but no special Param treatment there */
|
||||
if (has_parallel_hazard_walker((Node *) subplan->args, context)) {
|
||||
return true;
|
||||
}
|
||||
/* don't want to recurse normally, so we're done */
|
||||
return false;
|
||||
} else if (IsA(node, Param)) {
|
||||
/*
|
||||
* We can't pass Params to workers at the moment either, so they are also
|
||||
* parallel-restricted, unless they are PARAM_EXTERN Params or are
|
||||
* PARAM_EXEC Params listed in safe_param_ids, meaning they could be
|
||||
* either generated within workers or can be computed by the leader and
|
||||
* then their value can be passed to workers.
|
||||
*/
|
||||
Param *param = (Param *)node;
|
||||
if (param->paramkind == PARAM_EXTERN) {
|
||||
return false;
|
||||
}
|
||||
if (param->paramkind != PARAM_EXEC || !list_member_int(context->safe_param_ids, param->paramid)) {
|
||||
if (!context->allow_restricted) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false; /* nothing to recurse to */
|
||||
}
|
||||
|
||||
/* This is just a notational convenience for callers. */
|
||||
|
|
|
@ -2058,8 +2058,8 @@ IndexPath* create_index_path(PlannerInfo* root, IndexOptInfo* index, List* index
|
|||
* loop_count should match the value used when creating the component
|
||||
* IndexPaths.
|
||||
*/
|
||||
BitmapHeapPath* create_bitmap_heap_path(
|
||||
PlannerInfo* root, RelOptInfo* rel, Path* bitmapqual, Relids required_outer, double loop_count)
|
||||
BitmapHeapPath* create_bitmap_heap_path(PlannerInfo* root, RelOptInfo* rel, Path* bitmapqual,
|
||||
Relids required_outer, double loop_count, int parallel_degree)
|
||||
{
|
||||
BitmapHeapPath* pathnode = makeNode(BitmapHeapPath);
|
||||
|
||||
|
@ -2067,15 +2067,23 @@ BitmapHeapPath* create_bitmap_heap_path(
|
|||
pathnode->path.parent = rel;
|
||||
pathnode->path.param_info = get_baserel_parampathinfo(root, rel, required_outer);
|
||||
pathnode->path.pathkeys = NIL; /* always unordered */
|
||||
pathnode->path.parallel_aware = parallel_degree > 0 ? true : false;
|
||||
pathnode->path.parallel_safe = rel->consider_parallel;
|
||||
pathnode->path.parallel_workers = parallel_degree;
|
||||
|
||||
pathnode->bitmapqual = bitmapqual;
|
||||
|
||||
cost_bitmap_heap_scan(&pathnode->path, root, rel, pathnode->path.param_info, bitmapqual, loop_count);
|
||||
|
||||
#ifdef STREAMPLAN
|
||||
/*
|
||||
* We need to set locator_type for parallel query, cause we may send
|
||||
* this value to bg worker. If not, locator_type is the initial value '\0',
|
||||
* which make the later serialized plan truncated.
|
||||
*/
|
||||
pathnode->path.locator_type = rel->locator_type;
|
||||
if (IS_STREAM_PLAN) {
|
||||
pathnode->path.distribute_keys = rel->distribute_keys;
|
||||
pathnode->path.locator_type = rel->locator_type;
|
||||
|
||||
/* add location information for bitmap heap path */
|
||||
RangeTblEntry* rte = root->simple_rte_array[rel->relid];
|
||||
|
@ -3971,7 +3979,7 @@ Path* reparameterize_path(PlannerInfo* root, Path* path, Relids required_outer,
|
|||
case T_BitmapHeapScan: {
|
||||
BitmapHeapPath* bpath = (BitmapHeapPath*)path;
|
||||
|
||||
return (Path*)create_bitmap_heap_path(root, rel, bpath->bitmapqual, required_outer, loop_count);
|
||||
return (Path*)create_bitmap_heap_path(root, rel, bpath->bitmapqual, required_outer, loop_count, 0);
|
||||
}
|
||||
case T_SubqueryScan:
|
||||
return create_subqueryscan_path(root, rel, path->pathkeys, required_outer);
|
||||
|
|
|
@ -647,7 +647,8 @@ RelOptInfo* build_join_rel(PlannerInfo* root, Relids joinrelids, RelOptInfo* out
|
|||
* here.
|
||||
*/
|
||||
if (inner_rel->consider_parallel && outer_rel->consider_parallel &&
|
||||
!has_parallel_hazard((Node *)restrictlist, false)) {
|
||||
!has_parallel_hazard((Node *)restrictlist, false) &&
|
||||
!has_parallel_hazard((Node *)joinrel->reltargetlist, false)) {
|
||||
joinrel->consider_parallel = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -1432,6 +1432,7 @@ void knl_t_bgworker_init(knl_t_bgworker_context* bgworker_cxt)
|
|||
bgworker_cxt->pcxt_list = DLIST_STATIC_INIT(bgworker_cxt->pcxt_list);
|
||||
bgworker_cxt->save_pgBufferUsage = NULL;
|
||||
bgworker_cxt->hpm_context = NULL;
|
||||
bgworker_cxt->memCxt = NULL;
|
||||
}
|
||||
|
||||
void knl_t_msqueue_init(knl_t_msqueue_context* msqueue_cxt)
|
||||
|
|
|
@ -2066,7 +2066,6 @@ static void ExecutePlan(EState *estate, PlanState *planstate, bool use_parallel_
|
|||
* process so we just end the loop...
|
||||
*/
|
||||
if (TupIsNull(slot)) {
|
||||
(void)ExecShutdownNode(planstate);
|
||||
ExecEarlyFree(planstate);
|
||||
break;
|
||||
}
|
||||
|
@ -2136,6 +2135,14 @@ static void ExecutePlan(EState *estate, PlanState *planstate, bool use_parallel_
|
|||
u_sess->instr_cxt.global_instr->SetPeakNodeMemory(planstate->plan->plan_node_id, peak_memory);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we know we won't need to back up, we can release resources at this
|
||||
* point.
|
||||
*/
|
||||
if (!(estate->es_top_eflags & EXEC_FLAG_BACKWARD)) {
|
||||
(void)ExecShutdownNode(planstate);
|
||||
}
|
||||
|
||||
if (use_parallel_mode) {
|
||||
ExitParallelMode();
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "executor/execParallel.h"
|
||||
#include "executor/executor.h"
|
||||
#include "executor/hashjoin.h"
|
||||
#include "executor/nodeBitmapHeapscan.h"
|
||||
#include "executor/nodeSeqscan.h"
|
||||
#include "executor/nodeAppend.h"
|
||||
#include "executor/nodeIndexscan.h"
|
||||
|
@ -183,6 +184,9 @@ static bool ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateConte
|
|||
case T_AppendState:
|
||||
ExecAppendEstimate((AppendState*)planstate, e->pcxt);
|
||||
break;
|
||||
case T_BitmapHeapScanState:
|
||||
ExecBitmapHeapEstimate((BitmapHeapScanState*)planstate, e->pcxt);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -244,6 +248,13 @@ static bool ExecParallelInitializeDSM(PlanState *planstate, ExecParallelInitiali
|
|||
cxt->pwCtx->queryInfo.pappend_num++;
|
||||
}
|
||||
break;
|
||||
case T_BitmapHeapScanState:
|
||||
if (planstate->plan->parallel_aware) {
|
||||
ExecBitmapHeapInitializeDSM((BitmapHeapScanState*)planstate,
|
||||
d->pcxt, cxt->pwCtx->queryInfo.bmscan_num);
|
||||
cxt->pwCtx->queryInfo.bmscan_num++;
|
||||
}
|
||||
break;
|
||||
case T_HashJoinState:
|
||||
if (planstate->plan->parallel_aware) {
|
||||
ExecHashJoinInitializeDSM((HashJoinState*)planstate, d->pcxt, cxt->pwCtx->queryInfo.jstate_num);
|
||||
|
@ -425,6 +436,7 @@ ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate,
|
|||
queryInfo.pappend = (ParallelAppendState**)palloc0(sizeof(ParallelAppendState*) * e.nnodes);
|
||||
queryInfo.jstate = (ParallelHashJoinState**)palloc0(sizeof(ParallelHashJoinState*) * e.nnodes);
|
||||
queryInfo.shared_info = (SharedHashInfo**)palloc0(sizeof(SharedHashInfo*) * e.nnodes);
|
||||
queryInfo.bmscan = (ParallelBitmapHeapState **)palloc0(sizeof(ParallelBitmapHeapState *) * e.nnodes);
|
||||
|
||||
/*
|
||||
* Give parallel-aware nodes a chance to initialize their shared data.
|
||||
|
@ -488,6 +500,11 @@ static bool ExecParallelReInitializeDSM(PlanState* planstate, ParallelContext* p
|
|||
ExecAppendReInitializeDSM((AppendState*)planstate, pcxt);
|
||||
}
|
||||
break;
|
||||
case T_BitmapHeapScanState:
|
||||
if (planstate->plan->parallel_aware) {
|
||||
ExecBitmapHeapReInitializeDSM((BitmapHeapScanState*)planstate, pcxt);
|
||||
}
|
||||
break;
|
||||
case T_HashJoinState:
|
||||
if (planstate->plan->parallel_aware) {
|
||||
ExecHashJoinReInitializeDSM((HashJoinState*)planstate, pcxt);
|
||||
|
@ -735,6 +752,11 @@ static bool ExecParallelInitializeWorker(PlanState *planstate, void *context)
|
|||
ExecAppendInitializeWorker((AppendState*)planstate, context);
|
||||
}
|
||||
break;
|
||||
case T_BitmapHeapScanState:
|
||||
if (planstate->plan->parallel_aware) {
|
||||
ExecBitmapHeapInitializeWorker((BitmapHeapScanState *)planstate, context);
|
||||
}
|
||||
break;
|
||||
case T_HashState:
|
||||
/* even when not parallel-aware, for EXPLAIN ANALYZE */
|
||||
ExecHashInitializeWorker((HashState*)planstate, context);
|
||||
|
@ -778,6 +800,8 @@ void ParallelQueryMain(void *seg)
|
|||
|
||||
/* Start up the executor, have it run the plan, and then shut it down. */
|
||||
(void)ExecutorStart(queryDesc, cxt->pwCtx->queryInfo.eflags);
|
||||
/* Special executor initialization steps for parallel workers */
|
||||
Assert(t_thrd.bgworker_cxt.memCxt != NULL);
|
||||
ExecParallelInitializeWorker(queryDesc->planstate, seg);
|
||||
|
||||
/* Pass down any tuple bound */
|
||||
|
|
|
@ -52,13 +52,22 @@
|
|||
#include "gstrace/gstrace_infra.h"
|
||||
#include "gstrace/access_gstrace.h"
|
||||
|
||||
#define WAIT_BITMAP_INIT_TIMEOUT 1 // 1s timeout for pthread_cond_timedwait
|
||||
|
||||
static TupleTableSlot* BitmapHbucketTblNext(BitmapHeapScanState* node);
|
||||
static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node);
|
||||
static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
|
||||
#ifdef USE_PREFETCH
|
||||
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, const TBMIterateResult *tbmres,
|
||||
TBMIterator *prefetch_iterator, TBMSharedIterator *shared_prefetch_it);
|
||||
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
|
||||
#endif
|
||||
static void bitgetpage(HeapScanDesc scan, TBMIterateResult* tbmres);
|
||||
static void ExecInitPartitionForBitmapHeapScan(BitmapHeapScanState* scanstate, EState* estate);
|
||||
static void ExecInitNextPartitionForBitmapHeapScan(BitmapHeapScanState* node);
|
||||
static void BitmapHeapPrefetchNext(
|
||||
BitmapHeapScanState* node, HeapScanDesc scan, const TIDBitmap* tbm, TBMIterator** prefetch_iterator);
|
||||
static void BitmapHeapPrefetchNext(BitmapHeapScanState* node, HeapScanDesc scan, const TIDBitmap* tbm,
|
||||
TBMIterator** prefetch_iterator, TBMSharedIterator** shared_prefetch_it);
|
||||
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
|
||||
|
||||
/* This struct is used for partition switch while prefetch pages */
|
||||
typedef struct PrefetchNode {
|
||||
|
@ -76,11 +85,20 @@ void BitmapHeapFree(BitmapHeapScanState* node)
|
|||
tbm_end_iterate(node->prefetch_iterator);
|
||||
node->prefetch_iterator = NULL;
|
||||
}
|
||||
if (node->shared_tbmiterator != NULL) {
|
||||
tbm_end_shared_iterate(node->shared_tbmiterator);
|
||||
node->shared_tbmiterator = NULL;
|
||||
}
|
||||
if (node->shared_prefetch_iterator != NULL) {
|
||||
tbm_end_shared_iterate(node->shared_prefetch_iterator);
|
||||
node->shared_prefetch_iterator = NULL;
|
||||
}
|
||||
if (node->tbm != NULL) {
|
||||
tbm_free(node->tbm);
|
||||
node->tbm = NULL;
|
||||
}
|
||||
node->tbmres = NULL;
|
||||
node->initialized = false;
|
||||
}
|
||||
static TupleTableSlot* BitmapHbucketTblNext(BitmapHeapScanState* node)
|
||||
{
|
||||
|
@ -101,10 +119,104 @@ static TupleTableSlot* BitmapHbucketTblNext(BitmapHeapScanState* node)
|
|||
BitmapHeapFree(node);
|
||||
}
|
||||
}
|
||||
/* ----------------------------------------------------------------
|
||||
* BitmapHeapNext
|
||||
|
||||
#ifdef USE_PREFETCH
|
||||
/*
|
||||
* BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
|
||||
*/
|
||||
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, const TBMIterateResult *tbmres,
|
||||
TBMIterator *prefetch_iterator, TBMSharedIterator *shared_prefetch_it)
|
||||
{
|
||||
ParallelBitmapHeapState *pstate = node->pstate;
|
||||
|
||||
if (pstate == NULL) {
|
||||
if (node->prefetch_pages > 0) {
|
||||
/* The main iterator has closed the distance by one page */
|
||||
node->prefetch_pages--;
|
||||
} else if (prefetch_iterator != NULL) {
|
||||
/* Do not let the prefetch iterator get behind the main one */
|
||||
TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
|
||||
|
||||
if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno) {
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmodule(MOD_EXECUTOR),
|
||||
errmsg("prefetch and main iterators are out of sync for BitmapHeapScan.")));
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (u_sess->storage_cxt.target_prefetch_pages > 0) {
|
||||
(void)pthread_mutex_lock(&pstate->cv_mtx);
|
||||
if (pstate->prefetch_pages > 0) {
|
||||
pstate->prefetch_pages--;
|
||||
(void)pthread_mutex_unlock(&pstate->cv_mtx);
|
||||
} else {
|
||||
/* Release the mutex before iterating */
|
||||
(void)pthread_mutex_unlock(&pstate->cv_mtx);
|
||||
|
||||
/*
|
||||
* In case of shared mode, we can not ensure that the current
|
||||
* blockno of the main iterator and that of the prefetch iterator
|
||||
* are same. It's possible that whatever blockno we are
|
||||
* prefetching will be processed by another process. Therefore,
|
||||
* we don't validate the blockno here as we do in non-parallel
|
||||
* case.
|
||||
*/
|
||||
if (shared_prefetch_it != NULL) {
|
||||
(void)tbm_shared_iterate(shared_prefetch_it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* BitmapAdjustPrefetchTarget - Adjust the prefetch target
|
||||
*
|
||||
* Retrieve next tuple from the BitmapHeapScan node's currentRelation
|
||||
* Increase prefetch target if it's not yet at the max. Note that
|
||||
* we will increase it to zero after fetching the very first
|
||||
* page/tuple, then to one after the second tuple is fetched, then
|
||||
* it doubles as later pages are fetched.
|
||||
*/
|
||||
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
|
||||
{
|
||||
ParallelBitmapHeapState *pstate = node->pstate;
|
||||
|
||||
if (pstate == NULL) {
|
||||
if (node->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages) {
|
||||
/* don't increase any further */
|
||||
} else if (node->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages / 2) {
|
||||
node->prefetch_target = u_sess->storage_cxt.target_prefetch_pages;
|
||||
} else if (node->prefetch_target > 0) {
|
||||
node->prefetch_target *= 2;
|
||||
} else {
|
||||
node->prefetch_target++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Do an unlocked check first to save spinlock acquisitions. */
|
||||
if (pstate->prefetch_target < u_sess->storage_cxt.target_prefetch_pages) {
|
||||
(void)pthread_mutex_lock(&pstate->cv_mtx);
|
||||
if (pstate->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages) {
|
||||
/* don't increase any further */
|
||||
} else if (pstate->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages / 2) {
|
||||
pstate->prefetch_target = u_sess->storage_cxt.target_prefetch_pages;
|
||||
} else if (pstate->prefetch_target > 0) {
|
||||
pstate->prefetch_target *= 2;
|
||||
} else {
|
||||
pstate->prefetch_target++;
|
||||
}
|
||||
(void)pthread_mutex_unlock(&pstate->cv_mtx);
|
||||
}
|
||||
}
|
||||
#endif /* USE_PREFETCH */
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* BitmapHeapNext
|
||||
*
|
||||
* Retrieve next tuple from the BitmapHeapScan node's currentRelation
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
|
||||
|
@ -113,10 +225,13 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
|
|||
HeapScanDesc scan = NULL;
|
||||
TIDBitmap* tbm = NULL;
|
||||
TBMIterator* tbmiterator = NULL;
|
||||
TBMSharedIterator *shared_tbmiterator = NULL;
|
||||
TBMIterateResult* tbmres = NULL;
|
||||
ParallelBitmapHeapState *pstate = node->pstate;
|
||||
|
||||
#ifdef USE_PREFETCH
|
||||
TBMIterator* prefetch_iterator = NULL;
|
||||
TBMSharedIterator* shared_prefetch_it = NULL;
|
||||
#endif
|
||||
OffsetNumber targoffset;
|
||||
TupleTableSlot* slot = NULL;
|
||||
|
@ -128,10 +243,15 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
|
|||
slot = node->ss.ss_ScanTupleSlot;
|
||||
scan = GetHeapScanDesc(node->ss.ss_currentScanDesc);
|
||||
tbm = node->tbm;
|
||||
tbmiterator = node->tbmiterator;
|
||||
if (pstate == NULL) {
|
||||
tbmiterator = node->tbmiterator;
|
||||
} else {
|
||||
shared_tbmiterator = node->shared_tbmiterator;
|
||||
}
|
||||
tbmres = node->tbmres;
|
||||
#ifdef USE_PREFETCH
|
||||
prefetch_iterator = node->prefetch_iterator;
|
||||
shared_prefetch_it = node->shared_prefetch_iterator;
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -147,27 +267,79 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
|
|||
* a lot of prefetching in a scan that stops after a few tuples because of
|
||||
* a LIMIT.
|
||||
*/
|
||||
if (tbm == NULL) {
|
||||
tbm = (TIDBitmap*)MultiExecProcNode(outerPlanState(node));
|
||||
if (!node->initialized) {
|
||||
if (pstate == NULL) {
|
||||
tbm = (TIDBitmap*)MultiExecProcNode(outerPlanState(node));
|
||||
|
||||
if (tbm == NULL || !IsA(tbm, TIDBitmap)) {
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
|
||||
errmodule(MOD_EXECUTOR),
|
||||
errmsg("unrecognized result from subplan for BitmapHeapScan.")));
|
||||
}
|
||||
if (tbm == NULL || !IsA(tbm, TIDBitmap)) {
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
|
||||
errmodule(MOD_EXECUTOR),
|
||||
errmsg("unrecognized result from subplan for BitmapHeapScan.")));
|
||||
}
|
||||
|
||||
node->tbm = tbm;
|
||||
node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
|
||||
node->tbmres = tbmres = NULL;
|
||||
node->tbm = tbm;
|
||||
node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
|
||||
node->tbmres = tbmres = NULL;
|
||||
|
||||
#ifdef USE_PREFETCH
|
||||
if (u_sess->storage_cxt.target_prefetch_pages > 0) {
|
||||
node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm);
|
||||
node->prefetch_pages = 0;
|
||||
node->prefetch_target = -1;
|
||||
}
|
||||
if (u_sess->storage_cxt.target_prefetch_pages > 0) {
|
||||
node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm);
|
||||
node->prefetch_pages = 0;
|
||||
node->prefetch_target = -1;
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
/*
|
||||
* The leader will immediately come out of the function, but
|
||||
* others will be blocked until leader populates the TBM and wakes
|
||||
* them up.
|
||||
*/
|
||||
if (BitmapShouldInitializeSharedState(pstate)) {
|
||||
tbm = (TIDBitmap *)MultiExecProcNode(outerPlanState(node));
|
||||
if (tbm == NULL || !IsA(tbm, TIDBitmap)) {
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
|
||||
errmodule(MOD_EXECUTOR),
|
||||
errmsg("unrecognized result from subplan for BitmapHeapScan.")));
|
||||
}
|
||||
|
||||
node->tbm = tbm;
|
||||
|
||||
/*
|
||||
* Prepare to iterate over the TBM. This will return the
|
||||
* dsa_pointer of the iterator state which will be used by
|
||||
* multiple processes to iterate jointly.
|
||||
*/
|
||||
pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
|
||||
#ifdef USE_PREFETCH
|
||||
if (u_sess->storage_cxt.target_prefetch_pages > 0) {
|
||||
pstate->prefetch_iterator = tbm_prepare_shared_iterate(tbm);
|
||||
|
||||
/*
|
||||
* We don't need the mutex here as we haven't yet woke up
|
||||
* others.
|
||||
*/
|
||||
pstate->prefetch_pages = 0;
|
||||
pstate->prefetch_target = -1;
|
||||
}
|
||||
#endif
|
||||
/* We have initialized the shared state so wake up others. */
|
||||
BitmapDoneInitializingSharedState(pstate);
|
||||
}
|
||||
|
||||
/* Allocate a private iterator and attach the shared state to it */
|
||||
node->shared_tbmiterator = shared_tbmiterator = tbm_attach_shared_iterate(pstate->tbmiterator);
|
||||
node->tbmres = tbmres = NULL;
|
||||
|
||||
#ifdef USE_PREFETCH
|
||||
if (u_sess->storage_cxt.target_prefetch_pages > 0) {
|
||||
node->shared_prefetch_iterator = tbm_attach_shared_iterate(pstate->prefetch_iterator);
|
||||
shared_prefetch_it = node->shared_prefetch_iterator;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
node->initialized = true;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
|
@ -178,37 +350,28 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
|
|||
* Get next page of results if needed
|
||||
*/
|
||||
if (tbmres == NULL) {
|
||||
node->tbmres = tbmres = tbm_iterate(tbmiterator);
|
||||
if (pstate == NULL) {
|
||||
node->tbmres = tbmres = tbm_iterate(tbmiterator);
|
||||
} else {
|
||||
node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
|
||||
}
|
||||
if (tbmres == NULL) {
|
||||
/* no more entries in the bitmap */
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef USE_PREFETCH
|
||||
if (node->prefetch_pages > 0) {
|
||||
/* The main iterator has closed the distance by one page */
|
||||
node->prefetch_pages--;
|
||||
} else if (prefetch_iterator != NULL) {
|
||||
/* Do not let the prefetch iterator get behind the main one */
|
||||
TBMIterateResult* tbmpre = tbm_iterate(prefetch_iterator);
|
||||
|
||||
if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno) {
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmodule(MOD_EXECUTOR),
|
||||
errmsg("prefetch and main iterators are out of sync for BitmapHeapScan.")));
|
||||
}
|
||||
}
|
||||
BitmapAdjustPrefetchIterator(node, tbmres, prefetch_iterator, shared_prefetch_it);
|
||||
#endif /* USE_PREFETCH */
|
||||
|
||||
/* Check whether switch partition-fake-rel, use rd_rel save */
|
||||
if (BitmapNodeNeedSwitchPartRel(node)) {
|
||||
if (pstate == NULL && BitmapNodeNeedSwitchPartRel(node)) {
|
||||
GPISetCurrPartOid(node->gpi_scan, node->tbmres->partitionOid);
|
||||
if (!GPIGetNextPartRelation(node->gpi_scan, CurrentMemoryContext, AccessShareLock)) {
|
||||
/* If the current partition is invalid, the next page is directly processed */
|
||||
tbmres = NULL;
|
||||
#ifdef USE_PREFETCH
|
||||
BitmapHeapPrefetchNext(node, scan, tbm, &prefetch_iterator);
|
||||
BitmapHeapPrefetchNext(node, scan, tbm, &prefetch_iterator, &shared_prefetch_it);
|
||||
#endif /* USE_PREFETCH */
|
||||
continue;
|
||||
}
|
||||
|
@ -256,14 +419,7 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
|
|||
* page/tuple, then to one after the second tuple is fetched, then
|
||||
* it doubles as later pages are fetched.
|
||||
*/
|
||||
if (node->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages)
|
||||
/* don't increase any further */;
|
||||
else if (node->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages / 2)
|
||||
node->prefetch_target = u_sess->storage_cxt.target_prefetch_pages;
|
||||
else if (node->prefetch_target > 0)
|
||||
node->prefetch_target *= 2;
|
||||
else
|
||||
node->prefetch_target++;
|
||||
BitmapAdjustPrefetchTarget(node);
|
||||
#endif /* USE_PREFETCH */
|
||||
} else {
|
||||
/*
|
||||
|
@ -277,8 +433,18 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
|
|||
* Try to prefetch at least a few pages even before we get to the
|
||||
* second page if we don't stop reading after the first tuple.
|
||||
*/
|
||||
if (node->prefetch_target < u_sess->storage_cxt.target_prefetch_pages)
|
||||
node->prefetch_target++;
|
||||
if (pstate == NULL) {
|
||||
if (node->prefetch_target < u_sess->storage_cxt.target_prefetch_pages) {
|
||||
node->prefetch_target++;
|
||||
}
|
||||
} else if (pstate->prefetch_target < u_sess->storage_cxt.target_prefetch_pages) {
|
||||
/* take spinlock while updating shared state */
|
||||
(void)pthread_mutex_lock(&pstate->cv_mtx);
|
||||
if (pstate->prefetch_target < u_sess->storage_cxt.target_prefetch_pages) {
|
||||
pstate->prefetch_target++;
|
||||
}
|
||||
(void)pthread_mutex_unlock(&pstate->cv_mtx);
|
||||
}
|
||||
#endif /* USE_PREFETCH */
|
||||
}
|
||||
|
||||
|
@ -291,7 +457,7 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
|
|||
}
|
||||
|
||||
#ifdef USE_PREFETCH
|
||||
BitmapHeapPrefetchNext(node, scan, tbm, &prefetch_iterator);
|
||||
BitmapHeapPrefetchNext(node, scan, tbm, &prefetch_iterator, &shared_prefetch_it);
|
||||
#endif /* USE_PREFETCH */
|
||||
|
||||
/*
|
||||
|
@ -343,6 +509,20 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
|
|||
return ExecClearTuple(slot);
|
||||
}
|
||||
|
||||
/*
 * BitmapDoneInitializingSharedState - Shared state is initialized
 *
 * By this time the leader has already populated the TBM and initialized the
 * shared state so wake up other processes.
 */
static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
{
    (void)pthread_mutex_lock(&pstate->cv_mtx);
    /* BM_FINISHED lets waiters in BitmapShouldInitializeSharedState proceed. */
    pstate->state = BM_FINISHED;
    /* Broadcast while holding the mutex so no waiter can miss the wakeup. */
    (void)pthread_cond_broadcast(&pstate->cv);
    (void)pthread_mutex_unlock(&pstate->cv_mtx);
}
|
||||
|
||||
/*
|
||||
* bitgetpage - subroutine for BitmapHeapNext()
|
||||
*
|
||||
|
@ -624,6 +804,11 @@ BitmapHeapScanState* ExecInitBitmapHeapScan(BitmapHeapScan* node, EState* estate
|
|||
scanstate->ss.isPartTbl = node->scan.isPartTbl;
|
||||
scanstate->ss.currentSlot = 0;
|
||||
scanstate->ss.partScanDirection = node->scan.partScanDirection;
|
||||
scanstate->pscan_len = 0;
|
||||
scanstate->initialized = false;
|
||||
scanstate->shared_tbmiterator = NULL;
|
||||
scanstate->shared_prefetch_iterator = NULL;
|
||||
scanstate->pstate = NULL;
|
||||
|
||||
/* initilize Global partition index scan information */
|
||||
GPIScanInit(&scanstate->gpi_scan);
|
||||
|
@ -802,14 +987,18 @@ static void ExecInitPartitionForBitmapHeapScan(BitmapHeapScanState* scanstate, E
|
|||
* to do on the current page, else we may uselessly prefetch the same
|
||||
* page we are just about to request for real.
|
||||
*/
|
||||
void BitmapHeapPrefetchNext(
|
||||
BitmapHeapScanState* node, HeapScanDesc scan, const TIDBitmap* tbm, TBMIterator** prefetch_iterator)
|
||||
void BitmapHeapPrefetchNext(BitmapHeapScanState* node, HeapScanDesc scan, const TIDBitmap* tbm,
|
||||
TBMIterator** prefetch_iterator, TBMSharedIterator** shared_prefetch_it)
|
||||
{
|
||||
if (*prefetch_iterator == NULL) {
|
||||
ParallelBitmapHeapState *pstate = node->pstate;
|
||||
if ((pstate == NULL && *prefetch_iterator == NULL) ||
|
||||
(pstate != NULL && *shared_prefetch_it == NULL)) {
|
||||
return;
|
||||
}
|
||||
|
||||
ADIO_RUN()
|
||||
{
|
||||
Assert(shared_prefetch_it == NULL);
|
||||
BlockNumber* blockList = NULL;
|
||||
BlockNumber* blockListPtr = NULL;
|
||||
PrefetchNode* prefetchNode = NULL;
|
||||
|
@ -889,32 +1078,204 @@ void BitmapHeapPrefetchNext(
|
|||
}
|
||||
ADIO_ELSE()
|
||||
{
|
||||
Oid oldOid = GPIGetCurrPartOid(node->gpi_scan);
|
||||
while (node->prefetch_pages < node->prefetch_target) {
|
||||
TBMIterateResult* tbmpre = tbm_iterate(*prefetch_iterator);
|
||||
Relation prefetchRel = scan->rs_rd;
|
||||
if (tbmpre == NULL) {
|
||||
/* No more pages to prefetch */
|
||||
tbm_end_iterate(*prefetch_iterator);
|
||||
node->prefetch_iterator = *prefetch_iterator = NULL;
|
||||
break;
|
||||
if (pstate == NULL) {
|
||||
Oid oldOid = GPIGetCurrPartOid(node->gpi_scan);
|
||||
while (node->prefetch_pages < node->prefetch_target) {
|
||||
TBMIterateResult* tbmpre = tbm_iterate(*prefetch_iterator);
|
||||
Relation prefetchRel = scan->rs_rd;
|
||||
if (tbmpre == NULL) {
|
||||
/* No more pages to prefetch */
|
||||
tbm_end_iterate(*prefetch_iterator);
|
||||
node->prefetch_iterator = *prefetch_iterator = NULL;
|
||||
break;
|
||||
}
|
||||
node->prefetch_pages++;
|
||||
if (tbm_is_global(node->tbm) && GPIScanCheckPartOid(node->gpi_scan, tbmpre->partitionOid)) {
|
||||
GPISetCurrPartOid(node->gpi_scan, tbmpre->partitionOid);
|
||||
if (!GPIGetNextPartRelation(node->gpi_scan, CurrentMemoryContext, AccessShareLock)) {
|
||||
/* If the current partition is invalid, the next page is directly processed */
|
||||
tbmpre = NULL;
|
||||
continue;
|
||||
} else {
|
||||
prefetchRel = node->gpi_scan->fakePartRelation;
|
||||
}
|
||||
}
|
||||
/* For posix_fadvise() we just send the one request */
|
||||
PrefetchBuffer(prefetchRel, MAIN_FORKNUM, tbmpre->blockno);
|
||||
}
|
||||
node->prefetch_pages++;
|
||||
if (tbm_is_global(node->tbm) && GPIScanCheckPartOid(node->gpi_scan, tbmpre->partitionOid)) {
|
||||
GPISetCurrPartOid(node->gpi_scan, tbmpre->partitionOid);
|
||||
if (!GPIGetNextPartRelation(node->gpi_scan, CurrentMemoryContext, AccessShareLock)) {
|
||||
/* If the current partition is invalid, the next page is directly processed */
|
||||
tbmpre = NULL;
|
||||
continue;
|
||||
} else {
|
||||
prefetchRel = node->gpi_scan->fakePartRelation;
|
||||
/* recover old oid after prefetch switch */
|
||||
GPISetCurrPartOid(node->gpi_scan, oldOid);
|
||||
} else if (pstate->prefetch_pages < pstate->prefetch_target) {
|
||||
if (*shared_prefetch_it != NULL) {
|
||||
while (1) {
|
||||
bool do_prefetch = false;
|
||||
|
||||
/*
|
||||
* Recheck under the mutex. If some other process has already
|
||||
* done enough prefetching then we need not to do anything.
|
||||
*/
|
||||
(void)pthread_mutex_lock(&pstate->cv_mtx);
|
||||
if (pstate->prefetch_pages < pstate->prefetch_target) {
|
||||
pstate->prefetch_pages++;
|
||||
do_prefetch = true;
|
||||
}
|
||||
(void)pthread_mutex_unlock(&pstate->cv_mtx);
|
||||
|
||||
if (!do_prefetch) {
|
||||
return;
|
||||
}
|
||||
TBMIterateResult *tbmpre = tbm_shared_iterate(*shared_prefetch_it);
|
||||
if (tbmpre == NULL) {
|
||||
/* No more pages to prefetch */
|
||||
tbm_end_shared_iterate(*shared_prefetch_it);
|
||||
node->shared_prefetch_iterator = *shared_prefetch_it = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
|
||||
}
|
||||
}
|
||||
/* For posix_fadvise() we just send the one request */
|
||||
PrefetchBuffer(prefetchRel, MAIN_FORKNUM, tbmpre->blockno);
|
||||
}
|
||||
/* recover old oid after prefetch switch */
|
||||
GPISetCurrPartOid(node->gpi_scan, oldOid);
|
||||
}
|
||||
ADIO_END();
|
||||
}
|
||||
|
||||
/* ----------------
 * BitmapShouldInitializeSharedState
 *
 * The first process to come here and see the state BM_INITIAL
 * will become the leader for the parallel bitmap scan and will be
 * responsible for populating the TIDBitmap.  The other processes will
 * be blocked by the condition variable until the leader wakes them up.
 * ---------------
 */
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
{
    SharedBitmapState state;

    (void)pthread_mutex_lock(&pstate->cv_mtx);
    while (1) {
        /*
         * NOTE(review): CHECK_FOR_INTERRUPTS() may ereport(ERROR) while
         * cv_mtx is still held; confirm the error-recovery path releases
         * this mutex, otherwise other waiters could block indefinitely.
         */
        CHECK_FOR_INTERRUPTS();
        /* Capture the state we observed; the return value is based on it. */
        state = pstate->state;
        if (pstate->state == BM_INITIAL) {
            /* We saw it first: claim leadership of TIDBitmap creation. */
            pstate->state = BM_INPROGRESS;
        }

        /* Exit if bitmap is done, or if we're the leader. */
        if (state != BM_INPROGRESS) {
            break;
        }

        /*
         * Use pthread_cond_timedwait here in case of worker exit in error cases, and call
         * CHECK_FOR_INTERRUPTS to handle the error msg from worker.
         */
        struct timespec ts;
        clock_gettime(CLOCK_REALTIME, &ts);
        ts.tv_sec += WAIT_BITMAP_INIT_TIMEOUT;
        /* Spurious or timeout wakeups are fine: the loop re-reads the state. */
        (void)pthread_cond_timedwait(&pstate->cv, &pstate->cv_mtx, &ts);
    }
    (void)pthread_mutex_unlock(&pstate->cv_mtx);

    /* True only for the process that saw BM_INITIAL and became the leader. */
    return (state == BM_INITIAL);
}
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecBitmapHeapEstimate
|
||||
*
|
||||
* Compute the amount of space we'll need in the parallel
|
||||
* query DSM, and inform pcxt->estimator about our needs.
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
|
||||
{
|
||||
EState *estate = node->ss.ps.state;
|
||||
|
||||
node->pscan_len =
|
||||
add_size(offsetof(ParallelBitmapHeapState, phs_snapshot_data), EstimateSnapshotSpace(estate->es_snapshot));
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecBitmapHeapInitializeDSM
|
||||
*
|
||||
* Set up a parallel bitmap heap scan descriptor.
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt, int nodeid)
|
||||
{
|
||||
knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg;
|
||||
EState *estate = node->ss.ps.state;
|
||||
ParallelBitmapHeapState *pstate = (ParallelBitmapHeapState*)MemoryContextAllocZero(cxt->memCtx,
|
||||
node->pscan_len);
|
||||
|
||||
pstate->tbmiterator = NULL;
|
||||
pstate->prefetch_iterator = NULL;
|
||||
pstate->prefetch_pages = 0;
|
||||
pstate->prefetch_target = 0;
|
||||
pstate->state = BM_INITIAL;
|
||||
pstate->plan_node_id = node->ss.ps.plan->plan_node_id;
|
||||
pstate->pscan_len = node->pscan_len;
|
||||
|
||||
(void)pthread_cond_init(&pstate->cv, NULL);
|
||||
(void)pthread_mutex_init(&pstate->cv_mtx, NULL);
|
||||
SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data,
|
||||
node->pscan_len - offsetof(ParallelBitmapHeapState, phs_snapshot_data));
|
||||
|
||||
cxt->pwCtx->queryInfo.bmscan[nodeid] = pstate;
|
||||
node->pstate = pstate;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecBitmapHeapReInitializeDSM
|
||||
*
|
||||
* Reset shared state before beginning a fresh scan.
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
|
||||
{
|
||||
ParallelBitmapHeapState *pstate = node->pstate;
|
||||
|
||||
/* If there's no DSA, there are no workers; do nothing. */
|
||||
if (t_thrd.bgworker_cxt.memCxt == NULL) {
|
||||
return;
|
||||
}
|
||||
pstate->state = BM_INITIAL;
|
||||
|
||||
if (pstate->tbmiterator != NULL) {
|
||||
tbm_free_shared_area(pstate->tbmiterator);
|
||||
}
|
||||
if (pstate->prefetch_iterator != NULL) {
|
||||
tbm_free_shared_area(pstate->prefetch_iterator);
|
||||
}
|
||||
pstate->tbmiterator = NULL;
|
||||
pstate->prefetch_iterator = NULL;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecBitmapHeapInitializeWorker
|
||||
*
|
||||
* Copy relevant information from TOC into planstate.
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, void *context)
|
||||
{
|
||||
Assert(t_thrd.bgworker_cxt.memCxt != NULL);
|
||||
ParallelBitmapHeapState *pstate = NULL;
|
||||
knl_u_parallel_context *cxt = (knl_u_parallel_context *)context;
|
||||
|
||||
for (int i = 0; i < cxt->pwCtx->queryInfo.bmscan_num; i++) {
|
||||
if (node->ss.ps.plan->plan_node_id == cxt->pwCtx->queryInfo.bmscan[i]->plan_node_id) {
|
||||
pstate = cxt->pwCtx->queryInfo.bmscan[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pstate == NULL) {
|
||||
ereport(ERROR, (errmsg("could not find plan info, plan node id:%d", node->ss.ps.plan->plan_node_id)));
|
||||
}
|
||||
node->pstate = pstate;
|
||||
|
||||
Snapshot snapshot = RestoreSnapshot(pstate->phs_snapshot_data,
|
||||
pstate->pscan_len - offsetof(ParallelBitmapHeapState, phs_snapshot_data));
|
||||
heap_scan_update_snapshot((HeapScanDesc)node->ss.ss_currentScanDesc, snapshot);
|
||||
}
|
||||
|
||||
|
|
|
@ -91,7 +91,8 @@ Node* MultiExecBitmapIndexScan(BitmapIndexScanState* node)
|
|||
node->biss_result = NULL; /* reset for next time */
|
||||
} else {
|
||||
/* XXX should we use less than u_sess->attr.attr_memory.work_mem for this? */
|
||||
tbm = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L);
|
||||
tbm = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L,
|
||||
((BitmapIndexScan *) node->ss.ps.plan)->isshared ? t_thrd.bgworker_cxt.memCxt : NULL);
|
||||
|
||||
/* If bitmapscan uses global partition index, set tbm to global */
|
||||
if (RelationIsGlobalIndex(node->biss_RelationDesc)) {
|
||||
|
|
|
@ -125,7 +125,8 @@ Node* MultiExecBitmapOr(BitmapOrState* node)
|
|||
/* first subplan */
|
||||
if (result == NULL) {
|
||||
/* XXX should we use less than u_sess->attr.attr_memory.work_mem for this? */
|
||||
result = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L);
|
||||
result = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L,
|
||||
((BitmapOr *) node->ps.plan)->isshared ? t_thrd.bgworker_cxt.memCxt : NULL);
|
||||
/* If bitmapscan uses global partition index, set tbm to global */
|
||||
if (RelationIsGlobalIndex(((BitmapIndexScanState*)subnode)->biss_RelationDesc)) {
|
||||
tbm_set_global(result, true);
|
||||
|
|
|
@ -2047,14 +2047,13 @@ bool ExecParallelScanHashBucket(HashJoinState* hjstate, ExprContext* econtext)
|
|||
return true;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* For right Semi/Anti join, we delete mathced tuples in HashTable to make next matching faster,
|
||||
* so pointer hj_PreTuple is designed to follow the hj_CurTuple and to help us to clear the HashTable.
|
||||
*/
|
||||
if (hjstate->js.jointype == JOIN_RIGHT_SEMI || hjstate->js.jointype == JOIN_RIGHT_ANTI) {
|
||||
hjstate->hj_PreTuple = hashTuple;
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't support parallel right Semi/Anti join, so we don't set hj_PreTuple like ExecScanHashBucket
|
||||
* did. Check ExecScanHashBucket and hash_inner_and_outer.
|
||||
*/
|
||||
Assert(hjstate->js.jointype != JOIN_RIGHT_SEMI);
|
||||
Assert(hjstate->js.jointype != JOIN_RIGHT_ANTI);
|
||||
hashTuple = ExecParallelHashNextTuple(hashtable, hashTuple);
|
||||
}
|
||||
|
||||
|
|
|
@ -1597,7 +1597,6 @@ void ExecHashJoinInitializeDSM(HashJoinState* state, ParallelContext* pcxt, int
|
|||
/* Initialize the shared state in the hash node. */
|
||||
hashNode = (HashState*)innerPlanState(state);
|
||||
hashNode->parallel_state = pstate;
|
||||
t_thrd.bgworker_cxt.memCxt = cxt->memCtx;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
|
@ -1669,5 +1668,4 @@ void ExecHashJoinInitializeWorker(HashJoinState* state, void* pwcxt)
|
|||
hashNode = (HashState*)innerPlanState(state);
|
||||
hashNode->parallel_state = pstate;
|
||||
state->isParallel = true;
|
||||
t_thrd.bgworker_cxt.memCxt = cxt->memCtx;
|
||||
}
|
||||
|
|
|
@ -135,7 +135,7 @@ static bool collectMatchBitmap(GinBtreeData* btree, GinBtreeStack* stack, GinSca
|
|||
|
||||
/* Initialize empty bitmap result */
|
||||
if (!isColStore) {
|
||||
scanEntry->matchBitmap = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L);
|
||||
scanEntry->matchBitmap = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L, NULL);
|
||||
}
|
||||
|
||||
/* Null query cannot partial-match anything */
|
||||
|
|
|
@ -1857,6 +1857,15 @@ HeapTuple heapGetNextForVerify(HeapScanDesc scan, ScanDirection direction, bool&
|
|||
return &(scan->rs_ctup);
|
||||
}
|
||||
|
||||
/*
 * heap_scan_update_snapshot
 *		Switch an already-open heap scan over to a new MVCC snapshot.
 *
 * Used by parallel workers (see ExecBitmapHeapInitializeWorker) so the
 * worker's scan uses the snapshot restored from the leader's shared state.
 */
void heap_scan_update_snapshot(HeapScanDesc scan, Snapshot snapshot)
{
    Assert(IsMVCCSnapshot(snapshot));

    /* Keep the snapshot registered for the lifetime of the scan. */
    RegisterSnapshot(snapshot);
    scan->rs_snapshot = snapshot;
    /*
     * SO_TEMP_SNAPSHOT marks the snapshot as scan-owned — presumably
     * unregistered when the scan ends; confirm against heap_endscan.
     */
    scan->rs_flags |= SO_TEMP_SNAPSHOT;
}
|
||||
|
||||
/* ----------------
|
||||
* heap_parallelscan_estimate - estimate storage for ParallelHeapScanDesc
|
||||
*
|
||||
|
|
|
@ -918,6 +918,7 @@ void ParallelWorkerMain(Datum main_arg)
|
|||
|
||||
/* Set flag to indicate that we're initializing a parallel worker. */
|
||||
t_thrd.bgworker_cxt.InitializingParallelWorker = true;
|
||||
t_thrd.bgworker_cxt.memCxt = ctx->memCtx;
|
||||
|
||||
/* Establish signal handlers. */
|
||||
gspqsignal(SIGTERM, die);
|
||||
|
|
|
@ -104,6 +104,8 @@ void *dsm_create(void)
|
|||
|
||||
u_sess->parallel_ctx[i].used = true;
|
||||
slist_init(&u_sess->parallel_ctx[i].on_detach);
|
||||
|
||||
t_thrd.bgworker_cxt.memCxt = u_sess->parallel_ctx[i].memCtx;
|
||||
return &(u_sess->parallel_ctx[i]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -158,7 +158,8 @@ static const char *BuiltinTrancheNames[] = {
|
|||
"PLdebugger",
|
||||
"SharedTupleStore",
|
||||
"parallel_append",
|
||||
"ParallelHashJoinLock"
|
||||
"ParallelHashJoinLock",
|
||||
"TidBitMapLock"
|
||||
};
|
||||
|
||||
static void RegisterLWLockTranches(void);
|
||||
|
|
|
@ -101,6 +101,10 @@ extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
|
|||
extern void heap_rescan(HeapScanDesc scan, ScanKey key);
|
||||
extern void heap_endscan(HeapScanDesc scan);
|
||||
extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
|
||||
/*
|
||||
* Update snapshot used by the scan.
|
||||
*/
|
||||
extern void heap_scan_update_snapshot(HeapScanDesc scan, Snapshot snapshot);
|
||||
|
||||
extern Size heap_parallelscan_estimate(Snapshot snapshot);
|
||||
extern void heap_parallelscan_initialize(ParallelHeapScanDesc target, Size pscan_len, Relation relation,
|
||||
|
|
|
@ -15,10 +15,15 @@
|
|||
#define NODEBITMAPHEAPSCAN_H
|
||||
|
||||
#include "nodes/execnodes.h"
|
||||
#include "access/parallel.h"
|
||||
|
||||
extern BitmapHeapScanState* ExecInitBitmapHeapScan(BitmapHeapScan* node, EState* estate, int eflags);
|
||||
extern TupleTableSlot* ExecBitmapHeapScan(BitmapHeapScanState* node);
|
||||
extern void ExecEndBitmapHeapScan(BitmapHeapScanState* node);
|
||||
extern void ExecReScanBitmapHeapScan(BitmapHeapScanState* node);
|
||||
extern void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt);
|
||||
extern void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt, int nodeid);
|
||||
extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt);
|
||||
extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, void *context);
|
||||
|
||||
#endif /* NODEBITMAPHEAPSCAN_H */
|
||||
|
|
|
@ -2059,6 +2059,7 @@ struct ParallelHeapScanDescData;
|
|||
struct ParallelIndexScanDescData;
|
||||
struct ParallelHashJoinState;
|
||||
struct SharedHashInfo;
|
||||
struct ParallelBitmapHeapState;
|
||||
typedef uint64 XLogRecPtr;
|
||||
typedef struct ParallelQueryInfo {
|
||||
struct SharedExecutorInstrumentation* instrumentation;
|
||||
|
@ -2079,6 +2080,8 @@ typedef struct ParallelQueryInfo {
|
|||
ParallelHashJoinState** jstate;
|
||||
int hash_num;
|
||||
SharedHashInfo** shared_info;
|
||||
int bmscan_num;
|
||||
ParallelBitmapHeapState **bmscan;
|
||||
} ParallelQueryInfo;
|
||||
|
||||
struct BTShared;
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "executor/instrument.h"
|
||||
#include "nodes/params.h"
|
||||
#include "nodes/plannodes.h"
|
||||
#include "nodes/tidbitmap.h"
|
||||
#include "storage/pagecompress.h"
|
||||
#include "utils/bloom_filter.h"
|
||||
#include "utils/reltrigger.h"
|
||||
|
@ -1714,15 +1715,65 @@ typedef struct BitmapIndexScanState {
|
|||
} BitmapIndexScanState;
|
||||
|
||||
/* ----------------
 *	 SharedBitmapState information
 *
 *		Current phase of TIDBitmap creation during a parallel bitmap
 *		heap scan.
 *
 *		BM_INITIAL		TIDBitmap creation is not yet started, so first worker
 *						to see this state will set the state to BM_INPROGRESS
 *						and that process will be responsible for creating
 *						TIDBitmap.
 *		BM_INPROGRESS	TIDBitmap creation is in progress; workers need to
 *						sleep until it's finished.
 *		BM_FINISHED		TIDBitmap creation is done, so now all workers can
 *						proceed to iterate over TIDBitmap.
 * ----------------
 */
typedef enum {
    BM_INITIAL,
    BM_INPROGRESS,
    BM_FINISHED
} SharedBitmapState;

/* ----------------
 *	 ParallelBitmapHeapState information
 *		tbmiterator			iterator for scanning current pages
 *		prefetch_iterator		iterator for prefetching ahead of current page
 *		pscan_len			total size of this shared struct, including the
 *						serialized snapshot at the tail
 *		prefetch_pages			# pages prefetch iterator is ahead of current
 *		prefetch_target			current target prefetch distance
 *		plan_node_id			plan node id; workers use it to find this
 *						shared state in the parallel query info
 *		state				current state of the TIDBitmap
 *		cv_mtx				mutual exclusion for the prefetching variables
 *						and state
 *		cv				conditional wait variable
 *		phs_snapshot_data		snapshot data shared to workers
 * ----------------
 */
typedef struct ParallelBitmapHeapState {
    TBMSharedIteratorState *tbmiterator;
    TBMSharedIteratorState *prefetch_iterator;
    Size pscan_len;
    int prefetch_pages;
    int prefetch_target;
    int plan_node_id;
    SharedBitmapState state;
    pthread_mutex_t cv_mtx;
    pthread_cond_t cv;
    char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
} ParallelBitmapHeapState;
|
||||
|
||||
/* ----------------
|
||||
* BitmapHeapScanState information
|
||||
*
|
||||
* bitmapqualorig execution state for bitmapqualorig expressions
|
||||
* tbm bitmap obtained from child index scan(s)
|
||||
* tbmiterator iterator for scanning current pages
|
||||
* tbmres current-page data
|
||||
* prefetch_iterator iterator for prefetching ahead of current page
|
||||
* prefetch_pages # pages prefetch iterator is ahead of current
|
||||
* prefetch_target target prefetch distance
|
||||
* pscan_len size of the shared memory for parallel bitmap
|
||||
* initialized is node is ready to iterate
|
||||
* shared_tbmiterator shared iterator
|
||||
* shared_prefetch_iterator shared iterator for prefetching
|
||||
* pstate shared state for parallel bitmap scan
|
||||
* ----------------
|
||||
*/
|
||||
typedef struct BitmapHeapScanState {
|
||||
|
@ -1735,6 +1786,11 @@ typedef struct BitmapHeapScanState {
|
|||
int prefetch_pages;
|
||||
int prefetch_target;
|
||||
GPIScanDesc gpi_scan; /* global partition index scan use information */
|
||||
Size pscan_len;
|
||||
bool initialized;
|
||||
TBMSharedIterator *shared_tbmiterator;
|
||||
TBMSharedIterator *shared_prefetch_iterator;
|
||||
ParallelBitmapHeapState *pstate;
|
||||
} BitmapHeapScanState;
|
||||
|
||||
/* ----------------
|
||||
|
|
|
@ -507,6 +507,7 @@ typedef struct BitmapAnd {
|
|||
*/
|
||||
typedef struct BitmapOr {
|
||||
Plan plan;
|
||||
bool isshared;
|
||||
List* bitmapplans;
|
||||
} BitmapOr;
|
||||
|
||||
|
@ -683,6 +684,7 @@ typedef struct IndexOnlyScan {
|
|||
typedef struct BitmapIndexScan {
|
||||
Scan scan;
|
||||
Oid indexid; /* OID of index to scan */
|
||||
bool isshared;
|
||||
char* indexname; /* name of index to scan */
|
||||
List* indexqual; /* list of index quals (OpExprs) */
|
||||
List* indexqualorig; /* the same in original form */
|
||||
|
|
|
@ -32,6 +32,8 @@ typedef struct TIDBitmap TIDBitmap;
|
|||
|
||||
/* Likewise, TBMIterator is private */
|
||||
typedef struct TBMIterator TBMIterator;
|
||||
typedef struct TBMSharedIterator TBMSharedIterator;
|
||||
typedef struct TBMSharedIteratorState TBMSharedIteratorState;
|
||||
|
||||
/* Result structure for tbm_iterate */
|
||||
typedef struct {
|
||||
|
@ -44,8 +46,9 @@ typedef struct {
|
|||
} TBMIterateResult;
|
||||
|
||||
/* function prototypes in nodes/tidbitmap.c */
|
||||
extern TIDBitmap* tbm_create(long maxbytes);
|
||||
extern TIDBitmap* tbm_create(long maxbytes, MemoryContext dsa);
|
||||
extern void tbm_free(TIDBitmap* tbm);
|
||||
extern void tbm_free_shared_area(TBMSharedIteratorState *istate);
|
||||
|
||||
extern void tbm_add_tuples(
|
||||
TIDBitmap* tbm, const ItemPointer tids, int ntids, bool recheck, Oid partitionOid = InvalidOid);
|
||||
|
@ -57,8 +60,12 @@ extern void tbm_intersect(TIDBitmap* a, const TIDBitmap* b);
|
|||
extern bool tbm_is_empty(const TIDBitmap* tbm);
|
||||
|
||||
extern TBMIterator* tbm_begin_iterate(TIDBitmap* tbm);
|
||||
extern TBMSharedIteratorState* tbm_prepare_shared_iterate(TIDBitmap *tbm);
|
||||
extern TBMIterateResult* tbm_iterate(TBMIterator* iterator);
|
||||
extern TBMIterateResult* tbm_shared_iterate(TBMSharedIterator *iterator);
|
||||
extern void tbm_end_iterate(TBMIterator* iterator);
|
||||
extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
|
||||
extern TBMSharedIterator *tbm_attach_shared_iterate(TBMSharedIteratorState* istate);
|
||||
extern bool tbm_is_global(const TIDBitmap* tbm);
|
||||
extern void tbm_set_global(TIDBitmap* tbm, bool isGlobal);
|
||||
#endif /* TIDBITMAP_H */
|
||||
|
|
|
@ -179,6 +179,8 @@ extern double estimate_hash_num_distinct(PlannerInfo* root, List* hashkey, Path*
|
|||
double local_ndistinct, double global_ndistinct, bool* usesinglestats);
|
||||
extern RelOptInfo* find_join_input_rel(PlannerInfo* root, Relids relids);
|
||||
extern double compute_sort_disk_cost(double input_bytes, double sort_mem_bytes);
|
||||
extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
|
||||
Path *bitmapqual, double loop_count, Cost *cost, double *tuple, bool ispartitionedindex);
|
||||
|
||||
extern double approx_tuple_count(PlannerInfo* root, JoinPath* path, List* quals);
|
||||
extern void set_rel_path_rows(Path* path, RelOptInfo* rel, ParamPathInfo* param_info);
|
||||
|
|
|
@ -66,8 +66,8 @@ extern bool CheckBitmapHeapPathContainGlobalOrLocal(Path* bitmapqual);
|
|||
extern bool check_bitmap_heap_path_index_unusable(Path* bitmapqual, RelOptInfo* baserel);
|
||||
extern bool is_partitionIndex_Subpath(Path* subpath);
|
||||
extern bool is_pwj_path(Path* pwjpath);
|
||||
extern BitmapHeapPath* create_bitmap_heap_path(
|
||||
PlannerInfo* root, RelOptInfo* rel, Path* bitmapqual, Relids required_outer, double loop_count);
|
||||
extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual,
|
||||
Relids required_outer, double loop_count, int parallel_degree);
|
||||
extern BitmapAndPath* create_bitmap_and_path(PlannerInfo* root, RelOptInfo* rel, List* bitmapquals);
|
||||
extern BitmapOrPath* create_bitmap_or_path(PlannerInfo* root, RelOptInfo* rel, List* bitmapquals);
|
||||
extern TidPath* create_tidscan_path(PlannerInfo* root, RelOptInfo* rel, List* tidquals);
|
||||
|
|
|
@ -21,6 +21,7 @@ extern RelOptInfo* standard_join_search(PlannerInfo* root, int levels_needed, Li
|
|||
|
||||
extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
|
||||
extern int compute_parallel_worker(const RelOptInfo *rel, double heap_pages, double index_pages, int max_workers);
|
||||
extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual);
|
||||
|
||||
extern void set_rel_size(PlannerInfo* root, RelOptInfo* rel, Index rti, RangeTblEntry* rte);
|
||||
|
||||
|
|
|
@ -146,6 +146,7 @@ enum BuiltinTrancheIds {
|
|||
LWTRANCHE_SHARED_TUPLESTORE,
|
||||
LWTRANCHE_PARALLEL_APPEND,
|
||||
LWTRANCHE_PARALLEL_HASH_JOIN,
|
||||
LWTRANCHE_TBM,
|
||||
/*
|
||||
* Each trancheId above should have a corresponding item in BuiltinTrancheNames;
|
||||
*/
|
||||
|
|
|
@ -82,18 +82,24 @@ explain (costs off) select * from a where a1 > 4 union all select * from b where
|
|||
(10 rows)
|
||||
|
||||
explain (costs off) select * from c where c1 in (select a1 from a union select b1 from b);
|
||||
QUERY PLAN
|
||||
---------------------------------
|
||||
QUERY PLAN
|
||||
------------------------------------------------
|
||||
Hash Join
|
||||
Hash Cond: (a.a1 = c.c1)
|
||||
-> HashAggregate
|
||||
Group By Key: a.a1
|
||||
-> Append
|
||||
-> Seq Scan on a
|
||||
-> Seq Scan on b
|
||||
-> Gather
|
||||
Number of Workers: 1
|
||||
-> Parallel Seq Scan on a
|
||||
-> Gather
|
||||
Number of Workers: 1
|
||||
-> Parallel Seq Scan on b
|
||||
-> Hash
|
||||
-> Seq Scan on c
|
||||
(9 rows)
|
||||
-> Gather
|
||||
Number of Workers: 1
|
||||
-> Parallel Seq Scan on c
|
||||
(15 rows)
|
||||
|
||||
explain (costs off) select * from (select * from a union all select * from b) as ta, c where ta.a1 = c.c1;
|
||||
QUERY PLAN
|
||||
|
|
|
@ -72,14 +72,539 @@ select count(*) from parallel_t1 where a <> 5000;
|
|||
99999
|
||||
(1 row)
|
||||
|
||||
--normal plan for bitmapscan
|
||||
create index idx_parallel_t1 on parallel_t1(a);
|
||||
analyze parallel_t1;
|
||||
set enable_seqscan to off;
|
||||
set enable_indexscan to off;
|
||||
explain (costs off) select count(*) from parallel_t1 where a > 5000;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------
|
||||
Aggregate
|
||||
-> Bitmap Heap Scan on parallel_t1
|
||||
Recheck Cond: (a > 5000)
|
||||
-> Bitmap Index Scan on idx_parallel_t1
|
||||
Index Cond: (a > 5000)
|
||||
(5 rows)
|
||||
|
||||
explain (costs off) select count(*) from parallel_t1 where a < 5000;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------
|
||||
Aggregate
|
||||
-> Bitmap Heap Scan on parallel_t1
|
||||
Recheck Cond: (a < 5000)
|
||||
-> Bitmap Index Scan on idx_parallel_t1
|
||||
Index Cond: (a < 5000)
|
||||
(5 rows)
|
||||
|
||||
explain (costs off) select count(*) from parallel_t1 where a <> 5000;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------
|
||||
Aggregate
|
||||
-> Bitmap Heap Scan on parallel_t1
|
||||
Filter: (a <> 5000)
|
||||
-> Bitmap Index Scan on idx_parallel_t1
|
||||
(4 rows)
|
||||
|
||||
select count(*) from parallel_t1 where a > 5000;
|
||||
count
|
||||
-------
|
||||
95000
|
||||
(1 row)
|
||||
|
||||
select count(*) from parallel_t1 where a < 5000;
|
||||
count
|
||||
-------
|
||||
4999
|
||||
(1 row)
|
||||
|
||||
select count(*) from parallel_t1 where a <> 5000;
|
||||
count
|
||||
-------
|
||||
99999
|
||||
(1 row)
|
||||
|
||||
reset enable_seqscan;
|
||||
reset enable_indexscan;
|
||||
--set parallel parameter
|
||||
set force_parallel_mode=on;
|
||||
set parallel_setup_cost=0;
|
||||
set parallel_tuple_cost=0.000005;
|
||||
set max_parallel_workers_per_gather=2;
|
||||
set min_parallel_table_scan_size=0;
|
||||
set min_parallel_index_scan_size=0;
|
||||
set parallel_leader_participation=on;
|
||||
--rescan case
|
||||
create table test_with_rescan(dm int, sj_dm int, name text);
|
||||
insert into test_with_rescan values(1,0,'universe');
|
||||
insert into test_with_rescan values(2,1,'galaxy');
|
||||
insert into test_with_rescan values(3,2,'sun');
|
||||
insert into test_with_rescan values(4,3,'earth');
|
||||
insert into test_with_rescan values(5,4,'asia');
|
||||
insert into test_with_rescan values(6,5,'China');
|
||||
insert into test_with_rescan values(7,6,'shaanxi');
|
||||
insert into test_with_rescan values(8,7,'xian');
|
||||
insert into test_with_rescan values(9,8,'huawei');
|
||||
insert into test_with_rescan values(10,9,'v10');
|
||||
insert into test_with_rescan values(11,10,'v10-3L');
|
||||
insert into test_with_rescan values(12,11,'gauss');
|
||||
insert into test_with_rescan values(13,12,'test');
|
||||
insert into test_with_rescan values(14,13,'test');
|
||||
insert into test_with_rescan values(15,14,'test');
|
||||
insert into test_with_rescan values(16,15,'test');
|
||||
insert into test_with_rescan values(17,16,'test');
|
||||
insert into test_with_rescan values(18,17,'test');
|
||||
insert into test_with_rescan values(19,18,'test');
|
||||
insert into test_with_rescan values(20,19,'test');
|
||||
create index on test_with_rescan(dm);
|
||||
create index on test_with_rescan(sj_dm);
|
||||
create index on test_with_rescan(name);
|
||||
analyze test_with_rescan;
|
||||
explain (costs off)
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------------------------
|
||||
CTE Scan on t_result t
|
||||
CTE t_result
|
||||
-> Recursive Union
|
||||
-> Limit
|
||||
-> Sort
|
||||
Sort Key: test_with_rescan.dm
|
||||
-> Gather
|
||||
Number of Workers: 1
|
||||
-> Parallel Seq Scan on test_with_rescan
|
||||
Filter: (sj_dm < 10)
|
||||
-> Hash Join
|
||||
Hash Cond: (t1.dm = t2.sj_dm)
|
||||
-> WorkTable Scan on t_result t1
|
||||
-> Hash
|
||||
-> Gather
|
||||
Number of Workers: 1
|
||||
-> Parallel Seq Scan on test_with_rescan t2
|
||||
(17 rows)
|
||||
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t order by 1,2,3,4;
|
||||
dm | sj_dm | name | level
|
||||
----+-------+-------------------------------------------------------------------------------------------------------------------------------------+-------
|
||||
3 | 2 | sun | 1
|
||||
4 | 3 | earth | 1
|
||||
4 | 3 | earth > sun | 2
|
||||
5 | 4 | asia | 1
|
||||
5 | 4 | asia > earth | 2
|
||||
5 | 4 | asia > earth > sun | 3
|
||||
6 | 5 | China | 1
|
||||
6 | 5 | China > asia | 2
|
||||
6 | 5 | China > asia > earth | 3
|
||||
6 | 5 | China > asia > earth > sun | 4
|
||||
7 | 6 | shaanxi | 1
|
||||
7 | 6 | shaanxi > China | 2
|
||||
7 | 6 | shaanxi > China > asia | 3
|
||||
7 | 6 | shaanxi > China > asia > earth | 4
|
||||
7 | 6 | shaanxi > China > asia > earth > sun | 5
|
||||
8 | 7 | xian | 1
|
||||
8 | 7 | xian > shaanxi | 2
|
||||
8 | 7 | xian > shaanxi > China | 3
|
||||
8 | 7 | xian > shaanxi > China > asia | 4
|
||||
8 | 7 | xian > shaanxi > China > asia > earth | 5
|
||||
8 | 7 | xian > shaanxi > China > asia > earth > sun | 6
|
||||
9 | 8 | huawei > xian | 2
|
||||
9 | 8 | huawei > xian > shaanxi | 3
|
||||
9 | 8 | huawei > xian > shaanxi > China | 4
|
||||
9 | 8 | huawei > xian > shaanxi > China > asia | 5
|
||||
9 | 8 | huawei > xian > shaanxi > China > asia > earth | 6
|
||||
9 | 8 | huawei > xian > shaanxi > China > asia > earth > sun | 7
|
||||
10 | 9 | v10 > huawei > xian | 3
|
||||
10 | 9 | v10 > huawei > xian > shaanxi | 4
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China | 5
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China > asia | 6
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth | 7
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth > sun | 8
|
||||
11 | 10 | v10-3L > v10 > huawei > xian | 4
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi | 5
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China | 6
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia | 7
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 8
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 9
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian | 5
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi | 6
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 7
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 8
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 9
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 10
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian | 6
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 7
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 8
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 9
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 10
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 11
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian | 7
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 8
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 9
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 10
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 11
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 12
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian | 8
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 9
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 10
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 11
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 12
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 13
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 9
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 10
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 11
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 12
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 13
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 14
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 10
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 11
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 12
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 13
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 14
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 15
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 11
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 12
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 13
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 14
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 15
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 16
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 12
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 13
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 14
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 15
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 16
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 17
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 13
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 14
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 15
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 16
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 17
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 18
|
||||
(93 rows)
|
||||
|
||||
--increate cpu_tuple_cost, disable seqscan, test parallel index scan
|
||||
set enable_seqscan=off;
|
||||
set cpu_tuple_cost=1000;
|
||||
explain (costs off)
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t;
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------------------------------------------------------
|
||||
CTE Scan on t_result t
|
||||
CTE t_result
|
||||
-> Recursive Union
|
||||
-> Limit
|
||||
-> Sort
|
||||
Sort Key: test_with_rescan.dm
|
||||
-> Gather
|
||||
Number of Workers: 1
|
||||
-> Parallel Index Scan using test_with_rescan_sj_dm_idx on test_with_rescan
|
||||
Index Cond: (sj_dm < 10)
|
||||
-> Merge Join
|
||||
Merge Cond: (t1.dm = t2.sj_dm)
|
||||
-> Sort
|
||||
Sort Key: t1.dm
|
||||
-> WorkTable Scan on t_result t1
|
||||
-> Sort
|
||||
Sort Key: t2.sj_dm
|
||||
-> Gather
|
||||
Number of Workers: 1
|
||||
-> Parallel Index Scan using test_with_rescan_sj_dm_idx on test_with_rescan t2
|
||||
(20 rows)
|
||||
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t order by 1,2,3,4;
|
||||
dm | sj_dm | name | level
|
||||
----+-------+-------------------------------------------------------------------------------------------------------------------------------------+-------
|
||||
3 | 2 | sun | 1
|
||||
4 | 3 | earth | 1
|
||||
4 | 3 | earth > sun | 2
|
||||
5 | 4 | asia | 1
|
||||
5 | 4 | asia > earth | 2
|
||||
5 | 4 | asia > earth > sun | 3
|
||||
6 | 5 | China | 1
|
||||
6 | 5 | China > asia | 2
|
||||
6 | 5 | China > asia > earth | 3
|
||||
6 | 5 | China > asia > earth > sun | 4
|
||||
7 | 6 | shaanxi | 1
|
||||
7 | 6 | shaanxi > China | 2
|
||||
7 | 6 | shaanxi > China > asia | 3
|
||||
7 | 6 | shaanxi > China > asia > earth | 4
|
||||
7 | 6 | shaanxi > China > asia > earth > sun | 5
|
||||
8 | 7 | xian | 1
|
||||
8 | 7 | xian > shaanxi | 2
|
||||
8 | 7 | xian > shaanxi > China | 3
|
||||
8 | 7 | xian > shaanxi > China > asia | 4
|
||||
8 | 7 | xian > shaanxi > China > asia > earth | 5
|
||||
8 | 7 | xian > shaanxi > China > asia > earth > sun | 6
|
||||
9 | 8 | huawei > xian | 2
|
||||
9 | 8 | huawei > xian > shaanxi | 3
|
||||
9 | 8 | huawei > xian > shaanxi > China | 4
|
||||
9 | 8 | huawei > xian > shaanxi > China > asia | 5
|
||||
9 | 8 | huawei > xian > shaanxi > China > asia > earth | 6
|
||||
9 | 8 | huawei > xian > shaanxi > China > asia > earth > sun | 7
|
||||
10 | 9 | v10 > huawei > xian | 3
|
||||
10 | 9 | v10 > huawei > xian > shaanxi | 4
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China | 5
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China > asia | 6
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth | 7
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth > sun | 8
|
||||
11 | 10 | v10-3L > v10 > huawei > xian | 4
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi | 5
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China | 6
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia | 7
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 8
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 9
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian | 5
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi | 6
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 7
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 8
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 9
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 10
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian | 6
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 7
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 8
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 9
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 10
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 11
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian | 7
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 8
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 9
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 10
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 11
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 12
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian | 8
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 9
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 10
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 11
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 12
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 13
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 9
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 10
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 11
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 12
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 13
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 14
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 10
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 11
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 12
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 13
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 14
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 15
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 11
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 12
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 13
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 14
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 15
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 16
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 12
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 13
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 14
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 15
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 16
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 17
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 13
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 14
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 15
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 16
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 17
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 18
|
||||
(93 rows)
|
||||
|
||||
--disable indexscan, test parallel bitmap scan
|
||||
set enable_indexscan to off;
|
||||
explain (costs off)
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------------------------
|
||||
CTE Scan on t_result t
|
||||
CTE t_result
|
||||
-> Recursive Union
|
||||
-> Limit
|
||||
-> Sort
|
||||
Sort Key: test_with_rescan.dm
|
||||
-> Gather
|
||||
Number of Workers: 1
|
||||
-> Parallel Bitmap Heap Scan on test_with_rescan
|
||||
Recheck Cond: (sj_dm < 10)
|
||||
-> Bitmap Index Scan on test_with_rescan_sj_dm_idx
|
||||
Index Cond: (sj_dm < 10)
|
||||
-> Nested Loop
|
||||
-> WorkTable Scan on t_result t1
|
||||
-> Bitmap Heap Scan on test_with_rescan t2
|
||||
Recheck Cond: (sj_dm = t1.dm)
|
||||
-> Bitmap Index Scan on test_with_rescan_sj_dm_idx
|
||||
Index Cond: (sj_dm = t1.dm)
|
||||
(18 rows)
|
||||
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t order by 1,2,3,4;
|
||||
dm | sj_dm | name | level
|
||||
----+-------+-------------------------------------------------------------------------------------------------------------------------------------+-------
|
||||
3 | 2 | sun | 1
|
||||
4 | 3 | earth | 1
|
||||
4 | 3 | earth > sun | 2
|
||||
5 | 4 | asia | 1
|
||||
5 | 4 | asia > earth | 2
|
||||
5 | 4 | asia > earth > sun | 3
|
||||
6 | 5 | China | 1
|
||||
6 | 5 | China > asia | 2
|
||||
6 | 5 | China > asia > earth | 3
|
||||
6 | 5 | China > asia > earth > sun | 4
|
||||
7 | 6 | shaanxi | 1
|
||||
7 | 6 | shaanxi > China | 2
|
||||
7 | 6 | shaanxi > China > asia | 3
|
||||
7 | 6 | shaanxi > China > asia > earth | 4
|
||||
7 | 6 | shaanxi > China > asia > earth > sun | 5
|
||||
8 | 7 | xian | 1
|
||||
8 | 7 | xian > shaanxi | 2
|
||||
8 | 7 | xian > shaanxi > China | 3
|
||||
8 | 7 | xian > shaanxi > China > asia | 4
|
||||
8 | 7 | xian > shaanxi > China > asia > earth | 5
|
||||
8 | 7 | xian > shaanxi > China > asia > earth > sun | 6
|
||||
9 | 8 | huawei > xian | 2
|
||||
9 | 8 | huawei > xian > shaanxi | 3
|
||||
9 | 8 | huawei > xian > shaanxi > China | 4
|
||||
9 | 8 | huawei > xian > shaanxi > China > asia | 5
|
||||
9 | 8 | huawei > xian > shaanxi > China > asia > earth | 6
|
||||
9 | 8 | huawei > xian > shaanxi > China > asia > earth > sun | 7
|
||||
10 | 9 | v10 > huawei > xian | 3
|
||||
10 | 9 | v10 > huawei > xian > shaanxi | 4
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China | 5
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China > asia | 6
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth | 7
|
||||
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth > sun | 8
|
||||
11 | 10 | v10-3L > v10 > huawei > xian | 4
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi | 5
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China | 6
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia | 7
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 8
|
||||
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 9
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian | 5
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi | 6
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 7
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 8
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 9
|
||||
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 10
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian | 6
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 7
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 8
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 9
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 10
|
||||
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 11
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian | 7
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 8
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 9
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 10
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 11
|
||||
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 12
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian | 8
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 9
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 10
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 11
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 12
|
||||
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 13
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 9
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 10
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 11
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 12
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 13
|
||||
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 14
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 10
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 11
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 12
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 13
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 14
|
||||
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 15
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 11
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 12
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 13
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 14
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 15
|
||||
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 16
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 12
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 13
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 14
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 15
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 16
|
||||
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 17
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 13
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 14
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 15
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 16
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 17
|
||||
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 18
|
||||
(93 rows)
|
||||
|
||||
drop table test_with_rescan;
|
||||
reset enable_seqscan;
|
||||
reset enable_indexscan;
|
||||
reset cpu_tuple_cost;
|
||||
--parallel plan for seq scan
|
||||
set enable_bitmapscan to off;
|
||||
set enable_indexscan to off;
|
||||
explain (costs off) select count(*) from parallel_t1;
|
||||
QUERY PLAN
|
||||
----------------------------------------------
|
||||
|
@ -174,6 +699,100 @@ explain (costs off,analyse on,verbose on) select count(*) from parallel_t1;
|
|||
--? Total runtime: [0-9]*\.[0-9]*\.\.[0-9]*\.[0-9]* ms
|
||||
(10 rows)
|
||||
|
||||
reset enable_indexscan;
|
||||
reset enable_bitmapscan;
|
||||
--parallel plan for bitmap scan
|
||||
--onepage case
|
||||
CREATE TABLE onepage1 (val int, val2 int);
|
||||
ALTER TABLE onepage1 ADD PRIMARY KEY(val, val2);
|
||||
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "onepage1_pkey" for table "onepage1"
|
||||
insert into onepage1 (select * from generate_series(1, 5) a, generate_series(1, 5) b);
|
||||
CREATE TABLE onepage2 as select * from onepage1;
|
||||
explain select * from onepage2 natural join onepage1 where onepage2.val > 2 and onepage1.val < 4;
|
||||
QUERY PLAN
|
||||
-----------------------------------------------------------------------------------------------
|
||||
Gather (cost=15.04..32.69 rows=1 width=8)
|
||||
Number of Workers: 2
|
||||
-> Parallel Hash Join (cost=15.04..32.69 rows=1 width=8)
|
||||
Hash Cond: ((onepage2.val = onepage1.val) AND (onepage2.val2 = onepage1.val2))
|
||||
-> Parallel Seq Scan on onepage2 (cost=0.00..17.60 rows=5 width=8)
|
||||
Filter: ((val > 2) AND (val < 4))
|
||||
-> Parallel Hash (cost=14.97..14.97 rows=5 width=8)
|
||||
-> Parallel Bitmap Heap Scan on onepage1 (cost=4.36..14.97 rows=5 width=8)
|
||||
Recheck Cond: ((val < 4) AND (val > 2))
|
||||
-> Bitmap Index Scan on onepage1_pkey (cost=0.00..4.36 rows=11 width=0)
|
||||
Index Cond: ((val < 4) AND (val > 2))
|
||||
(11 rows)
|
||||
|
||||
select * from onepage2 natural join onepage1 where onepage2.val > 2 and onepage1.val < 4 order by 1,2;
|
||||
val | val2
|
||||
-----+------
|
||||
3 | 1
|
||||
3 | 2
|
||||
3 | 3
|
||||
3 | 4
|
||||
3 | 5
|
||||
(5 rows)
|
||||
|
||||
drop table onepage1;
|
||||
drop table onepage2;
|
||||
set enable_seqscan to off;
|
||||
set enable_indexscan to off;
|
||||
explain (costs off) select count(*) from parallel_t1 where a > 5000;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------
|
||||
Aggregate
|
||||
-> Gather
|
||||
Number of Workers: 2
|
||||
-> Parallel Bitmap Heap Scan on parallel_t1
|
||||
Recheck Cond: (a > 5000)
|
||||
-> Bitmap Index Scan on idx_parallel_t1
|
||||
Index Cond: (a > 5000)
|
||||
(7 rows)
|
||||
|
||||
explain (costs off) select count(*) from parallel_t1 where a < 5000;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------
|
||||
Aggregate
|
||||
-> Gather
|
||||
Number of Workers: 2
|
||||
-> Parallel Bitmap Heap Scan on parallel_t1
|
||||
Recheck Cond: (a < 5000)
|
||||
-> Bitmap Index Scan on idx_parallel_t1
|
||||
Index Cond: (a < 5000)
|
||||
(7 rows)
|
||||
|
||||
explain (costs off) select count(*) from parallel_t1 where a <> 5000;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------
|
||||
Aggregate
|
||||
-> Gather
|
||||
Number of Workers: 2
|
||||
-> Parallel Bitmap Heap Scan on parallel_t1
|
||||
Filter: (a <> 5000)
|
||||
-> Bitmap Index Scan on idx_parallel_t1
|
||||
(6 rows)
|
||||
|
||||
select count(*) from parallel_t1 where a > 5000;
|
||||
count
|
||||
-------
|
||||
95000
|
||||
(1 row)
|
||||
|
||||
select count(*) from parallel_t1 where a < 5000;
|
||||
count
|
||||
-------
|
||||
4999
|
||||
(1 row)
|
||||
|
||||
select count(*) from parallel_t1 where a <> 5000;
|
||||
count
|
||||
-------
|
||||
99999
|
||||
(1 row)
|
||||
|
||||
reset enable_seqscan;
|
||||
reset enable_indexscan;
|
||||
--clean up
|
||||
reset force_parallel_mode;
|
||||
reset parallel_setup_cost;
|
||||
|
@ -442,6 +1061,57 @@ select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
|
|||
reset enable_hashjoin;
|
||||
reset enable_nestloop;
|
||||
reset enable_indexscan;
|
||||
--parallel with subplan
|
||||
drop table subplan_tb1;
|
||||
ERROR: table "subplan_tb1" does not exist
|
||||
create table subplan_tb1(a int, b varchar);
|
||||
insert into subplan_tb1 values (0, NULL);
|
||||
insert into subplan_tb1 values (1, NULL);
|
||||
insert into subplan_tb1 values (2, NULL);
|
||||
insert into subplan_tb1 values (3, NULL);
|
||||
explain (verbose,costs off) select boo_1.a
|
||||
from subplan_tb1 boo_1 inner join subplan_tb1 boo_2
|
||||
on ( case when boo_1.a in (
|
||||
select boo_3.a from subplan_tb1 boo_3 ) then boo_1.a end ) in
|
||||
(select max( boo.a ) column_009 from subplan_tb1 boo);
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------------------------------------
|
||||
Merge Join
|
||||
Output: boo_1.a
|
||||
Merge Cond: ((max(boo.a)) = (CASE WHEN (hashed SubPlan 1) THEN boo_1.a ELSE NULL::integer END))
|
||||
-> Sort
|
||||
Output: (max(boo.a))
|
||||
Sort Key: (max(boo.a))
|
||||
-> Nested Loop
|
||||
Output: (max(boo.a))
|
||||
-> Aggregate
|
||||
Output: max(boo.a)
|
||||
-> Gather
|
||||
Output: boo.a
|
||||
Number of Workers: 2
|
||||
-> Parallel Seq Scan on public.subplan_tb1 boo
|
||||
Output: boo.a
|
||||
-> Gather
|
||||
Output: boo_2.a, boo_2.b
|
||||
Number of Workers: 2
|
||||
-> Parallel Seq Scan on public.subplan_tb1 boo_2
|
||||
Output: boo_2.a, boo_2.b
|
||||
-> Sort
|
||||
Output: boo_1.a, (CASE WHEN (hashed SubPlan 1) THEN boo_1.a ELSE NULL::integer END)
|
||||
Sort Key: (CASE WHEN (hashed SubPlan 1) THEN boo_1.a ELSE NULL::integer END)
|
||||
-> Gather
|
||||
Output: boo_1.a, CASE WHEN (hashed SubPlan 1) THEN boo_1.a ELSE NULL::integer END
|
||||
Number of Workers: 2
|
||||
-> Parallel Seq Scan on public.subplan_tb1 boo_1
|
||||
Output: boo_1.a
|
||||
SubPlan 1
|
||||
-> Gather
|
||||
Output: boo_3.a
|
||||
Number of Workers: 2
|
||||
-> Parallel Seq Scan on public.subplan_tb1 boo_3
|
||||
Output: boo_3.a
|
||||
(34 rows)
|
||||
|
||||
--clean up
|
||||
drop table parallel_t1;
|
||||
drop table parallel_t2;
|
||||
|
|
|
@ -29,8 +29,10 @@ test: misc
|
|||
test: stats
|
||||
test: alter_system_set
|
||||
|
||||
# parallel query
|
||||
test: parallel_query parallel_nested_loop parallel_hashjoin parallel_append parallel_create_index
|
||||
# parallel query, don't put more than 2 parallel query testcases into one test group
|
||||
test: parallel_query parallel_nested_loop
|
||||
test: parallel_hashjoin parallel_append
|
||||
test: parallel_create_index
|
||||
|
||||
#dispatch from 13
|
||||
test: function
|
||||
|
|
|
@ -26,5 +26,7 @@ test: upsert_grammer_test_02 upsert_restriction upsert_composite
|
|||
test: upsert_trigger_test upsert_explain
|
||||
test: upsert_clean
|
||||
|
||||
# test parallel query
|
||||
test: parallel_query parallel_nested_loop parallel_hashjoin parallel_append parallel_create_index
|
||||
# test parallel query, don't put more than 2 parallel query testcases into one test group
|
||||
test: parallel_query parallel_nested_loop
|
||||
test: parallel_hashjoin parallel_append
|
||||
test: parallel_create_index
|
||||
|
|
|
@ -14,15 +14,149 @@ select count(*) from parallel_t1 where a > 5000;
|
|||
select count(*) from parallel_t1 where a < 5000;
|
||||
select count(*) from parallel_t1 where a <> 5000;
|
||||
|
||||
--normal plan for bitmapscan
|
||||
create index idx_parallel_t1 on parallel_t1(a);
|
||||
analyze parallel_t1;
|
||||
set enable_seqscan to off;
|
||||
set enable_indexscan to off;
|
||||
explain (costs off) select count(*) from parallel_t1 where a > 5000;
|
||||
explain (costs off) select count(*) from parallel_t1 where a < 5000;
|
||||
explain (costs off) select count(*) from parallel_t1 where a <> 5000;
|
||||
select count(*) from parallel_t1 where a > 5000;
|
||||
select count(*) from parallel_t1 where a < 5000;
|
||||
select count(*) from parallel_t1 where a <> 5000;
|
||||
reset enable_seqscan;
|
||||
reset enable_indexscan;
|
||||
|
||||
--set parallel parameter
|
||||
set force_parallel_mode=on;
|
||||
set parallel_setup_cost=0;
|
||||
set parallel_tuple_cost=0.000005;
|
||||
set max_parallel_workers_per_gather=2;
|
||||
set min_parallel_table_scan_size=0;
|
||||
set min_parallel_index_scan_size=0;
|
||||
set parallel_leader_participation=on;
|
||||
|
||||
--rescan case
|
||||
create table test_with_rescan(dm int, sj_dm int, name text);
|
||||
insert into test_with_rescan values(1,0,'universe');
|
||||
insert into test_with_rescan values(2,1,'galaxy');
|
||||
insert into test_with_rescan values(3,2,'sun');
|
||||
insert into test_with_rescan values(4,3,'earth');
|
||||
insert into test_with_rescan values(5,4,'asia');
|
||||
insert into test_with_rescan values(6,5,'China');
|
||||
insert into test_with_rescan values(7,6,'shaanxi');
|
||||
insert into test_with_rescan values(8,7,'xian');
|
||||
insert into test_with_rescan values(9,8,'huawei');
|
||||
insert into test_with_rescan values(10,9,'v10');
|
||||
insert into test_with_rescan values(11,10,'v10-3L');
|
||||
insert into test_with_rescan values(12,11,'gauss');
|
||||
insert into test_with_rescan values(13,12,'test');
|
||||
insert into test_with_rescan values(14,13,'test');
|
||||
insert into test_with_rescan values(15,14,'test');
|
||||
insert into test_with_rescan values(16,15,'test');
|
||||
insert into test_with_rescan values(17,16,'test');
|
||||
insert into test_with_rescan values(18,17,'test');
|
||||
insert into test_with_rescan values(19,18,'test');
|
||||
insert into test_with_rescan values(20,19,'test');
|
||||
create index on test_with_rescan(dm);
|
||||
create index on test_with_rescan(sj_dm);
|
||||
create index on test_with_rescan(name);
|
||||
analyze test_with_rescan;
|
||||
explain (costs off)
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t;
|
||||
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t order by 1,2,3,4;
|
||||
|
||||
--increate cpu_tuple_cost, disable seqscan, test parallel index scan
|
||||
set enable_seqscan=off;
|
||||
set cpu_tuple_cost=1000;
|
||||
explain (costs off)
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t;
|
||||
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t order by 1,2,3,4;
|
||||
|
||||
--disable indexscan, test parallel bitmap scan
|
||||
set enable_indexscan to off;
|
||||
explain (costs off)
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t;
|
||||
|
||||
WITH recursive t_result AS (
|
||||
select * from(
|
||||
SELECT dm,sj_dm,name,1 as level
|
||||
FROM test_with_rescan
|
||||
WHERE sj_dm < 10 order by dm limit 6 offset 2)
|
||||
UNION all
|
||||
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
|
||||
FROM t_result t1
|
||||
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
|
||||
)
|
||||
SELECT *
|
||||
FROM t_result t order by 1,2,3,4;
|
||||
|
||||
drop table test_with_rescan;
|
||||
reset enable_seqscan;
|
||||
reset enable_indexscan;
|
||||
reset cpu_tuple_cost;
|
||||
|
||||
--parallel plan for seq scan
|
||||
set enable_bitmapscan to off;
|
||||
set enable_indexscan to off;
|
||||
explain (costs off) select count(*) from parallel_t1;
|
||||
explain (costs off) select count(*) from parallel_t1 where a = 5000;
|
||||
explain (costs off) select count(*) from parallel_t1 where a > 5000;
|
||||
|
@ -34,6 +168,29 @@ select count(*) from parallel_t1 where a > 5000;
|
|||
select count(*) from parallel_t1 where a < 5000;
|
||||
select count(*) from parallel_t1 where a <> 5000;
|
||||
explain (costs off,analyse on,verbose on) select count(*) from parallel_t1;
|
||||
reset enable_indexscan;
|
||||
reset enable_bitmapscan;
|
||||
|
||||
--parallel plan for bitmap scan
|
||||
--onepage case
|
||||
CREATE TABLE onepage1 (val int, val2 int);
|
||||
ALTER TABLE onepage1 ADD PRIMARY KEY(val, val2);
|
||||
insert into onepage1 (select * from generate_series(1, 5) a, generate_series(1, 5) b);
|
||||
CREATE TABLE onepage2 as select * from onepage1;
|
||||
explain select * from onepage2 natural join onepage1 where onepage2.val > 2 and onepage1.val < 4;
|
||||
select * from onepage2 natural join onepage1 where onepage2.val > 2 and onepage1.val < 4 order by 1,2;
|
||||
drop table onepage1;
|
||||
drop table onepage2;
|
||||
set enable_seqscan to off;
|
||||
set enable_indexscan to off;
|
||||
explain (costs off) select count(*) from parallel_t1 where a > 5000;
|
||||
explain (costs off) select count(*) from parallel_t1 where a < 5000;
|
||||
explain (costs off) select count(*) from parallel_t1 where a <> 5000;
|
||||
select count(*) from parallel_t1 where a > 5000;
|
||||
select count(*) from parallel_t1 where a < 5000;
|
||||
select count(*) from parallel_t1 where a <> 5000;
|
||||
reset enable_seqscan;
|
||||
reset enable_indexscan;
|
||||
|
||||
--clean up
|
||||
reset force_parallel_mode;
|
||||
|
@ -121,6 +278,20 @@ reset enable_hashjoin;
|
|||
reset enable_nestloop;
|
||||
reset enable_indexscan;
|
||||
|
||||
--parallel with subplan
|
||||
drop table subplan_tb1;
|
||||
create table subplan_tb1(a int, b varchar);
|
||||
insert into subplan_tb1 values (0, NULL);
|
||||
insert into subplan_tb1 values (1, NULL);
|
||||
insert into subplan_tb1 values (2, NULL);
|
||||
insert into subplan_tb1 values (3, NULL);
|
||||
|
||||
explain (verbose,costs off) select boo_1.a
|
||||
from subplan_tb1 boo_1 inner join subplan_tb1 boo_2
|
||||
on ( case when boo_1.a in (
|
||||
select boo_3.a from subplan_tb1 boo_3 ) then boo_1.a end ) in
|
||||
(select max( boo.a ) column_009 from subplan_tb1 boo);
|
||||
|
||||
--clean up
|
||||
drop table parallel_t1;
|
||||
drop table parallel_t2;
|
||||
|
|
Loading…
Reference in New Issue