!465 Parallel query: parallel bitmap scan

Merge pull request !465 from TotaJ/feature/test
opengauss-bot 2020-12-07 15:29:10 +08:00 committed by Gitee
commit eeb0e488d8
41 changed files with 2084 additions and 275 deletions

View File

@ -462,6 +462,7 @@ static BitmapOr* _copyBitmapOr(const BitmapOr* from)
/*
* copy remainder of node
*/
COPY_SCALAR_FIELD(isshared);
COPY_NODE_FIELD(bitmapplans);
return newnode;
@ -703,6 +704,7 @@ static BitmapIndexScan* _copyBitmapIndexScan(const BitmapIndexScan* from)
* copy remainder of node
*/
COPY_SCALAR_FIELD(indexid);
COPY_SCALAR_FIELD(isshared);
COPY_NODE_FIELD(indexqual);
COPY_NODE_FIELD(indexqualorig);

View File

@ -873,7 +873,7 @@ static void _outBitmapOr(StringInfo str, BitmapOr* node)
WRITE_NODE_TYPE("BITMAPOR");
_outPlanInfo(str, (Plan*)node);
WRITE_BOOL_FIELD(isshared);
WRITE_NODE_FIELD(bitmapplans);
}
static void _outCStoreIndexOr(StringInfo str, CStoreIndexOr* node)
@ -1176,6 +1176,7 @@ static void _outBitmapIndexScan(StringInfo str, BitmapIndexScan* node)
_outScanInfo(str, (Scan*)node);
WRITE_OID_FIELD(indexid);
WRITE_BOOL_FIELD(isshared);
WRITE_NODE_FIELD(indexqual);
WRITE_NODE_FIELD(indexqualorig);
#ifdef STREAMPLAN

View File

@ -2838,6 +2838,7 @@ static BitmapOr* _readBitmapOr(BitmapOr* local_node)
READ_TEMP_LOCALS();
_readPlan(&local_node->plan);
READ_BOOL_FIELD(isshared);
READ_NODE_FIELD(bitmapplans);
READ_DONE();
@ -3059,6 +3060,7 @@ static BitmapIndexScan* _readBitmapIndexScan(BitmapIndexScan* local_node)
_readScan(&local_node->scan);
READ_OID_FIELD(indexid);
READ_BOOL_FIELD(isshared);
READ_NODE_FIELD(indexqual);
READ_NODE_FIELD(indexqualorig);
#ifdef STREAMPLAN

View File

@ -45,6 +45,7 @@
#include "nodes/bitmapset.h"
#include "nodes/tidbitmap.h"
#include "utils/hsearch.h"
#include "storage/lwlock.h"
/*
* The maximum number of tuples per page is not large (typically 256 with
@ -117,6 +118,7 @@ typedef struct PagetableEntry {
bool recheck; /* should the tuples be rechecked? */
bitmapword words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)];
} PagetableEntry;
/*
* dynahash.c is optimized for relatively large, long-lived hash tables.
* This is not ideal for TIDBitMap, particularly when we are using a bitmap
@ -134,6 +136,15 @@ typedef enum {
TBM_HASH /* pagetable is valid, entry1 is not */
} TBMStatus;
/*
* Current iterating state of the TBM.
*/
typedef enum {
TBM_NOT_ITERATING, /* not yet converted to page and chunk array */
TBM_ITERATING_PRIVATE, /* converted to local page and chunk array */
TBM_ITERATING_SHARED /* converted to shared page and chunk array */
} TBMIteratingState;
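(tbm_begin_iterate moves a bitmap from TBM_NOT_ITERATING to TBM_ITERATING_PRIVATE, while tbm_prepare_shared_iterate moves it to TBM_ITERATING_SHARED; the asserts added in those functions below keep the two iteration modes from being mixed on the same bitmap.)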
/*
* Here is the representation for a whole TIDBitMap:
*/
@ -146,8 +157,9 @@ struct TIDBitmap {
int maxentries; /* limit on same to meet maxbytes */
int npages; /* number of exact entries in pagetable */
int nchunks; /* number of lossy entries in pagetable */
bool iterating; /* tbm_begin_iterate called? */
TBMIteratingState iterating; /* tbm_begin_iterate called? */
bool isGlobalPart; /* represent global partition index tbm */
bool isShared; /* is shared pagetable? */
PagetableEntry entry1; /* used when status == TBM_ONE_PAGE */
/* these are valid when iterating is true: */
PagetableEntry** spages; /* sorted exact-page list, or NULL */
@ -168,8 +180,40 @@ struct TBMIterator {
TBMIterateResult output; /* MUST BE LAST (because variable-size) */
};
/*
* Holds the shared members of the iterator so that multiple processes
* can jointly iterate.
*/
struct TBMSharedIteratorState {
int nentries; /* number of entries in pagetable */
int maxentries; /* limit on same to meet maxbytes */
int npages; /* number of exact entries in pagetable */
int nchunks; /* number of lossy entries in pagetable */
TBMStatus status; /* see codes above */
PagetableEntry entry1; /* used when status == TBM_ONE_PAGE */
HTAB* pagetable; /* hash table of PagetableEntry's */
PagetableEntry** spages; /* sorted exact-page list, or NULL */
PagetableEntry** schunks; /* sorted lossy-chunk list, or NULL */
pg_atomic_uint32 pagetableRefcount; /* ref count for pagetable */
pg_atomic_uint32 pagesRefcount; /* ref count for spages */
pg_atomic_uint32 chunksRefcount; /* ref count for schunks */
LWLock lock; /* lock to protect below members */
int spageptr; /* next spages index */
int schunkptr; /* next schunks index */
int schunkbit; /* next bit to check in current schunk */
};
/*
* Same as TBMIterator, but used for joint iteration; therefore it also
* holds a reference to the shared state.
*/
struct TBMSharedIterator {
TBMSharedIteratorState *state; /* shared state */
TBMIterateResult output; /* MUST BE LAST (because variable-size) */
};
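Taken together, these additions define the shared-iteration API used later in this change. A minimal usage sketch follows (illustrative only, not runnable standalone: maxbytes and shared_cxt are placeholders, and how the TBMSharedIteratorState pointer is published to the workers, e.g. through the parallel shared-memory segment, is elided):

/* Leader: build the bitmap with a shared page table, then publish it. */
TIDBitmap *tbm = tbm_create(maxbytes, shared_cxt);  /* shared_cxt != NULL */
/* ... tbm_add_tuples(tbm, ...) ... */
TBMSharedIteratorState *istate = tbm_prepare_shared_iterate(tbm);

/* Every participant, leader included: attach a private iterator and scan. */
TBMSharedIterator *it = tbm_attach_shared_iterate(istate);
TBMIterateResult *res;
while ((res = tbm_shared_iterate(it)) != NULL) {
    /* fetch heap page res->blockno; use res->offsets / res->recheck */
}
tbm_end_shared_iterate(it);   /* frees only backend-private state */

/* Shutdown: tbm_free_shared_area(istate) drops the refcounts and frees the
 * shared arrays once no shared iterator state references them. */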
/* Local function prototypes */
static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage);
static void tbm_union_page(TIDBitmap *a, const PagetableEntry *bpage);
static bool tbm_intersect_page(TIDBitmap* a, PagetableEntry* apage, const TIDBitmap* b);
static const PagetableEntry* tbm_find_pageentry(const TIDBitmap* tbm, PagetableEntryNode pageNode);
static PagetableEntry* tbm_get_pageentry(TIDBitmap* tbm, PagetableEntryNode pageNode);
@ -177,15 +221,18 @@ static bool tbm_page_is_lossy(const TIDBitmap* tbm, PagetableEntryNode pageNode)
static void tbm_mark_page_lossy(TIDBitmap* tbm, PagetableEntryNode pageNode);
static void tbm_lossify(TIDBitmap* tbm);
static int tbm_comparator(const void* left, const void* right);
static void tbm_sort_pages(TIDBitmap* tbm);
/*
* tbm_create - create an initially-empty bitmap
*
* The bitmap will live in the memory context that is CurrentMemoryContext
* at the time of this call. It will be limited to (approximately) maxbytes
* total memory consumption.
* total memory consumption. If the DSA passed to this function is not NULL,
* the memory for storing elements of the underlying page table will be
* allocated from the DSA.
*/
TIDBitmap* tbm_create(long maxbytes)
TIDBitmap* tbm_create(long maxbytes, MemoryContext dsa)
{
TIDBitmap* tbm = NULL;
long nbuckets;
@ -193,7 +240,13 @@ TIDBitmap* tbm_create(long maxbytes)
/* Create the TIDBitmap struct and zero all its fields */
tbm = makeNode(TIDBitmap);
tbm->mcxt = CurrentMemoryContext;
if (dsa == NULL) {
tbm->mcxt = CurrentMemoryContext;
tbm->isShared = false;
} else {
tbm->mcxt = dsa;
tbm->isShared = true;
}
tbm->status = TBM_EMPTY;
tbm->isGlobalPart = false;
/*
@ -255,18 +308,39 @@ static void tbm_create_pagetable(TIDBitmap* tbm)
*/
void tbm_free(TIDBitmap* tbm)
{
if (tbm->pagetable != NULL) {
/*
* Don't call hash_destroy when the pagetable is shared, because the memory
* context backing the hash table has already been deleted by dsm_detach. The
* same holds for spages and schunks, which are allocated from that context.
*/
if (!tbm->isShared) {
hash_destroy(tbm->pagetable);
}
if (tbm->spages != NULL) {
pfree_ext(tbm->spages);
}
if (tbm->schunks != NULL) {
pfree_ext(tbm->schunks);
}
pfree_ext(tbm);
}
/*
* tbm_free_shared_area - free shared state
*
* Free the shared iterator state. Also free the shared pagetable and the
* iterator arrays once they are no longer referenced by any shared iterator,
* i.e. once their refcount drops to 0.
*/
void tbm_free_shared_area(TBMSharedIteratorState *istate)
{
if (pg_atomic_sub_fetch_u32(&istate->pagetableRefcount, 1) == 0) {
hash_destroy(istate->pagetable);
}
if (pg_atomic_sub_fetch_u32(&istate->pagesRefcount, 1) == 0) {
pfree_ext(istate->spages);
}
if (pg_atomic_sub_fetch_u32(&istate->chunksRefcount, 1) == 0) {
pfree_ext(istate->schunks);
}
pfree_ext(istate);
}
/*
* tbm_add_tuples - add some tuple IDs to a TIDBitmap
*
@ -277,7 +351,7 @@ void tbm_add_tuples(TIDBitmap* tbm, const ItemPointer tids, int ntids, bool rech
{
int i;
Assert(!tbm->iterating);
Assert(tbm->iterating == TBM_NOT_ITERATING);
for (i = 0; i < ntids; i++) {
BlockNumber blk = ItemPointerGetBlockNumber(tids + i);
OffsetNumber off = ItemPointerGetOffsetNumber(tids + i);
@ -340,7 +414,7 @@ void tbm_add_page(TIDBitmap* tbm, BlockNumber pageno, Oid partitionOid)
*/
void tbm_union(TIDBitmap* a, const TIDBitmap* b)
{
Assert(!a->iterating);
Assert(a->iterating == TBM_NOT_ITERATING);
/* Nothing to do if b is empty */
if (b->nentries == 0) {
return;
@ -414,7 +488,7 @@ static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage)
*/
void tbm_intersect(TIDBitmap* a, const TIDBitmap* b)
{
Assert(!a->iterating);
Assert(a->iterating == TBM_NOT_ITERATING);
/* Nothing to do if a is empty */
if (a->nentries == 0) {
return;
@ -533,6 +607,39 @@ bool tbm_is_empty(const TIDBitmap* tbm)
return (tbm->nentries == 0);
}
static void tbm_sort_pages(TIDBitmap* tbm)
{
HASH_SEQ_STATUS status;
PagetableEntry* page = NULL;
int npages;
int nchunks;
if (tbm->spages == NULL && tbm->npages > 0) {
tbm->spages = (PagetableEntry**)MemoryContextAlloc(tbm->mcxt, tbm->npages * sizeof(PagetableEntry*));
}
if ((tbm->schunks == NULL) && tbm->nchunks > 0) {
tbm->schunks = (PagetableEntry**)MemoryContextAlloc(tbm->mcxt, tbm->nchunks * sizeof(PagetableEntry*));
}
hash_seq_init(&status, tbm->pagetable);
npages = nchunks = 0;
while ((page = (PagetableEntry*)hash_seq_search(&status)) != NULL) {
if (page->ischunk) {
tbm->schunks[nchunks++] = page;
} else {
tbm->spages[npages++] = page;
}
}
Assert(npages == tbm->npages);
Assert(nchunks == tbm->nchunks);
if (npages > 1) {
qsort(tbm->spages, npages, sizeof(PagetableEntry*), tbm_comparator);
}
if (nchunks > 1) {
qsort(tbm->schunks, nchunks, sizeof(PagetableEntry*), tbm_comparator);
}
}
/*
* tbm_begin_iterate - prepare to iterate through a TIDBitmap
*
@ -548,6 +655,7 @@ bool tbm_is_empty(const TIDBitmap* tbm)
*/
TBMIterator* tbm_begin_iterate(TIDBitmap* tbm)
{
Assert(tbm->iterating != TBM_ITERATING_SHARED);
TBMIterator* iterator = NULL;
/*
@ -570,43 +678,134 @@ TBMIterator* tbm_begin_iterate(TIDBitmap* tbm)
* attached to the bitmap not the iterator, so they can be used by more
* than one iterator.
*/
if (tbm->status == TBM_HASH && !tbm->iterating) {
HASH_SEQ_STATUS status;
PagetableEntry* page = NULL;
int npages;
int nchunks;
if (tbm->spages == NULL && tbm->npages > 0) {
tbm->spages = (PagetableEntry**)MemoryContextAlloc(tbm->mcxt, tbm->npages * sizeof(PagetableEntry*));
}
if ((tbm->schunks == NULL) && tbm->nchunks > 0) {
tbm->schunks = (PagetableEntry**)MemoryContextAlloc(tbm->mcxt, tbm->nchunks * sizeof(PagetableEntry*));
}
hash_seq_init(&status, tbm->pagetable);
npages = nchunks = 0;
while ((page = (PagetableEntry*)hash_seq_search(&status)) != NULL) {
if (page->ischunk) {
tbm->schunks[nchunks++] = page;
} else {
tbm->spages[npages++] = page;
}
}
Assert(npages == tbm->npages);
Assert(nchunks == tbm->nchunks);
if (npages > 1) {
qsort(tbm->spages, npages, sizeof(PagetableEntry*), tbm_comparator);
}
if (nchunks > 1) {
qsort(tbm->schunks, nchunks, sizeof(PagetableEntry*), tbm_comparator);
}
if (tbm->status == TBM_HASH && tbm->iterating == TBM_NOT_ITERATING) {
tbm_sort_pages(tbm);
}
tbm->iterating = true;
tbm->iterating = TBM_ITERATING_PRIVATE;
return iterator;
}
/*
* tbm_prepare_shared_iterate - prepare shared iteration state for a TIDBitmap.
*
* The necessary shared state will be allocated from the DSA passed to
* tbm_create, so that multiple processes can attach to it and iterate jointly.
*
* This also converts the pagetable hash into the sorted page and chunk
* arrays that the shared iterators walk.
*/
TBMSharedIteratorState* tbm_prepare_shared_iterate(TIDBitmap *tbm)
{
Assert(tbm->iterating != TBM_ITERATING_PRIVATE);
/*
* Allocate the TBMSharedIteratorState from the DSA to hold the shared members
* and the lock; it will be used by multiple workers for shared iteration.
*/
TBMSharedIteratorState *istate = (TBMSharedIteratorState*)MemoryContextAllocZero(tbm->mcxt,
sizeof(TBMSharedIteratorState));
/*
* If we have a hashtable, create and fill the sorted page lists, unless
* we already did that for a previous iterator. Note that the lists are
* attached to the bitmap not the iterator, so they can be used by more
* than one iterator.
*/
if (tbm->iterating == TBM_NOT_ITERATING) {
if (tbm->status == TBM_HASH) {
tbm_sort_pages(tbm);
}
pg_atomic_init_u32(&istate->pagetableRefcount, 0);
pg_atomic_init_u32(&istate->pagesRefcount, 0);
pg_atomic_init_u32(&istate->chunksRefcount, 0);
}
/*
* Store the TBM members in the shared state so that we can share them
* across multiple processes.
*/
istate->nentries = tbm->nentries;
istate->maxentries = tbm->maxentries;
istate->npages = tbm->npages;
istate->nchunks = tbm->nchunks;
istate->pagetable = tbm->pagetable;
istate->spages = tbm->spages;
istate->schunks = tbm->schunks;
istate->status = tbm->status;
int rc = memcpy_s(&istate->entry1, sizeof(PagetableEntry), &tbm->entry1, sizeof(PagetableEntry));
securec_check(rc, "", "");
/*
* For every shared iterator state referring to the pagetable and iterator
* arrays, increase the refcount by 1, so that while freeing a shared iterator
* we don't free the pagetable and iterator arrays until the refcount drops to 0.
*/
(void)pg_atomic_add_fetch_u32(&istate->pagetableRefcount, 1);
(void)pg_atomic_add_fetch_u32(&istate->pagesRefcount, 1);
(void)pg_atomic_add_fetch_u32(&istate->chunksRefcount, 1);
/* Initialize the iterator lock */
LWLockInitialize(&istate->lock, LWTRANCHE_TBM);
/* Initialize the shared iterator state */
istate->schunkbit = 0;
istate->schunkptr = 0;
istate->spageptr = 0;
tbm->iterating = TBM_ITERATING_SHARED;
return istate;
}
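Note that the executor may call this more than once for the same bitmap, e.g. once for the main iterator and once for the prefetch iterator (see BitmapHeapTblNext later in this change). Each call allocates a fresh TBMSharedIteratorState over the same pagetable and sorted arrays, which is why those arrays are reference-counted rather than freed along with any single state.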
/*
* tbm_extract_page_tuple - extract the tuple offsets from a page
*
* The extracted offsets are stored into TBMIterateResult.
*/
static inline int tbm_extract_page_tuple(const PagetableEntry *page, TBMIterateResult *output)
{
int ntuples = 0;
for (int wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++) {
bitmapword w = page->words[wordnum];
if (w != 0) {
int off = wordnum * BITS_PER_BITMAPWORD + 1;
while (w != 0) {
if (w & 1) {
output->offsets[ntuples++] = (OffsetNumber)off;
}
off++;
w >>= 1;
}
}
}
return ntuples;
}
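As a concrete illustration of the loop above (hypothetical values): bit k of word n encodes the 1-based offset n * BITS_PER_BITMAPWORD + k + 1, so:

/* Illustrative only: if word 0 holds binary 10110 (0x16), the loop emits
 * offsets 2, 3 and 5 into output->offsets and returns ntuples = 3. */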
/*
* tbm_advance_schunkbit - Advance the schunkbit
*/
static inline void tbm_advance_schunkbit(const PagetableEntry *chunk, int *schunkbitp)
{
int schunkbit = *schunkbitp;
while (schunkbit < PAGES_PER_CHUNK) {
int wordnum = WORDNUM(schunkbit);
int bitnum = BITNUM(schunkbit);
if ((chunk->words[wordnum] & ((bitmapword)1 << (unsigned int)bitnum)) != 0) {
break;
}
schunkbit++;
}
*schunkbitp = schunkbit;
}
/*
* tbm_iterate - scan through next page of a TIDBitmap
*
@ -624,7 +823,7 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator)
TIDBitmap* tbm = iterator->tbm;
TBMIterateResult* output = &(iterator->output);
Assert(tbm->iterating);
Assert(tbm->iterating == TBM_ITERATING_PRIVATE);
/*
* If lossy chunk pages remain, make sure we've advanced schunkptr/
@ -634,15 +833,7 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator)
PagetableEntry* chunk = tbm->schunks[iterator->schunkptr];
int schunkbit = iterator->schunkbit;
while (schunkbit < PAGES_PER_CHUNK) {
int wordnum = WORDNUM(schunkbit);
int bitnum = BITNUM(schunkbit);
if ((chunk->words[wordnum] & ((bitmapword)1 << (unsigned int)bitnum)) != 0) {
break;
}
schunkbit++;
}
tbm_advance_schunkbit(chunk, &schunkbit);
if (schunkbit < PAGES_PER_CHUNK) {
iterator->schunkbit = schunkbit;
break;
@ -676,7 +867,6 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator)
if (iterator->spageptr < tbm->npages) {
PagetableEntry* page = NULL;
int ntuples;
int wordnum;
/* In ONE_PAGE state, we don't allocate an spages[] array */
if (tbm->status == TBM_ONE_PAGE) {
@ -686,22 +876,7 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator)
}
/* scan bitmap to extract individual offset numbers */
ntuples = 0;
for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++) {
bitmapword w = page->words[wordnum];
if (w != 0) {
int off = wordnum * BITS_PER_BITMAPWORD + 1;
while (w != 0) {
if (w & 1) {
output->offsets[ntuples++] = (OffsetNumber)off;
}
off++;
w >>= 1;
}
}
}
ntuples = tbm_extract_page_tuple(page, output);
output->blockno = page->entryNode.blockNo;
output->partitionOid = page->entryNode.partitionOid;
output->ntuples = ntuples;
@ -714,6 +889,91 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator)
return NULL;
}
/*
* tbm_shared_iterate - scan through next page of a TIDBitmap
*
* As above, but this will iterate using an iterator which is shared
* across multiple processes. We need to acquire the iterator LWLock,
* before accessing the shared members.
*/
TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator)
{
TBMIterateResult *output = &iterator->output;
TBMSharedIteratorState *istate = iterator->state;
/* Acquire the LWLock before accessing the shared members */
(void)LWLockAcquire(&istate->lock, LW_EXCLUSIVE);
/*
* If lossy chunk pages remain, make sure we've advanced schunkptr/
* schunkbit to the next set bit.
*/
while (istate->schunkptr < istate->nchunks) {
PagetableEntry *chunk = istate->schunks[istate->schunkptr];
int schunkbit = istate->schunkbit;
tbm_advance_schunkbit(chunk, &schunkbit);
if (schunkbit < PAGES_PER_CHUNK) {
istate->schunkbit = schunkbit;
break;
}
/* advance to next chunk */
istate->schunkptr++;
istate->schunkbit = 0;
}
/*
* If both chunk and per-page data remain, must output the numerically
* earlier page.
*/
if (istate->schunkptr < istate->nchunks) {
PagetableEntry *chunk = istate->schunks[istate->schunkptr];
PagetableEntryNode pnode;
pnode.blockNo = chunk->entryNode.blockNo + istate->schunkbit;
pnode.partitionOid = chunk->entryNode.partitionOid;
if (istate->spageptr >= istate->npages ||
IS_CHUNK_BEFORE_PAGE(pnode, istate->spages[istate->spageptr]->entryNode)) {
/* Return a lossy page indicator from the chunk */
output->blockno = pnode.blockNo;
output->partitionOid = pnode.partitionOid;
output->ntuples = -1;
output->recheck = true;
istate->schunkbit++;
LWLockRelease(&istate->lock);
return output;
}
}
if (istate->spageptr < istate->npages) {
PagetableEntry *page = NULL;
/* In ONE_PAGE state, we don't allocate an spages[] array */
if (istate->status == TBM_ONE_PAGE) {
page = &istate->entry1;
} else {
page = istate->spages[istate->spageptr];
}
/* scan bitmap to extract individual offset numbers */
int ntuples = tbm_extract_page_tuple(page, output);
output->blockno = page->entryNode.blockNo;
output->partitionOid = page->entryNode.partitionOid;
output->ntuples = ntuples;
output->recheck = page->recheck;
istate->spageptr++;
LWLockRelease(&istate->lock);
return output;
}
LWLockRelease(&istate->lock);
/* Nothing more in the bitmap */
return NULL;
}
/*
* tbm_end_iterate - finish an iteration over a TIDBitmap
*
@ -726,6 +986,17 @@ void tbm_end_iterate(TBMIterator* iterator)
pfree_ext(iterator);
}
/*
* tbm_end_shared_iterate - finish a shared iteration over a TIDBitmap
*
* This doesn't free any of the shared state associated with the iterator,
* just our backend-private state.
*/
void tbm_end_shared_iterate(TBMSharedIterator *iterator)
{
pfree_ext(iterator);
}
/*
* tbm_find_pageentry - find a PagetableEntry for the pageno
*
@ -919,7 +1190,7 @@ static void tbm_lossify(TIDBitmap* tbm)
* push nentries down to significantly less than maxentries, or else we'll
* just end up doing this again very soon. We shoot for maxentries/2.
*/
Assert(!tbm->iterating);
Assert(tbm->iterating == TBM_NOT_ITERATING);
Assert(tbm->status == TBM_HASH);
hash_seq_init(&status, tbm->pagetable);
@ -986,7 +1257,29 @@ static int tbm_comparator(const void* left, const void* right)
return 0;
}
bool tbm_is_global(const TIDBitmap* tbm)
/*
* tbm_attach_shared_iterate
*
* Allocate a backend-private iterator and attach the shared iterator state
* to it so that multiple processes can iterate jointly.
*
* This also converts the DSA pointers to local pointers and stores them in
* our private iterator.
*/
TBMSharedIterator *tbm_attach_shared_iterate(TBMSharedIteratorState *istate)
{
/*
* Create the TBMSharedIterator struct, with enough trailing space to
* serve the needs of the TBMIterateResult sub-struct.
*/
TBMSharedIterator *iterator = (TBMSharedIterator *)palloc0(sizeof(TBMSharedIterator) +
MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));
iterator->state = istate;
return iterator;
}
bool tbm_is_global(const TIDBitmap *tbm)
{
return tbm->isGlobalPart;
}

View File

@ -1072,10 +1072,10 @@ static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, RangeT
break;
case RTE_VALUES:
/*
* The data for a VALUES clause is stored in the plan tree itself,
* so scanning it in a worker is fine.
*/
/* Check for parallel-restricted functions. */
if (has_parallel_hazard((Node *)rte->values_lists, false)) {
return;
}
break;
case RTE_CTE:
@ -1102,6 +1102,14 @@ static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, RangeT
if (has_parallel_hazard((Node *)rel->baserestrictinfo, false))
return;
/*
* Likewise, if the relation's outputs are not parallel-safe, give up.
* (Usually, they're just Vars, but sometimes they're not.)
*/
if (has_parallel_hazard((Node *)rel->reltargetlist, false)) {
return;
}
/* We have a winner. */
rel->consider_parallel = true;
}
@ -2918,6 +2926,26 @@ static void recurse_push_qual(Node* setOp, Query* topquery, RangeTblEntry* rte,
(int)nodeTag(setOp))));
}
}
/*
* create_partial_bitmap_paths
* Build partial bitmap heap path for the relation
*/
void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual)
{
/* Compute heap pages for bitmap heap scan */
double pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0, NULL, NULL, rel->isPartitionedTable);
int parallel_workers = compute_parallel_worker(rel, pages_fetched, -1,
u_sess->attr.attr_sql.max_parallel_workers_per_gather);
if (parallel_workers <= 0) {
return;
}
add_partial_path(rel,
(Path *)create_bitmap_heap_path(root, rel, bitmapqual, NULL, 1.0, parallel_workers));
}
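Because the partial path is created with parallel_workers > 0, cost_bitmap_heap_scan (updated later in this change) divides the per-tuple CPU cost among the participating processes, and the planner can then place a Gather on top of the cheapest partial path.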
/*
* partIterator tries to inherit pathkeys from scan path
*

View File

@ -1444,10 +1444,10 @@ void cost_bitmap_heap_scan(
Cost startup_cost = 0;
Cost run_cost = 0;
Cost indexTotalCost;
Selectivity indexSelectivity;
QualCost qpqual_cost;
Cost cpu_per_tuple = 0.0;
Cost cost_per_page;
Cost cpu_run_cost;
double tuples_fetched;
double pages_fetched;
double spc_seq_page_cost, spc_random_page_cost;
@ -1493,52 +1493,15 @@ void cost_bitmap_heap_scan(
startup_cost += g_instance.cost_cxt.disable_cost;
}
/*
* Fetch total cost of obtaining the bitmap, as well as its total
* selectivity.
*/
cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
pages_fetched = compute_bitmap_pages(root, baserel, bitmapqual, loop_count,
&indexTotalCost, &tuples_fetched, ispartitionedindex);
startup_cost += indexTotalCost;
/* Fetch estimated page costs for tablespace containing table. */
get_tablespace_page_costs(baserel->reltablespace, &spc_random_page_cost, &spc_seq_page_cost);
/*
* Estimate number of main-table pages fetched.
*/
tuples_fetched = clamp_row_est(indexSelectivity * RELOPTINFO_LOCAL_FIELD(root, baserel, tuples));
T = (baserel->pages > 1) ? (double)baserel->pages : 1.0;
if (loop_count > 1) {
/*
* For repeated bitmap scans, scale up the number of tuples fetched in
* the Mackert and Lohman formula by the number of scans, so that we
* estimate the number of pages fetched by all the scans. Then
* pro-rate for one scan.
*/
pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
(BlockNumber)baserel->pages,
get_indexpath_pages(bitmapqual),
root,
ispartitionedindex);
pages_fetched /= loop_count;
} else {
/*
* For a single scan, the number of heap pages that need to be fetched
* is the same as the Mackert and Lohman formula for the case T <= b
* (ie, no re-reads needed).
*/
pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
}
if (pages_fetched >= T) {
pages_fetched = T;
} else {
pages_fetched = ceil(pages_fetched);
}
/*
* For small numbers of pages we should charge spc_random_page_cost
* apiece, while if nearly all the table's pages are being read, it's more
@ -1566,8 +1529,19 @@ void cost_bitmap_heap_scan(
startup_cost += qpqual_cost.startup;
cpu_per_tuple = u_sess->attr.attr_sql.cpu_tuple_cost + qpqual_cost.per_tuple;
cpu_run_cost = cpu_per_tuple * tuples_fetched;
run_cost += cpu_per_tuple * tuples_fetched;
/* Adjust costing for parallelism, if used. */
if (path->parallel_workers > 0) {
double parallel_divisor = get_parallel_divisor(path);
/* The CPU cost is divided among all the workers. */
cpu_run_cost /= parallel_divisor;
path->rows = clamp_row_est(path->rows / parallel_divisor);
}
run_cost += cpu_run_cost;
path->startup_cost = startup_cost;
path->total_cost = startup_cost + run_cost;
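The divisor comes from get_parallel_divisor, whose body is not part of this hunk; assuming the PostgreSQL-style formulation, in which a participating leader contributes 1.0 - 0.3 * nworkers of a full process, the adjustment works out as in this hypothetical walkthrough:

/* Hypothetical numbers, for illustration only: */
double cpu_cost = 1000.0;                /* CPU run cost before division   */
double divisor = 2 + (1.0 - 0.3 * 2);    /* 2 workers + leader share = 2.4 */
double per_process = cpu_cost / divisor; /* ~416.7 charged to the path     */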
@ -5932,6 +5906,66 @@ static double get_parallel_divisor(Path* path)
return parallel_divisor;
}
/*
* compute_bitmap_pages
*
* Compute the number of pages fetched from the heap in a bitmap heap scan.
*/
double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
double loop_count, Cost *cost, double *tuple, bool ispartitionedindex)
{
Cost indexTotalCost;
Selectivity indexSelectivity;
double pages_fetched;
double T = (baserel->pages > 1) ? (double)baserel->pages : 1.0;
/*
* Fetch total cost of obtaining the bitmap, as well as its total
* selectivity.
*/
cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
/*
* Estimate number of main-table pages fetched.
*/
double tuples_fetched = clamp_row_est(indexSelectivity * RELOPTINFO_LOCAL_FIELD(root, baserel, tuples));
if (loop_count > 1) {
/*
* For repeated bitmap scans, scale up the number of tuples fetched in
* the Mackert and Lohman formula by the number of scans, so that we
* estimate the number of pages fetched by all the scans. Then
* pro-rate for one scan.
*/
pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
(BlockNumber)baserel->pages,
get_indexpath_pages(bitmapqual),
root,
ispartitionedindex);
pages_fetched /= loop_count;
} else {
/*
* For a single scan, the number of heap pages that need to be fetched
* is the same as the Mackert and Lohman formula for the case T <= b
* (ie, no re-reads needed).
*/
pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
}
if (pages_fetched >= T) {
pages_fetched = T;
} else {
pages_fetched = ceil(pages_fetched);
}
if (cost != NULL) {
*cost = indexTotalCost;
}
if (tuple != NULL) {
*tuple = tuples_fetched;
}
return pages_fetched;
}
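A quick worked example of the single-scan branch (illustrative numbers):

/* T = 1000 heap pages, tuples_fetched = 500:
 *   pages_fetched = (2 * 1000 * 500) / (2 * 1000 + 500) = 400
 * With tuples_fetched = 4000 the formula gives ~1333, which the final
 * clamp reduces to T = 1000; the estimate saturates at the relation size. */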
/* it used to compute page_size in createplan.cpp */
double cost_page_size(double tuples, int width)
{

View File

@ -279,8 +279,13 @@ void create_index_paths(PlannerInfo* root, RelOptInfo* rel)
BitmapHeapPath* bpath = NULL;
bitmapqual = choose_bitmap_and(root, rel, bitindexpaths);
bpath = create_bitmap_heap_path(root, rel, bitmapqual, NULL, 1.0);
bpath = create_bitmap_heap_path(root, rel, bitmapqual, NULL, 1.0, 0);
add_path(root, rel, (Path*)bpath);
/* create a partial bitmap heap path */
if (rel->consider_parallel) {
create_partial_bitmap_paths(root, rel, bitmapqual);
}
}
/*
@ -348,7 +353,7 @@ void create_index_paths(PlannerInfo* root, RelOptInfo* rel)
/* And push that path into the mix */
required_outer = get_bitmap_tree_required_outer(bitmapqual);
loop_count = get_loop_count(root, required_outer);
bpath = create_bitmap_heap_path(root, rel, bitmapqual, required_outer, loop_count);
bpath = create_bitmap_heap_path(root, rel, bitmapqual, required_outer, loop_count, 0);
add_path(root, rel, (Path*)bpath);
}
}
@ -1650,6 +1655,11 @@ static Cost bitmap_scan_cost_est(PlannerInfo* root, RelOptInfo* rel, Path* ipath
bpath.path.pathkeys = NIL;
bpath.bitmapqual = ipath;
/*
* Cost the temporary path without considering parallelism; the parallel
* bitmap heap path will be considered at a later stage.
*/
bpath.path.parallel_workers = 0;
cost_bitmap_heap_scan(&bpath.path, root, rel, bpath.path.param_info, ipath, get_loop_count(root, required_outer));
return bpath.path.total_cost;
@ -1685,6 +1695,11 @@ static Cost bitmap_and_cost_est(PlannerInfo* root, RelOptInfo* rel, List* paths)
bpath.path.pathkeys = NIL;
bpath.bitmapqual = (Path*)&apath;
/*
* Cost the temporary path without considering parallelism; the parallel
* bitmap heap path will be considered at a later stage.
*/
bpath.path.parallel_workers = 0;
/* Now we can do cost_bitmap_heap_scan */
cost_bitmap_heap_scan(
&bpath.path, root, rel, bpath.path.param_info, (Path*)&apath, get_loop_count(root, required_outer));

View File

@ -43,6 +43,10 @@
#include "pgxc/pgxc.h"
#include "parser/parsetree.h"
#define IS_JOIN_TYPE_PARALLEL_SAFE(jointype) ((jointype) != JOIN_UNIQUE_OUTER && (jointype) != JOIN_FULL && \
(jointype) != JOIN_RIGHT && (jointype) != JOIN_RIGHT_SEMI && (jointype) != JOIN_RIGHT_ANTI && \
(jointype) != JOIN_RIGHT_ANTI_FULL)
static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
SpecialJoinInfo *sjinfo, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys,
List *mergeclauses, List *outersortkeys, List *innersortkeys);
@ -1098,8 +1102,8 @@ static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
* JOIN_RIGHT, because they can produce false null extended rows. Also,
* the resulting path must not be parameterized.
*/
if (joinrel->consider_parallel && save_jointype != JOIN_UNIQUE_OUTER && save_jointype != JOIN_FULL &&
save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL) {
if (joinrel->consider_parallel && IS_JOIN_TYPE_PARALLEL_SAFE(save_jointype)
&& outerrel->partial_pathlist != NIL) {
cheapest_partial_outer = (Path *)linitial(outerrel->partial_pathlist);
if (inner_path->parallel_safe) {
@ -1446,8 +1450,8 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
* parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
* because they can produce false null extended rows.
*/
if (joinrel->consider_parallel && save_jointype != JOIN_UNIQUE_OUTER && save_jointype != JOIN_FULL &&
save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL) {
if (joinrel->consider_parallel && IS_JOIN_TYPE_PARALLEL_SAFE(save_jointype) &&
outerrel->partial_pathlist != NIL) {
if (nestjoinOK) {
consider_parallel_nestloop(root, joinrel, outerrel, innerrel, save_jointype, extra);
}
@ -1525,8 +1529,13 @@ static void consider_parallel_mergejoin(PlannerInfo *root, RelOptInfo *joinrel,
static void consider_parallel_nestloop(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel,
RelOptInfo* innerrel, JoinType jointype, JoinPathExtraData* extra)
{
JoinType saveJointype = jointype;
ListCell* lc1 = NULL;
if (jointype == JOIN_UNIQUE_INNER) {
jointype = JOIN_INNER;
}
foreach (lc1, outerrel->partial_pathlist) {
Path* outerpath = (Path*)lfirst(lc1);
List* pathkeys;
@ -1549,12 +1558,13 @@ static void consider_parallel_nestloop(PlannerInfo* root, RelOptInfo* joinrel, R
continue;
/*
* Like match_unsorted_outer, we only consider a single nestloop
* path when the jointype is JOIN_UNIQUE_INNER. But we have to scan
* cheapest_parameterized_paths to find the one we want to consider,
* because cheapest_total_path might not be parallel-safe.
* If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
* cheapest_total_path, and we have to unique-ify it. (We might
* be able to relax this to allow other safe, unparameterized
* inner paths, but right now create_unique_path is not on board
* with that.)
*/
if (jointype == JOIN_UNIQUE_INNER) {
if (saveJointype == JOIN_UNIQUE_INNER) {
if (!bms_is_empty(PATH_REQ_OUTER(innerpath)))
continue;
innerpath = (Path*)create_unique_path(root, innerrel, innerpath, extra->sjinfo);
@ -1795,8 +1805,8 @@ static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
* single set of match bits for each batch, but that will require
* figuring out a deadlock-free way to wait for the probe to finish.
*/
if (joinrel->consider_parallel && jointype != JOIN_UNIQUE_OUTER && jointype != JOIN_FULL &&
jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL) {
if (joinrel->consider_parallel && IS_JOIN_TYPE_PARALLEL_SAFE(save_jointype) &&
outerrel->partial_pathlist != NIL) {
Path* cheapest_partial_outer = NULL;
Path* cheapest_partial_inner = NULL;
Path* cheapest_safe_inner = NULL;
@ -1805,9 +1815,11 @@ static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
/*
* Can we use a partial inner plan too, so that we can build a
* shared hash table in parallel?
* shared hash table in parallel? We can't handle
* JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
*/
if (innerrel->partial_pathlist != NIL && u_sess->attr.attr_sql.enable_parallel_hash) {
if (innerrel->partial_pathlist != NIL && save_jointype != JOIN_UNIQUE_INNER &&
u_sess->attr.attr_sql.enable_parallel_hash) {
cheapest_partial_inner = (Path*)linitial(innerrel->partial_pathlist);
try_partial_hashjoin_path(root,
joinrel,
@ -1822,22 +1834,14 @@ static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
/*
* Normally, given that the joinrel is parallel-safe, the cheapest
* total inner path will also be parallel-safe, but if not, we'll
* have to search cheapest_parameterized_paths for the cheapest
* unparameterized inner path.
* have to search for the cheapest safe, unparameterized inner
* path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
* inner path.
*/
if (cheapest_total_inner->parallel_safe) {
cheapest_safe_inner = cheapest_total_inner;
} else {
ListCell* lc;
foreach (lc, innerrel->cheapest_parameterized_paths) {
Path* innerpath = (Path*)lfirst(lc);
if (innerpath->parallel_safe && bms_is_empty(PATH_REQ_OUTER(innerpath))) {
cheapest_safe_inner = innerpath;
break;
}
}
} else if (save_jointype != JOIN_UNIQUE_INNER) {
cheapest_safe_inner = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
}
if (cheapest_safe_inner != NULL) {

View File

@ -100,6 +100,7 @@ static Scan* create_indexscan_plan(
static BitmapHeapScan* create_bitmap_scan_plan(
PlannerInfo* root, BitmapHeapPath* best_path, List* tlist, List* scan_clauses);
static Plan* create_bitmap_subplan(PlannerInfo* root, Path* bitmapqual, List** qual, List** indexqual, List** indexECs);
static void bitmap_subplan_mark_shared(Plan *plan);
static TidScan* create_tidscan_plan(PlannerInfo* root, TidPath* best_path, List* tlist, List* scan_clauses);
static SubqueryScan* create_subqueryscan_plan(PlannerInfo* root, Path* best_path, List* tlist, List* scan_clauses);
static FunctionScan* create_functionscan_plan(PlannerInfo* root, Path* best_path, List* tlist, List* scan_clauses);
@ -1837,7 +1838,12 @@ static Gather* create_gather_plan(PlannerInfo* root, GatherPath* best_path)
disuse_physical_tlist(subplan, best_path->subpath);
Gather* gather_plan = make_gather(subplan->targetlist,
/*
* Copy a new target list for the gather, since in the merge join case the
* targetlist gets changed. If we just used a pointer to subplan->targetlist,
* the subplan's targetlist would change at the same time, which we don't
* want. See prepare_sort_from_pathkeys for how the targetlist is changed.
*/
Gather* gather_plan = make_gather(list_copy(subplan->targetlist),
NIL,
best_path->path.parallel_workers,
SS_assign_special_param(root),
@ -1854,6 +1860,7 @@ static Gather* create_gather_plan(PlannerInfo* root, GatherPath* best_path)
case T_HashJoin:
case T_MergeJoin:
case T_NestLoop:
case T_BitmapHeapScan:
inherit_plan_locator_info(&gather_plan->plan, subplan);
break;
default:
@ -2621,20 +2628,27 @@ static BitmapHeapScan* create_bitmap_scan_plan(
PlannerInfo* root, BitmapHeapPath* best_path, List* tlist, List* scan_clauses)
{
Index baserelid = best_path->path.parent->relid;
Plan* bitmapqualplan = NULL;
List* bitmapqualorig = NIL;
List* indexquals = NIL;
List* indexECs = NIL;
List* qpqual = NIL;
ListCell* l = NULL;
BitmapHeapScan* scan_plan = NULL;
bool isGlobal = false;
/* it should be a base rel... */
Assert(baserelid > 0);
Assert(best_path->path.parent->rtekind == RTE_RELATION);
/* Process the bitmapqual tree into a Plan tree and qual lists */
bitmapqualplan = create_bitmap_subplan(root, best_path->bitmapqual, &bitmapqualorig, &indexquals, &indexECs);
Plan *bitmapqualplan = create_bitmap_subplan(root, best_path->bitmapqual, &bitmapqualorig, &indexquals, &indexECs);
if (IsA(best_path->bitmapqual, IndexPath)) {
isGlobal = CheckIndexPathUseGPI((IndexPath*)best_path->bitmapqual);
}
/* Parallel bitmap scan is not supported for global partition indexes or when ADIO is enabled */
if (!g_instance.attr.attr_storage.enable_adio_function && !isGlobal && best_path->path.parallel_aware) {
bitmap_subplan_mark_shared(bitmapqualplan);
}
/*
* The qpqual list must contain all restrictions not automatically handled
@ -2661,7 +2675,7 @@ static BitmapHeapScan* create_bitmap_scan_plan(
* to do it that way because predicate conditions need to be rechecked if
* the scan becomes lossy, so they have to be included in bitmapqualorig.
*/
qpqual = NIL;
List *qpqual = NIL;
foreach (l, scan_clauses) {
RestrictInfo* rinfo = (RestrictInfo*)lfirst(l);
Node* clause = (Node*)rinfo->clause;
@ -5293,13 +5307,32 @@ void copy_plan_costsize(Plan* dest, Plan* src)
}
}
/*****************************************************************************
/*
* bitmap_subplan_mark_shared
* Set isshared flag in bitmap subplan so that it will be created in
* shared memory.
*/
static void bitmap_subplan_mark_shared(Plan *plan)
{
if (IsA(plan, BitmapAnd)) {
bitmap_subplan_mark_shared((Plan*)linitial(((BitmapAnd *)plan)->bitmapplans));
} else if (IsA(plan, BitmapOr)) {
((BitmapOr *)plan)->isshared = true;
bitmap_subplan_mark_shared((Plan*)linitial(((BitmapOr *)plan)->bitmapplans));
} else if (IsA(plan, BitmapIndexScan)) {
((BitmapIndexScan *)plan)->isshared = true;
} else {
/* Possibly a CStore index scan (T_CStoreIndexCtidScan), which doesn't support parallelism */
}
}
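For example, for the tree below only the marked nodes get isshared set; following the PostgreSQL design this mirrors, the first subplan's bitmap becomes the shared result into which its siblings' private bitmaps are combined:

BitmapAnd                                  (no flag of its own)
 +- BitmapOr                isshared = true
 |   +- BitmapIndexScan     isshared = true  (first child, recursed into)
 |   +- BitmapIndexScan                      (stays private)
 +- BitmapIndexScan                          (stays private)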
/* ****************************************************************************
*
* PLAN NODE BUILDING ROUTINES
* PLAN NODE BUILDING ROUTINES
*
* Some of these are exported because they are called to build plan nodes
* in contexts where we're not deriving the plan node from a path node.
*****************************************************************************/
* *************************************************************************** */
static SeqScan* make_seqscan(List* qptlist, List* qpqual, Index scanrelid)
{
SeqScan* node = makeNode(SeqScan);

View File

@ -488,7 +488,7 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
int save_parent_id = *parent_node_id;
(*plan_node_id)++;
(*num_plannodes)++;
if (result_plan->initPlan && IS_STREAM_PLAN) {
if (result_plan->initPlan && (IS_SINGLE_NODE || IS_STREAM_PLAN)) {
List* cteLinkList = NIL;
ListCell* lc = NULL;
foreach (lc, result_plan->initPlan) {
@ -496,6 +496,7 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
if (plan->subLinkType == CTE_SUBLINK)
cteLinkList = lappend(cteLinkList, plan);
}
#ifdef ENABLE_MULTIPLE_NODES
if (cteLinkList != NIL) {
if (IsA(result_plan, ValuesScan)) {
sprintf_rc = sprintf_s(u_sess->opt_cxt.not_shipping_info->not_shipping_reason,
@ -505,7 +506,8 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
mark_stream_unsupport();
}
}
if (IS_STREAM_PLAN)
#endif
if (IS_SINGLE_NODE || IS_STREAM_PLAN)
*initplans = list_concat(*initplans, list_copy(result_plan->initPlan));
}
switch (nodeTag(result_plan)) {
@ -876,7 +878,7 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
}
break;
}
if (IS_STREAM_PLAN) {
if (IS_STREAM_PLAN || IS_SINGLE_NODE) {
if (is_replicated_plan(result_plan) && is_execute_on_multinodes(result_plan)) {
List* nodelist = check_random_expr(result_plan);
if (list_length(nodelist) > 0) {
@ -924,6 +926,7 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
} else
subplan_ids[subplan->plan_id] = subplan_ids[0];
if (!has_finalized) {
#ifdef ENABLE_MULTIPLE_NODES
if (is_execute_on_coordinator(result_plan) ||
(is_execute_on_allnodes(result_plan) && !is_data_node_exec)) {
Plan* child_plan = NULL;
@ -973,6 +976,7 @@ void finalize_node_id(Plan* result_plan, int* plan_node_id, int* parent_node_id,
}
pushdown_execnodes(plan, result_plan->exec_nodes, false, true);
}
#endif
if (check_stream_support()) {
PlannerInfo* subroot = NULL;
Plan* child_root = NULL;

View File

@ -91,6 +91,7 @@ typedef struct {
typedef struct {
bool allow_restricted;
List *safe_param_ids; /* PARAM_EXEC Param IDs to treat as safe */
} has_parallel_hazard_arg;
@ -1168,6 +1169,7 @@ bool has_parallel_hazard(Node *node, bool allow_restricted)
has_parallel_hazard_arg context;
context.allow_restricted = allow_restricted;
context.safe_param_ids = NIL;
return has_parallel_hazard_walker(node, &context);
}
@ -1196,8 +1198,53 @@ static bool has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *cont
/* Recurse into subselects */
return query_tree_walker(query, (bool (*)())has_parallel_hazard_walker, context, 0);
} else if (IsA(node, SubPlan) || IsA(node, SubLink) || IsA(node, AlternativeSubPlan) || IsA(node, Param)) {
return true;
} else if (IsA(node, SubLink) || IsA(node, AlternativeSubPlan)) {
if (!context->allow_restricted) {
return true;
}
} else if (IsA(node, SubPlan)) {
/*
* Only parallel-safe SubPlans can be sent to workers. Within the
* testexpr of the SubPlan, Params representing the output columns of the
* subplan can be treated as parallel-safe, so temporarily add their IDs
* to the safe_param_ids list while examining the testexpr.
*/
SubPlan *subplan = (SubPlan *)node;
if (!subplan->parallel_safe && !context->allow_restricted) {
return true;
}
List *saveSafeParamIds = context->safe_param_ids;
context->safe_param_ids = list_concat(list_copy(context->safe_param_ids), list_copy(subplan->paramIds));
if (has_parallel_hazard_walker(subplan->testexpr, context)) {
return true; /* no need to restore safe_param_ids */
}
list_free(context->safe_param_ids);
context->safe_param_ids = saveSafeParamIds;
/* we must also check args, but no special Param treatment there */
if (has_parallel_hazard_walker((Node *) subplan->args, context)) {
return true;
}
/* don't want to recurse normally, so we're done */
return false;
} else if (IsA(node, Param)) {
/*
* We can't pass Params to workers at the moment either, so they are also
* parallel-restricted, unless they are PARAM_EXTERN Params or are
* PARAM_EXEC Params listed in safe_param_ids, meaning they are either
* generated within the workers or computed by the leader, in which case
* their value can be passed down to the workers.
*/
Param *param = (Param *)node;
if (param->paramkind == PARAM_EXTERN) {
return false;
}
if (param->paramkind != PARAM_EXEC || !list_member_int(context->safe_param_ids, param->paramid)) {
if (!context->allow_restricted) {
return true;
}
}
return false; /* nothing to recurse to */
}
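As an example of the safe-Param rule: a clause such as x = (SELECT max(y) FROM u) is planned as a SubPlan whose testexpr compares x against the subplan's output, represented as a PARAM_EXEC Param. While that testexpr is being walked, the Param's ID is in safe_param_ids, so it is not flagged as a hazard; the same kind of Param encountered outside such a testexpr remains parallel-restricted.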
/* This is just a notational convenience for callers. */

View File

@ -2058,8 +2058,8 @@ IndexPath* create_index_path(PlannerInfo* root, IndexOptInfo* index, List* index
* loop_count should match the value used when creating the component
* IndexPaths.
*/
BitmapHeapPath* create_bitmap_heap_path(
PlannerInfo* root, RelOptInfo* rel, Path* bitmapqual, Relids required_outer, double loop_count)
BitmapHeapPath* create_bitmap_heap_path(PlannerInfo* root, RelOptInfo* rel, Path* bitmapqual,
Relids required_outer, double loop_count, int parallel_degree)
{
BitmapHeapPath* pathnode = makeNode(BitmapHeapPath);
@ -2067,15 +2067,23 @@ BitmapHeapPath* create_bitmap_heap_path(
pathnode->path.parent = rel;
pathnode->path.param_info = get_baserel_parampathinfo(root, rel, required_outer);
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->path.parallel_aware = parallel_degree > 0 ? true : false;
pathnode->path.parallel_safe = rel->consider_parallel;
pathnode->path.parallel_workers = parallel_degree;
pathnode->bitmapqual = bitmapqual;
cost_bitmap_heap_scan(&pathnode->path, root, rel, pathnode->path.param_info, bitmapqual, loop_count);
#ifdef STREAMPLAN
/*
* We need to set locator_type for parallel query, because we may send
* this value to a background worker. Otherwise locator_type keeps its
* initial value '\0', which truncates the serialized plan later on.
*/
pathnode->path.locator_type = rel->locator_type;
if (IS_STREAM_PLAN) {
pathnode->path.distribute_keys = rel->distribute_keys;
pathnode->path.locator_type = rel->locator_type;
/* add location information for bitmap heap path */
RangeTblEntry* rte = root->simple_rte_array[rel->relid];
@ -3971,7 +3979,7 @@ Path* reparameterize_path(PlannerInfo* root, Path* path, Relids required_outer,
case T_BitmapHeapScan: {
BitmapHeapPath* bpath = (BitmapHeapPath*)path;
return (Path*)create_bitmap_heap_path(root, rel, bpath->bitmapqual, required_outer, loop_count);
return (Path*)create_bitmap_heap_path(root, rel, bpath->bitmapqual, required_outer, loop_count, 0);
}
case T_SubqueryScan:
return create_subqueryscan_path(root, rel, path->pathkeys, required_outer);

View File

@ -647,7 +647,8 @@ RelOptInfo* build_join_rel(PlannerInfo* root, Relids joinrelids, RelOptInfo* out
* here.
*/
if (inner_rel->consider_parallel && outer_rel->consider_parallel &&
!has_parallel_hazard((Node *)restrictlist, false)) {
!has_parallel_hazard((Node *)restrictlist, false) &&
!has_parallel_hazard((Node *)joinrel->reltargetlist, false)) {
joinrel->consider_parallel = true;
}

View File

@ -1432,6 +1432,7 @@ void knl_t_bgworker_init(knl_t_bgworker_context* bgworker_cxt)
bgworker_cxt->pcxt_list = DLIST_STATIC_INIT(bgworker_cxt->pcxt_list);
bgworker_cxt->save_pgBufferUsage = NULL;
bgworker_cxt->hpm_context = NULL;
bgworker_cxt->memCxt = NULL;
}
void knl_t_msqueue_init(knl_t_msqueue_context* msqueue_cxt)

View File

@ -2066,7 +2066,6 @@ static void ExecutePlan(EState *estate, PlanState *planstate, bool use_parallel_
* process so we just end the loop...
*/
if (TupIsNull(slot)) {
(void)ExecShutdownNode(planstate);
ExecEarlyFree(planstate);
break;
}
@ -2136,6 +2135,14 @@ static void ExecutePlan(EState *estate, PlanState *planstate, bool use_parallel_
u_sess->instr_cxt.global_instr->SetPeakNodeMemory(planstate->plan->plan_node_id, peak_memory);
}
/*
* If we know we won't need to back up, we can release resources at this
* point.
*/
if (!(estate->es_top_eflags & EXEC_FLAG_BACKWARD)) {
(void)ExecShutdownNode(planstate);
}
if (use_parallel_mode) {
ExitParallelMode();
}

View File

@ -26,6 +26,7 @@
#include "executor/execParallel.h"
#include "executor/executor.h"
#include "executor/hashjoin.h"
#include "executor/nodeBitmapHeapscan.h"
#include "executor/nodeSeqscan.h"
#include "executor/nodeAppend.h"
#include "executor/nodeIndexscan.h"
@ -183,6 +184,9 @@ static bool ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateConte
case T_AppendState:
ExecAppendEstimate((AppendState*)planstate, e->pcxt);
break;
case T_BitmapHeapScanState:
ExecBitmapHeapEstimate((BitmapHeapScanState*)planstate, e->pcxt);
break;
default:
break;
}
@ -244,6 +248,13 @@ static bool ExecParallelInitializeDSM(PlanState *planstate, ExecParallelInitiali
cxt->pwCtx->queryInfo.pappend_num++;
}
break;
case T_BitmapHeapScanState:
if (planstate->plan->parallel_aware) {
ExecBitmapHeapInitializeDSM((BitmapHeapScanState*)planstate,
d->pcxt, cxt->pwCtx->queryInfo.bmscan_num);
cxt->pwCtx->queryInfo.bmscan_num++;
}
break;
case T_HashJoinState:
if (planstate->plan->parallel_aware) {
ExecHashJoinInitializeDSM((HashJoinState*)planstate, d->pcxt, cxt->pwCtx->queryInfo.jstate_num);
@ -425,6 +436,7 @@ ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate,
queryInfo.pappend = (ParallelAppendState**)palloc0(sizeof(ParallelAppendState*) * e.nnodes);
queryInfo.jstate = (ParallelHashJoinState**)palloc0(sizeof(ParallelHashJoinState*) * e.nnodes);
queryInfo.shared_info = (SharedHashInfo**)palloc0(sizeof(SharedHashInfo*) * e.nnodes);
queryInfo.bmscan = (ParallelBitmapHeapState **)palloc0(sizeof(ParallelBitmapHeapState *) * e.nnodes);
/*
* Give parallel-aware nodes a chance to initialize their shared data.
@ -488,6 +500,11 @@ static bool ExecParallelReInitializeDSM(PlanState* planstate, ParallelContext* p
ExecAppendReInitializeDSM((AppendState*)planstate, pcxt);
}
break;
case T_BitmapHeapScanState:
if (planstate->plan->parallel_aware) {
ExecBitmapHeapReInitializeDSM((BitmapHeapScanState*)planstate, pcxt);
}
break;
case T_HashJoinState:
if (planstate->plan->parallel_aware) {
ExecHashJoinReInitializeDSM((HashJoinState*)planstate, pcxt);
@ -735,6 +752,11 @@ static bool ExecParallelInitializeWorker(PlanState *planstate, void *context)
ExecAppendInitializeWorker((AppendState*)planstate, context);
}
break;
case T_BitmapHeapScanState:
if (planstate->plan->parallel_aware) {
ExecBitmapHeapInitializeWorker((BitmapHeapScanState *)planstate, context);
}
break;
case T_HashState:
/* even when not parallel-aware, for EXPLAIN ANALYZE */
ExecHashInitializeWorker((HashState*)planstate, context);
@ -778,6 +800,8 @@ void ParallelQueryMain(void *seg)
/* Start up the executor, have it run the plan, and then shut it down. */
(void)ExecutorStart(queryDesc, cxt->pwCtx->queryInfo.eflags);
/* Special executor initialization steps for parallel workers */
Assert(t_thrd.bgworker_cxt.memCxt != NULL);
ExecParallelInitializeWorker(queryDesc->planstate, seg);
/* Pass down any tuple bound */

View File

@ -52,13 +52,22 @@
#include "gstrace/gstrace_infra.h"
#include "gstrace/access_gstrace.h"
#define WAIT_BITMAP_INIT_TIMEOUT 1 // 1s timeout for pthread_cond_timedwait
static TupleTableSlot* BitmapHbucketTblNext(BitmapHeapScanState* node);
static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node);
static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
#ifdef USE_PREFETCH
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, const TBMIterateResult *tbmres,
TBMIterator *prefetch_iterator, TBMSharedIterator *shared_prefetch_it);
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
#endif
static void bitgetpage(HeapScanDesc scan, TBMIterateResult* tbmres);
static void ExecInitPartitionForBitmapHeapScan(BitmapHeapScanState* scanstate, EState* estate);
static void ExecInitNextPartitionForBitmapHeapScan(BitmapHeapScanState* node);
static void BitmapHeapPrefetchNext(
BitmapHeapScanState* node, HeapScanDesc scan, const TIDBitmap* tbm, TBMIterator** prefetch_iterator);
static void BitmapHeapPrefetchNext(BitmapHeapScanState* node, HeapScanDesc scan, const TIDBitmap* tbm,
TBMIterator** prefetch_iterator, TBMSharedIterator** shared_prefetch_it);
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
/* This struct is used for partition switch while prefetch pages */
typedef struct PrefetchNode {
@ -76,11 +85,20 @@ void BitmapHeapFree(BitmapHeapScanState* node)
tbm_end_iterate(node->prefetch_iterator);
node->prefetch_iterator = NULL;
}
if (node->shared_tbmiterator != NULL) {
tbm_end_shared_iterate(node->shared_tbmiterator);
node->shared_tbmiterator = NULL;
}
if (node->shared_prefetch_iterator != NULL) {
tbm_end_shared_iterate(node->shared_prefetch_iterator);
node->shared_prefetch_iterator = NULL;
}
if (node->tbm != NULL) {
tbm_free(node->tbm);
node->tbm = NULL;
}
node->tbmres = NULL;
node->initialized = false;
}
static TupleTableSlot* BitmapHbucketTblNext(BitmapHeapScanState* node)
{
@ -101,10 +119,104 @@ static TupleTableSlot* BitmapHbucketTblNext(BitmapHeapScanState* node)
BitmapHeapFree(node);
}
}
/* ----------------------------------------------------------------
* BitmapHeapNext
#ifdef USE_PREFETCH
/*
* BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
*/
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, const TBMIterateResult *tbmres,
TBMIterator *prefetch_iterator, TBMSharedIterator *shared_prefetch_it)
{
ParallelBitmapHeapState *pstate = node->pstate;
if (pstate == NULL) {
if (node->prefetch_pages > 0) {
/* The main iterator has closed the distance by one page */
node->prefetch_pages--;
} else if (prefetch_iterator != NULL) {
/* Do not let the prefetch iterator get behind the main one */
TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno) {
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmodule(MOD_EXECUTOR),
errmsg("prefetch and main iterators are out of sync for BitmapHeapScan.")));
}
}
return;
}
if (u_sess->storage_cxt.target_prefetch_pages > 0) {
(void)pthread_mutex_lock(&pstate->cv_mtx);
if (pstate->prefetch_pages > 0) {
pstate->prefetch_pages--;
(void)pthread_mutex_unlock(&pstate->cv_mtx);
} else {
/* Release the mutex before iterating */
(void)pthread_mutex_unlock(&pstate->cv_mtx);
/*
* In shared mode we cannot ensure that the current blockno of the
* main iterator and that of the prefetch iterator are the same.
* It's possible that whatever blockno we are prefetching will be
* processed by another process. Therefore, we don't validate the
* blockno here as we do in the non-parallel case.
*/
if (shared_prefetch_it != NULL) {
(void)tbm_shared_iterate(shared_prefetch_it);
}
}
}
}
/*
* BitmapAdjustPrefetchTarget - Adjust the prefetch target
*
* Increase prefetch target if it's not yet at the max. Note that
* we will increase it to zero after fetching the very first
* page/tuple, then to one after the second tuple is fetched, then
* it doubles as later pages are fetched.
*/
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
{
ParallelBitmapHeapState *pstate = node->pstate;
if (pstate == NULL) {
if (node->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages) {
/* don't increase any further */
} else if (node->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages / 2) {
node->prefetch_target = u_sess->storage_cxt.target_prefetch_pages;
} else if (node->prefetch_target > 0) {
node->prefetch_target *= 2;
} else {
node->prefetch_target++;
}
return;
}
/* Do an unlocked check first to save spinlock acquisitions. */
if (pstate->prefetch_target < u_sess->storage_cxt.target_prefetch_pages) {
(void)pthread_mutex_lock(&pstate->cv_mtx);
if (pstate->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages) {
/* don't increase any further */
} else if (pstate->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages / 2) {
pstate->prefetch_target = u_sess->storage_cxt.target_prefetch_pages;
} else if (pstate->prefetch_target > 0) {
pstate->prefetch_target *= 2;
} else {
pstate->prefetch_target++;
}
(void)pthread_mutex_unlock(&pstate->cv_mtx);
}
}
#endif /* USE_PREFETCH */
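For example, with u_sess->storage_cxt.target_prefetch_pages = 32, successive calls ramp the private prefetch_target through -1, 0, 1, 2, 4, 8, 16 and then straight to 32: once the target reaches half the maximum, it jumps to the maximum and stays there.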
/* ----------------------------------------------------------------
* BitmapHeapNext
*
* Retrieve next tuple from the BitmapHeapScan node's currentRelation
* ----------------------------------------------------------------
*/
static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
@ -113,10 +225,13 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
HeapScanDesc scan = NULL;
TIDBitmap* tbm = NULL;
TBMIterator* tbmiterator = NULL;
TBMSharedIterator *shared_tbmiterator = NULL;
TBMIterateResult* tbmres = NULL;
ParallelBitmapHeapState *pstate = node->pstate;
#ifdef USE_PREFETCH
TBMIterator* prefetch_iterator = NULL;
TBMSharedIterator* shared_prefetch_it = NULL;
#endif
OffsetNumber targoffset;
TupleTableSlot* slot = NULL;
@ -128,10 +243,15 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
slot = node->ss.ss_ScanTupleSlot;
scan = GetHeapScanDesc(node->ss.ss_currentScanDesc);
tbm = node->tbm;
tbmiterator = node->tbmiterator;
if (pstate == NULL) {
tbmiterator = node->tbmiterator;
} else {
shared_tbmiterator = node->shared_tbmiterator;
}
tbmres = node->tbmres;
#ifdef USE_PREFETCH
prefetch_iterator = node->prefetch_iterator;
shared_prefetch_it = node->shared_prefetch_iterator;
#endif
/*
@ -147,27 +267,79 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
* a lot of prefetching in a scan that stops after a few tuples because of
* a LIMIT.
*/
if (tbm == NULL) {
tbm = (TIDBitmap*)MultiExecProcNode(outerPlanState(node));
if (!node->initialized) {
if (pstate == NULL) {
tbm = (TIDBitmap*)MultiExecProcNode(outerPlanState(node));
if (tbm == NULL || !IsA(tbm, TIDBitmap)) {
ereport(ERROR,
(errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
errmodule(MOD_EXECUTOR),
errmsg("unrecognized result from subplan for BitmapHeapScan.")));
}
if (tbm == NULL || !IsA(tbm, TIDBitmap)) {
ereport(ERROR,
(errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
errmodule(MOD_EXECUTOR),
errmsg("unrecognized result from subplan for BitmapHeapScan.")));
}
node->tbm = tbm;
node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
node->tbmres = tbmres = NULL;
node->tbm = tbm;
node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
node->tbmres = tbmres = NULL;
#ifdef USE_PREFETCH
if (u_sess->storage_cxt.target_prefetch_pages > 0) {
node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm);
node->prefetch_pages = 0;
node->prefetch_target = -1;
}
if (u_sess->storage_cxt.target_prefetch_pages > 0) {
node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm);
node->prefetch_pages = 0;
node->prefetch_target = -1;
}
#endif
} else {
/*
* The leader will immediately come out of the function, but
* the others will be blocked until the leader populates the TBM and
* wakes them up.
*/
if (BitmapShouldInitializeSharedState(pstate)) {
tbm = (TIDBitmap *)MultiExecProcNode(outerPlanState(node));
if (tbm == NULL || !IsA(tbm, TIDBitmap)) {
ereport(ERROR,
(errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
errmodule(MOD_EXECUTOR),
errmsg("unrecognized result from subplan for BitmapHeapScan.")));
}
node->tbm = tbm;
/*
* Prepare to iterate over the TBM. This returns the shared
* iterator state, which will be used by multiple processes to
* iterate jointly.
*/
pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
#ifdef USE_PREFETCH
if (u_sess->storage_cxt.target_prefetch_pages > 0) {
pstate->prefetch_iterator = tbm_prepare_shared_iterate(tbm);
/*
* We don't need the mutex here as we haven't yet woken up the
* others.
*/
pstate->prefetch_pages = 0;
pstate->prefetch_target = -1;
}
#endif
/* We have initialized the shared state so wake up others. */
BitmapDoneInitializingSharedState(pstate);
}
/* Allocate a private iterator and attach the shared state to it */
node->shared_tbmiterator = shared_tbmiterator = tbm_attach_shared_iterate(pstate->tbmiterator);
node->tbmres = tbmres = NULL;
#ifdef USE_PREFETCH
if (u_sess->storage_cxt.target_prefetch_pages > 0) {
node->shared_prefetch_iterator = tbm_attach_shared_iterate(pstate->prefetch_iterator);
shared_prefetch_it = node->shared_prefetch_iterator;
}
#endif
}
node->initialized = true;
}
for (;;) {
@ -178,37 +350,28 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
* Get next page of results if needed
*/
if (tbmres == NULL) {
node->tbmres = tbmres = tbm_iterate(tbmiterator);
if (pstate == NULL) {
node->tbmres = tbmres = tbm_iterate(tbmiterator);
} else {
node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
}
if (tbmres == NULL) {
/* no more entries in the bitmap */
break;
}
#ifdef USE_PREFETCH
if (node->prefetch_pages > 0) {
/* The main iterator has closed the distance by one page */
node->prefetch_pages--;
} else if (prefetch_iterator != NULL) {
/* Do not let the prefetch iterator get behind the main one */
TBMIterateResult* tbmpre = tbm_iterate(prefetch_iterator);
if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno) {
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmodule(MOD_EXECUTOR),
errmsg("prefetch and main iterators are out of sync for BitmapHeapScan.")));
}
}
BitmapAdjustPrefetchIterator(node, tbmres, prefetch_iterator, shared_prefetch_it);
#endif /* USE_PREFETCH */
/* Check whether we need to switch the partition fake relation (saved via rd_rel) */
if (BitmapNodeNeedSwitchPartRel(node)) {
if (pstate == NULL && BitmapNodeNeedSwitchPartRel(node)) {
GPISetCurrPartOid(node->gpi_scan, node->tbmres->partitionOid);
if (!GPIGetNextPartRelation(node->gpi_scan, CurrentMemoryContext, AccessShareLock)) {
/* If the current partition is invalid, skip straight to the next page */
tbmres = NULL;
#ifdef USE_PREFETCH
BitmapHeapPrefetchNext(node, scan, tbm, &prefetch_iterator);
BitmapHeapPrefetchNext(node, scan, tbm, &prefetch_iterator, &shared_prefetch_it);
#endif /* USE_PREFETCH */
continue;
}
@ -256,14 +419,7 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
* page/tuple, then to one after the second tuple is fetched, then
* it doubles as later pages are fetched.
*/
if (node->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages)
/* don't increase any further */;
else if (node->prefetch_target >= u_sess->storage_cxt.target_prefetch_pages / 2)
node->prefetch_target = u_sess->storage_cxt.target_prefetch_pages;
else if (node->prefetch_target > 0)
node->prefetch_target *= 2;
else
node->prefetch_target++;
BitmapAdjustPrefetchTarget(node);
#endif /* USE_PREFETCH */
} else {
/*
@ -277,8 +433,18 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
* Try to prefetch at least a few pages even before we get to the
* second page if we don't stop reading after the first tuple.
*/
if (node->prefetch_target < u_sess->storage_cxt.target_prefetch_pages)
node->prefetch_target++;
if (pstate == NULL) {
if (node->prefetch_target < u_sess->storage_cxt.target_prefetch_pages) {
node->prefetch_target++;
}
} else if (pstate->prefetch_target < u_sess->storage_cxt.target_prefetch_pages) {
/* take the mutex while updating shared state */
(void)pthread_mutex_lock(&pstate->cv_mtx);
if (pstate->prefetch_target < u_sess->storage_cxt.target_prefetch_pages) {
pstate->prefetch_target++;
}
(void)pthread_mutex_unlock(&pstate->cv_mtx);
}
#endif /* USE_PREFETCH */
}
@ -291,7 +457,7 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
}
#ifdef USE_PREFETCH
BitmapHeapPrefetchNext(node, scan, tbm, &prefetch_iterator);
BitmapHeapPrefetchNext(node, scan, tbm, &prefetch_iterator, &shared_prefetch_it);
#endif /* USE_PREFETCH */
/*
@ -343,6 +509,20 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node)
return ExecClearTuple(slot);
}
/*
* BitmapDoneInitializingSharedState - Shared state is initialized
*
* By this time the leader has already populated the TBM and initialized the
* shared state, so wake up the other processes.
*/
static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
{
(void)pthread_mutex_lock(&pstate->cv_mtx);
pstate->state = BM_FINISHED;
(void)pthread_cond_broadcast(&pstate->cv);
(void)pthread_mutex_unlock(&pstate->cv_mtx);
}
/*
* bitgetpage - subroutine for BitmapHeapNext()
*
@ -624,6 +804,11 @@ BitmapHeapScanState* ExecInitBitmapHeapScan(BitmapHeapScan* node, EState* estate
scanstate->ss.isPartTbl = node->scan.isPartTbl;
scanstate->ss.currentSlot = 0;
scanstate->ss.partScanDirection = node->scan.partScanDirection;
scanstate->pscan_len = 0;
scanstate->initialized = false;
scanstate->shared_tbmiterator = NULL;
scanstate->shared_prefetch_iterator = NULL;
scanstate->pstate = NULL;
/* initialize Global partition index scan information */
GPIScanInit(&scanstate->gpi_scan);
@ -802,14 +987,18 @@ static void ExecInitPartitionForBitmapHeapScan(BitmapHeapScanState* scanstate, E
* to do on the current page, else we may uselessly prefetch the same
* page we are just about to request for real.
*/
void BitmapHeapPrefetchNext(
BitmapHeapScanState* node, HeapScanDesc scan, const TIDBitmap* tbm, TBMIterator** prefetch_iterator)
void BitmapHeapPrefetchNext(BitmapHeapScanState* node, HeapScanDesc scan, const TIDBitmap* tbm,
TBMIterator** prefetch_iterator, TBMSharedIterator** shared_prefetch_it)
{
if (*prefetch_iterator == NULL) {
ParallelBitmapHeapState *pstate = node->pstate;
if ((pstate == NULL && *prefetch_iterator == NULL) ||
(pstate != NULL && *shared_prefetch_it == NULL)) {
return;
}
ADIO_RUN()
{
Assert(*shared_prefetch_it == NULL);
BlockNumber* blockList = NULL;
BlockNumber* blockListPtr = NULL;
PrefetchNode* prefetchNode = NULL;
@ -889,32 +1078,204 @@ void BitmapHeapPrefetchNext(
}
ADIO_ELSE()
{
Oid oldOid = GPIGetCurrPartOid(node->gpi_scan);
while (node->prefetch_pages < node->prefetch_target) {
TBMIterateResult* tbmpre = tbm_iterate(*prefetch_iterator);
Relation prefetchRel = scan->rs_rd;
if (tbmpre == NULL) {
/* No more pages to prefetch */
tbm_end_iterate(*prefetch_iterator);
node->prefetch_iterator = *prefetch_iterator = NULL;
break;
if (pstate == NULL) {
Oid oldOid = GPIGetCurrPartOid(node->gpi_scan);
while (node->prefetch_pages < node->prefetch_target) {
TBMIterateResult* tbmpre = tbm_iterate(*prefetch_iterator);
Relation prefetchRel = scan->rs_rd;
if (tbmpre == NULL) {
/* No more pages to prefetch */
tbm_end_iterate(*prefetch_iterator);
node->prefetch_iterator = *prefetch_iterator = NULL;
break;
}
node->prefetch_pages++;
if (tbm_is_global(node->tbm) && GPIScanCheckPartOid(node->gpi_scan, tbmpre->partitionOid)) {
GPISetCurrPartOid(node->gpi_scan, tbmpre->partitionOid);
if (!GPIGetNextPartRelation(node->gpi_scan, CurrentMemoryContext, AccessShareLock)) {
/* If the current partition is invalid, skip straight to the next page */
tbmpre = NULL;
continue;
} else {
prefetchRel = node->gpi_scan->fakePartRelation;
}
}
/* For posix_fadvise() we just send the one request */
PrefetchBuffer(prefetchRel, MAIN_FORKNUM, tbmpre->blockno);
}
node->prefetch_pages++;
if (tbm_is_global(node->tbm) && GPIScanCheckPartOid(node->gpi_scan, tbmpre->partitionOid)) {
GPISetCurrPartOid(node->gpi_scan, tbmpre->partitionOid);
if (!GPIGetNextPartRelation(node->gpi_scan, CurrentMemoryContext, AccessShareLock)) {
/* If the current partition is invalid, skip straight to the next page */
tbmpre = NULL;
continue;
} else {
prefetchRel = node->gpi_scan->fakePartRelation;
/* recover old oid after prefetch switch */
GPISetCurrPartOid(node->gpi_scan, oldOid);
} else if (pstate->prefetch_pages < pstate->prefetch_target) {
if (*shared_prefetch_it != NULL) {
while (1) {
bool do_prefetch = false;
/*
* Recheck under the mutex. If some other process has already
* done enough prefetching then we need not do anything.
*/
(void)pthread_mutex_lock(&pstate->cv_mtx);
if (pstate->prefetch_pages < pstate->prefetch_target) {
pstate->prefetch_pages++;
do_prefetch = true;
}
(void)pthread_mutex_unlock(&pstate->cv_mtx);
if (!do_prefetch) {
return;
}
TBMIterateResult *tbmpre = tbm_shared_iterate(*shared_prefetch_it);
if (tbmpre == NULL) {
/* No more pages to prefetch */
tbm_end_shared_iterate(*shared_prefetch_it);
node->shared_prefetch_iterator = *shared_prefetch_it = NULL;
break;
}
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
}
}
/* For posix_fadvise() we just send the one request */
PrefetchBuffer(prefetchRel, MAIN_FORKNUM, tbmpre->blockno);
}
/* recover old oid after prefetch switch */
GPISetCurrPartOid(node->gpi_scan, oldOid);
}
ADIO_END();
}
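The parallel branch above uses a claim-then-work pattern: a worker reserves one page of prefetch budget under the mutex, drops the lock, and only then advances the shared iterator and issues the prefetch, so the I/O request never happens while the lock is held. A minimal sketch of the claiming step, assuming pthreads; the struct and function names are hypothetical stand-ins for prefetch_pages/prefetch_target in ParallelBitmapHeapState:

#include <pthread.h>

/* Hypothetical shared prefetch budget. */
struct PrefetchBudget {
    pthread_mutex_t mtx;
    int pages_claimed; /* pages some worker has agreed to prefetch */
    int target;        /* how far ahead of the main iterator we want to be */
};

/* Claim one page of budget; return false if other workers already did
 * enough. The caller performs the actual prefetch outside the lock. */
static bool ClaimOnePrefetchPage(PrefetchBudget *budget)
{
    bool do_prefetch = false;
    (void)pthread_mutex_lock(&budget->mtx);
    if (budget->pages_claimed < budget->target) {
        budget->pages_claimed++; /* reserve before releasing the lock */
        do_prefetch = true;
    }
    (void)pthread_mutex_unlock(&budget->mtx);
    return do_prefetch;
}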
/* ----------------
* BitmapShouldInitializeSharedState
*
* The first process to come here and see the state as BM_INITIAL
* will become the leader for the parallel bitmap scan and will be
* responsible for populating the TIDBitmap. The other processes will
* be blocked by the condition variable until the leader wakes them up.
* ----------------
*/
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
{
SharedBitmapState state;
(void)pthread_mutex_lock(&pstate->cv_mtx);
while (1) {
CHECK_FOR_INTERRUPTS();
state = pstate->state;
if (pstate->state == BM_INITIAL) {
pstate->state = BM_INPROGRESS;
}
/* Exit if bitmap is done, or if we're the leader. */
if (state != BM_INPROGRESS) {
break;
}
/*
* Use pthread_cond_timedwait here in case a worker exits on error, and call
* CHECK_FOR_INTERRUPTS to handle the error message from the worker.
*/
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
ts.tv_sec += WAIT_BITMAP_INIT_TIMEOUT;
(void)pthread_cond_timedwait(&pstate->cv, &pstate->cv_mtx, &ts);
}
(void)pthread_mutex_unlock(&pstate->cv_mtx);
return (state == BM_INITIAL);
}
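Together with BitmapDoneInitializingSharedState above, this is a one-shot leader election over a mutex and condition variable: exactly one process observes BM_INITIAL and builds the bitmap, everyone else sleeps until the broadcast. A self-contained sketch of the handshake; the names are hypothetical and the timed wait is simplified to a plain pthread_cond_wait:

#include <pthread.h>

enum BuildState { BUILD_INITIAL, BUILD_INPROGRESS, BUILD_FINISHED };

struct SharedBuild {
    pthread_mutex_t mtx;
    pthread_cond_t cv;
    BuildState state;
};

static SharedBuild g_build = {PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, BUILD_INITIAL};

/* Returns true in exactly one process: the elected leader. The others block
 * until the leader calls DoneBuilding(). */
static bool ShouldBuild(SharedBuild *s)
{
    BuildState seen;
    (void)pthread_mutex_lock(&s->mtx);
    for (;;) {
        seen = s->state;
        if (seen == BUILD_INITIAL) {
            s->state = BUILD_INPROGRESS; /* we won the election */
        }
        if (seen != BUILD_INPROGRESS) {
            break; /* we are the leader, or the build already finished */
        }
        (void)pthread_cond_wait(&s->cv, &s->mtx);
    }
    (void)pthread_mutex_unlock(&s->mtx);
    return seen == BUILD_INITIAL;
}

static void DoneBuilding(SharedBuild *s)
{
    (void)pthread_mutex_lock(&s->mtx);
    s->state = BUILD_FINISHED;
    (void)pthread_cond_broadcast(&s->cv);
    (void)pthread_mutex_unlock(&s->mtx);
}

Each participant calls ShouldBuild(&g_build); the one that gets true builds the bitmap and then calls DoneBuilding(&g_build).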
/* ----------------------------------------------------------------
* ExecBitmapHeapEstimate
*
* Compute the amount of shared memory we'll need for the
* parallel bitmap heap scan state.
* ----------------------------------------------------------------
*/
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
{
EState *estate = node->ss.ps.state;
node->pscan_len =
add_size(offsetof(ParallelBitmapHeapState, phs_snapshot_data), EstimateSnapshotSpace(estate->es_snapshot));
}
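The estimate is the fixed header plus the serialized snapshot, computed with offsetof against the flexible array member at the end of ParallelBitmapHeapState. A minimal sketch of the sizing-and-allocation idiom, with a hypothetical payload length in place of EstimateSnapshotSpace (the flexible array member is a GCC/Clang extension in C++, as in the patch):

#include <cstddef>
#include <cstdlib>
#include <cstring>

/* Trimmed-down stand-in for ParallelBitmapHeapState: a fixed header plus a
 * variable-length payload at the end of the same allocation. */
struct SharedScanState {
    int prefetch_pages;
    int prefetch_target;
    char snapshot_data[]; /* flexible array member, like phs_snapshot_data */
};

int main()
{
    size_t snapshot_len = 128; /* stand-in for EstimateSnapshotSpace() */
    size_t total = offsetof(SharedScanState, snapshot_data) + snapshot_len;
    SharedScanState *st = (SharedScanState *)calloc(1, total); /* zeroed, like MemoryContextAllocZero */
    if (st == NULL) {
        return 1;
    }
    memset(st->snapshot_data, 0, snapshot_len); /* SerializeSnapshot writes into this tail */
    free(st);
    return 0;
}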
/* ----------------------------------------------------------------
* ExecBitmapHeapInitializeDSM
*
* Set up a parallel bitmap heap scan descriptor.
* ----------------------------------------------------------------
*/
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt, int nodeid)
{
knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg;
EState *estate = node->ss.ps.state;
ParallelBitmapHeapState *pstate = (ParallelBitmapHeapState*)MemoryContextAllocZero(cxt->memCtx,
node->pscan_len);
pstate->tbmiterator = NULL;
pstate->prefetch_iterator = NULL;
pstate->prefetch_pages = 0;
pstate->prefetch_target = 0;
pstate->state = BM_INITIAL;
pstate->plan_node_id = node->ss.ps.plan->plan_node_id;
pstate->pscan_len = node->pscan_len;
(void)pthread_cond_init(&pstate->cv, NULL);
(void)pthread_mutex_init(&pstate->cv_mtx, NULL);
SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data,
node->pscan_len - offsetof(ParallelBitmapHeapState, phs_snapshot_data));
cxt->pwCtx->queryInfo.bmscan[nodeid] = pstate;
node->pstate = pstate;
}
/* ----------------------------------------------------------------
* ExecBitmapHeapReInitializeDSM
*
* Reset shared state before beginning a fresh scan.
* ----------------------------------------------------------------
*/
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
{
ParallelBitmapHeapState *pstate = node->pstate;
/* If there's no parallel shared memory context, there are no workers; do nothing. */
if (t_thrd.bgworker_cxt.memCxt == NULL) {
return;
}
pstate->state = BM_INITIAL;
if (pstate->tbmiterator != NULL) {
tbm_free_shared_area(pstate->tbmiterator);
}
if (pstate->prefetch_iterator != NULL) {
tbm_free_shared_area(pstate->prefetch_iterator);
}
pstate->tbmiterator = NULL;
pstate->prefetch_iterator = NULL;
}
/* ----------------------------------------------------------------
* ExecBitmapHeapInitializeWorker
*
* Copy relevant information from TOC into planstate.
* ----------------------------------------------------------------
*/
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, void *context)
{
Assert(t_thrd.bgworker_cxt.memCxt != NULL);
ParallelBitmapHeapState *pstate = NULL;
knl_u_parallel_context *cxt = (knl_u_parallel_context *)context;
for (int i = 0; i < cxt->pwCtx->queryInfo.bmscan_num; i++) {
if (node->ss.ps.plan->plan_node_id == cxt->pwCtx->queryInfo.bmscan[i]->plan_node_id) {
pstate = cxt->pwCtx->queryInfo.bmscan[i];
break;
}
}
if (pstate == NULL) {
ereport(ERROR, (errmsg("could not find plan info, plan node id:%d", node->ss.ps.plan->plan_node_id)));
}
node->pstate = pstate;
Snapshot snapshot = RestoreSnapshot(pstate->phs_snapshot_data,
pstate->pscan_len - offsetof(ParallelBitmapHeapState, phs_snapshot_data));
heap_scan_update_snapshot((HeapScanDesc)node->ss.ss_currentScanDesc, snapshot);
}

View File

@ -91,7 +91,8 @@ Node* MultiExecBitmapIndexScan(BitmapIndexScanState* node)
node->biss_result = NULL; /* reset for next time */
} else {
/* XXX should we use less than u_sess->attr.attr_memory.work_mem for this? */
tbm = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L);
tbm = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L,
((BitmapIndexScan *) node->ss.ps.plan)->isshared ? t_thrd.bgworker_cxt.memCxt : NULL);
/* If bitmapscan uses global partition index, set tbm to global */
if (RelationIsGlobalIndex(node->biss_RelationDesc)) {

View File

@ -125,7 +125,8 @@ Node* MultiExecBitmapOr(BitmapOrState* node)
/* first subplan */
if (result == NULL) {
/* XXX should we use less than u_sess->attr.attr_memory.work_mem for this? */
result = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L);
result = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L,
((BitmapOr *) node->ps.plan)->isshared ? t_thrd.bgworker_cxt.memCxt : NULL);
/* If bitmapscan uses global partition index, set tbm to global */
if (RelationIsGlobalIndex(((BitmapIndexScanState*)subnode)->biss_RelationDesc)) {
tbm_set_global(result, true);

View File

@ -2047,14 +2047,13 @@ bool ExecParallelScanHashBucket(HashJoinState* hjstate, ExprContext* econtext)
return true;
}
}
/*
* For right Semi/Anti join, we delete matched tuples in HashTable to make the next matching faster,
* so pointer hj_PreTuple is designed to follow the hj_CurTuple and to help us to clear the HashTable.
*/
if (hjstate->js.jointype == JOIN_RIGHT_SEMI || hjstate->js.jointype == JOIN_RIGHT_ANTI) {
hjstate->hj_PreTuple = hashTuple;
}
/*
* We don't support parallel right Semi/Anti join, so we don't set hj_PreTuple the way ExecScanHashBucket
* does. See ExecScanHashBucket and hash_inner_and_outer.
*/
Assert(hjstate->js.jointype != JOIN_RIGHT_SEMI);
Assert(hjstate->js.jointype != JOIN_RIGHT_ANTI);
hashTuple = ExecParallelHashNextTuple(hashtable, hashTuple);
}

View File

@ -1597,7 +1597,6 @@ void ExecHashJoinInitializeDSM(HashJoinState* state, ParallelContext* pcxt, int
/* Initialize the shared state in the hash node. */
hashNode = (HashState*)innerPlanState(state);
hashNode->parallel_state = pstate;
t_thrd.bgworker_cxt.memCxt = cxt->memCtx;
}
/* ----------------------------------------------------------------
@ -1669,5 +1668,4 @@ void ExecHashJoinInitializeWorker(HashJoinState* state, void* pwcxt)
hashNode = (HashState*)innerPlanState(state);
hashNode->parallel_state = pstate;
state->isParallel = true;
t_thrd.bgworker_cxt.memCxt = cxt->memCtx;
}

View File

@ -135,7 +135,7 @@ static bool collectMatchBitmap(GinBtreeData* btree, GinBtreeStack* stack, GinSca
/* Initialize empty bitmap result */
if (!isColStore) {
scanEntry->matchBitmap = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L);
scanEntry->matchBitmap = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L, NULL);
}
/* Null query cannot partial-match anything */

View File

@ -1857,6 +1857,15 @@ HeapTuple heapGetNextForVerify(HeapScanDesc scan, ScanDirection direction, bool&
return &(scan->rs_ctup);
}
void heap_scan_update_snapshot(HeapScanDesc scan, Snapshot snapshot)
{
Assert(IsMVCCSnapshot(snapshot));
RegisterSnapshot(snapshot);
scan->rs_snapshot = snapshot;
scan->rs_flags |= SO_TEMP_SNAPSHOT;
}
/* ----------------
* heap_parallelscan_estimate - estimate storage for ParallelHeapScanDesc
*

View File

@ -918,6 +918,7 @@ void ParallelWorkerMain(Datum main_arg)
/* Set flag to indicate that we're initializing a parallel worker. */
t_thrd.bgworker_cxt.InitializingParallelWorker = true;
t_thrd.bgworker_cxt.memCxt = ctx->memCtx;
/* Establish signal handlers. */
gspqsignal(SIGTERM, die);

View File

@ -104,6 +104,8 @@ void *dsm_create(void)
u_sess->parallel_ctx[i].used = true;
slist_init(&u_sess->parallel_ctx[i].on_detach);
t_thrd.bgworker_cxt.memCxt = u_sess->parallel_ctx[i].memCtx;
return &(u_sess->parallel_ctx[i]);
}
}

View File

@ -158,7 +158,8 @@ static const char *BuiltinTrancheNames[] = {
"PLdebugger",
"SharedTupleStore",
"parallel_append",
"ParallelHashJoinLock"
"ParallelHashJoinLock",
"TidBitMapLock"
};
static void RegisterLWLockTranches(void);

View File

@ -101,6 +101,10 @@ extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
extern void heap_rescan(HeapScanDesc scan, ScanKey key);
extern void heap_endscan(HeapScanDesc scan);
extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
/*
* Update snapshot used by the scan.
*/
extern void heap_scan_update_snapshot(HeapScanDesc scan, Snapshot snapshot);
extern Size heap_parallelscan_estimate(Snapshot snapshot);
extern void heap_parallelscan_initialize(ParallelHeapScanDesc target, Size pscan_len, Relation relation,

View File

@ -15,10 +15,15 @@
#define NODEBITMAPHEAPSCAN_H
#include "nodes/execnodes.h"
#include "access/parallel.h"
extern BitmapHeapScanState* ExecInitBitmapHeapScan(BitmapHeapScan* node, EState* estate, int eflags);
extern TupleTableSlot* ExecBitmapHeapScan(BitmapHeapScanState* node);
extern void ExecEndBitmapHeapScan(BitmapHeapScanState* node);
extern void ExecReScanBitmapHeapScan(BitmapHeapScanState* node);
extern void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt);
extern void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt, int nodeid);
extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt);
extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, void *context);
#endif /* NODEBITMAPHEAPSCAN_H */

View File

@ -2059,6 +2059,7 @@ struct ParallelHeapScanDescData;
struct ParallelIndexScanDescData;
struct ParallelHashJoinState;
struct SharedHashInfo;
struct ParallelBitmapHeapState;
typedef uint64 XLogRecPtr;
typedef struct ParallelQueryInfo {
struct SharedExecutorInstrumentation* instrumentation;
@ -2079,6 +2080,8 @@ typedef struct ParallelQueryInfo {
ParallelHashJoinState** jstate;
int hash_num;
SharedHashInfo** shared_info;
int bmscan_num;
ParallelBitmapHeapState **bmscan;
} ParallelQueryInfo;
struct BTShared;

View File

@ -21,6 +21,7 @@
#include "executor/instrument.h"
#include "nodes/params.h"
#include "nodes/plannodes.h"
#include "nodes/tidbitmap.h"
#include "storage/pagecompress.h"
#include "utils/bloom_filter.h"
#include "utils/reltrigger.h"
@ -1714,15 +1715,65 @@ typedef struct BitmapIndexScanState {
} BitmapIndexScanState;
/* ----------------
* BitmapHeapScanState information
* SharedBitmapState information
*
* bitmapqualorig execution state for bitmapqualorig expressions
* tbm bitmap obtained from child index scan(s)
* tbmiterator iterator for scanning current pages
* tbmres current-page data
* prefetch_iterator iterator for prefetching ahead of current page
* prefetch_pages # pages prefetch iterator is ahead of current
* prefetch_target target prefetch distance
* BM_INITIAL TIDBitmap creation has not started yet; the first
* worker to see this state will set it to BM_INPROGRESS,
* and that process will be responsible for creating
* the TIDBitmap.
* BM_INPROGRESS TIDBitmap creation is in progress; workers need to
* sleep until it's finished.
* BM_FINISHED TIDBitmap creation is done, so now all workers can
* proceed to iterate over TIDBitmap.
* ----------------
*/
typedef enum {
BM_INITIAL,
BM_INPROGRESS,
BM_FINISHED
} SharedBitmapState;
/* ----------------
* ParallelBitmapHeapState information
* tbmiterator iterator for scanning current pages
* prefetch_iterator iterator for prefetching ahead of current page
* mutex mutual exclusion for the prefetching variable
* and state
* prefetch_pages # pages prefetch iterator is ahead of current
* prefetch_target current target prefetch distance
* state current state of the TIDBitmap
* cv condition variable to wait on
* phs_snapshot_data snapshot data shared to workers
* ----------------
*/
typedef struct ParallelBitmapHeapState {
TBMSharedIteratorState *tbmiterator;
TBMSharedIteratorState *prefetch_iterator;
Size pscan_len;
int prefetch_pages;
int prefetch_target;
int plan_node_id;
SharedBitmapState state;
pthread_mutex_t cv_mtx;
pthread_cond_t cv;
char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
} ParallelBitmapHeapState;
/* ----------------
* BitmapHeapScanState information
*
* bitmapqualorig execution state for bitmapqualorig expressions
* tbm bitmap obtained from child index scan(s)
* tbmiterator iterator for scanning current pages
* tbmres current-page data
* prefetch_iterator iterator for prefetching ahead of current page
* prefetch_pages # pages prefetch iterator is ahead of current
* prefetch_target target prefetch distance
* pscan_len size of the shared memory for the parallel bitmap scan
* initialized whether the node is ready to iterate
* shared_tbmiterator shared iterator
* shared_prefetch_iterator shared iterator for prefetching
* pstate shared state for parallel bitmap scan
* ----------------
*/
typedef struct BitmapHeapScanState {
@ -1735,6 +1786,11 @@ typedef struct BitmapHeapScanState {
int prefetch_pages;
int prefetch_target;
GPIScanDesc gpi_scan; /* global partition index scan use information */
Size pscan_len;
bool initialized;
TBMSharedIterator *shared_tbmiterator;
TBMSharedIterator *shared_prefetch_iterator;
ParallelBitmapHeapState *pstate;
} BitmapHeapScanState;
/* ----------------

View File

@ -507,6 +507,7 @@ typedef struct BitmapAnd {
*/
typedef struct BitmapOr {
Plan plan;
bool isshared;
List* bitmapplans;
} BitmapOr;
@ -683,6 +684,7 @@ typedef struct IndexOnlyScan {
typedef struct BitmapIndexScan {
Scan scan;
Oid indexid; /* OID of index to scan */
bool isshared;
char* indexname; /* name of index to scan */
List* indexqual; /* list of index quals (OpExprs) */
List* indexqualorig; /* the same in original form */

View File

@ -32,6 +32,8 @@ typedef struct TIDBitmap TIDBitmap;
/* Likewise, TBMIterator is private */
typedef struct TBMIterator TBMIterator;
typedef struct TBMSharedIterator TBMSharedIterator;
typedef struct TBMSharedIteratorState TBMSharedIteratorState;
/* Result structure for tbm_iterate */
typedef struct {
@ -44,8 +46,9 @@ typedef struct {
} TBMIterateResult;
/* function prototypes in nodes/tidbitmap.c */
extern TIDBitmap* tbm_create(long maxbytes);
extern TIDBitmap* tbm_create(long maxbytes, MemoryContext dsa);
extern void tbm_free(TIDBitmap* tbm);
extern void tbm_free_shared_area(TBMSharedIteratorState *istate);
extern void tbm_add_tuples(
TIDBitmap* tbm, const ItemPointer tids, int ntids, bool recheck, Oid partitionOid = InvalidOid);
@ -57,8 +60,12 @@ extern void tbm_intersect(TIDBitmap* a, const TIDBitmap* b);
extern bool tbm_is_empty(const TIDBitmap* tbm);
extern TBMIterator* tbm_begin_iterate(TIDBitmap* tbm);
extern TBMSharedIteratorState* tbm_prepare_shared_iterate(TIDBitmap *tbm);
extern TBMIterateResult* tbm_iterate(TBMIterator* iterator);
extern TBMIterateResult* tbm_shared_iterate(TBMSharedIterator *iterator);
extern void tbm_end_iterate(TBMIterator* iterator);
extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
extern TBMSharedIterator *tbm_attach_shared_iterate(TBMSharedIteratorState* istate);
extern bool tbm_is_global(const TIDBitmap* tbm);
extern void tbm_set_global(TIDBitmap* tbm, bool isGlobal);
#endif /* TIDBITMAP_H */
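The new entry points split shared iteration into a one-time prepare step and a per-participant attach step. A hedged usage sketch of that sequence, assuming only the declarations above; the executor plumbing is elided and sharedCtx stands in for the parallel query's shared memory context (t_thrd.bgworker_cxt.memCxt in the patch):

#include "nodes/tidbitmap.h"

/* Leader: build the bitmap in the shared context and publish the iterator
 * state. Every participant (leader included) then attaches a private
 * TBMSharedIterator and pulls pages from the common position. */
static void ScanSharedBitmap(MemoryContext sharedCtx)
{
    TIDBitmap *tbm = tbm_create(64 * 1024 * 1024L, sharedCtx);
    /* ... tbm_add_tuples() calls from the bitmap index scans ... */

    /* Leader only: convert the page table into the shared iteration state. */
    TBMSharedIteratorState *istate = tbm_prepare_shared_iterate(tbm);

    /* Every participant: attach a private iterator to the shared state. */
    TBMSharedIterator *it = tbm_attach_shared_iterate(istate);
    TBMIterateResult *res = NULL;
    while ((res = tbm_shared_iterate(it)) != NULL) {
        /* fetch heap page res->blockno; recheck quals if res->recheck */
    }
    tbm_end_shared_iterate(it);
}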

View File

@ -179,6 +179,8 @@ extern double estimate_hash_num_distinct(PlannerInfo* root, List* hashkey, Path*
double local_ndistinct, double global_ndistinct, bool* usesinglestats);
extern RelOptInfo* find_join_input_rel(PlannerInfo* root, Relids relids);
extern double compute_sort_disk_cost(double input_bytes, double sort_mem_bytes);
extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
Path *bitmapqual, double loop_count, Cost *cost, double *tuple, bool ispartitionedindex);
extern double approx_tuple_count(PlannerInfo* root, JoinPath* path, List* quals);
extern void set_rel_path_rows(Path* path, RelOptInfo* rel, ParamPathInfo* param_info);

View File

@ -66,8 +66,8 @@ extern bool CheckBitmapHeapPathContainGlobalOrLocal(Path* bitmapqual);
extern bool check_bitmap_heap_path_index_unusable(Path* bitmapqual, RelOptInfo* baserel);
extern bool is_partitionIndex_Subpath(Path* subpath);
extern bool is_pwj_path(Path* pwjpath);
extern BitmapHeapPath* create_bitmap_heap_path(
PlannerInfo* root, RelOptInfo* rel, Path* bitmapqual, Relids required_outer, double loop_count);
extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual,
Relids required_outer, double loop_count, int parallel_degree);
extern BitmapAndPath* create_bitmap_and_path(PlannerInfo* root, RelOptInfo* rel, List* bitmapquals);
extern BitmapOrPath* create_bitmap_or_path(PlannerInfo* root, RelOptInfo* rel, List* bitmapquals);
extern TidPath* create_tidscan_path(PlannerInfo* root, RelOptInfo* rel, List* tidquals);

View File

@ -21,6 +21,7 @@ extern RelOptInfo* standard_join_search(PlannerInfo* root, int levels_needed, Li
extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
extern int compute_parallel_worker(const RelOptInfo *rel, double heap_pages, double index_pages, int max_workers);
extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual);
extern void set_rel_size(PlannerInfo* root, RelOptInfo* rel, Index rti, RangeTblEntry* rte);

View File

@ -146,6 +146,7 @@ enum BuiltinTrancheIds {
LWTRANCHE_SHARED_TUPLESTORE,
LWTRANCHE_PARALLEL_APPEND,
LWTRANCHE_PARALLEL_HASH_JOIN,
LWTRANCHE_TBM,
/*
* Each trancheId above should have a corresponding item in BuiltinTrancheNames;
*/

View File

@ -82,18 +82,24 @@ explain (costs off) select * from a where a1 > 4 union all select * from b where
(10 rows)
explain (costs off) select * from c where c1 in (select a1 from a union select b1 from b);
QUERY PLAN
---------------------------------
QUERY PLAN
------------------------------------------------
Hash Join
Hash Cond: (a.a1 = c.c1)
-> HashAggregate
Group By Key: a.a1
-> Append
-> Seq Scan on a
-> Seq Scan on b
-> Gather
Number of Workers: 1
-> Parallel Seq Scan on a
-> Gather
Number of Workers: 1
-> Parallel Seq Scan on b
-> Hash
-> Seq Scan on c
(9 rows)
-> Gather
Number of Workers: 1
-> Parallel Seq Scan on c
(15 rows)
explain (costs off) select * from (select * from a union all select * from b) as ta, c where ta.a1 = c.c1;
QUERY PLAN

View File

@ -72,14 +72,539 @@ select count(*) from parallel_t1 where a <> 5000;
99999
(1 row)
--normal plan for bitmapscan
create index idx_parallel_t1 on parallel_t1(a);
analyze parallel_t1;
set enable_seqscan to off;
set enable_indexscan to off;
explain (costs off) select count(*) from parallel_t1 where a > 5000;
QUERY PLAN
--------------------------------------------------
Aggregate
-> Bitmap Heap Scan on parallel_t1
Recheck Cond: (a > 5000)
-> Bitmap Index Scan on idx_parallel_t1
Index Cond: (a > 5000)
(5 rows)
explain (costs off) select count(*) from parallel_t1 where a < 5000;
QUERY PLAN
--------------------------------------------------
Aggregate
-> Bitmap Heap Scan on parallel_t1
Recheck Cond: (a < 5000)
-> Bitmap Index Scan on idx_parallel_t1
Index Cond: (a < 5000)
(5 rows)
explain (costs off) select count(*) from parallel_t1 where a <> 5000;
QUERY PLAN
--------------------------------------------------
Aggregate
-> Bitmap Heap Scan on parallel_t1
Filter: (a <> 5000)
-> Bitmap Index Scan on idx_parallel_t1
(4 rows)
select count(*) from parallel_t1 where a > 5000;
count
-------
95000
(1 row)
select count(*) from parallel_t1 where a < 5000;
count
-------
4999
(1 row)
select count(*) from parallel_t1 where a <> 5000;
count
-------
99999
(1 row)
reset enable_seqscan;
reset enable_indexscan;
--set parallel parameter
set force_parallel_mode=on;
set parallel_setup_cost=0;
set parallel_tuple_cost=0.000005;
set max_parallel_workers_per_gather=2;
set min_parallel_table_scan_size=0;
set min_parallel_index_scan_size=0;
set parallel_leader_participation=on;
--rescan case
create table test_with_rescan(dm int, sj_dm int, name text);
insert into test_with_rescan values(1,0,'universe');
insert into test_with_rescan values(2,1,'galaxy');
insert into test_with_rescan values(3,2,'sun');
insert into test_with_rescan values(4,3,'earth');
insert into test_with_rescan values(5,4,'asia');
insert into test_with_rescan values(6,5,'China');
insert into test_with_rescan values(7,6,'shaanxi');
insert into test_with_rescan values(8,7,'xian');
insert into test_with_rescan values(9,8,'huawei');
insert into test_with_rescan values(10,9,'v10');
insert into test_with_rescan values(11,10,'v10-3L');
insert into test_with_rescan values(12,11,'gauss');
insert into test_with_rescan values(13,12,'test');
insert into test_with_rescan values(14,13,'test');
insert into test_with_rescan values(15,14,'test');
insert into test_with_rescan values(16,15,'test');
insert into test_with_rescan values(17,16,'test');
insert into test_with_rescan values(18,17,'test');
insert into test_with_rescan values(19,18,'test');
insert into test_with_rescan values(20,19,'test');
create index on test_with_rescan(dm);
create index on test_with_rescan(sj_dm);
create index on test_with_rescan(name);
analyze test_with_rescan;
explain (costs off)
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t;
QUERY PLAN
--------------------------------------------------------------------------
CTE Scan on t_result t
CTE t_result
-> Recursive Union
-> Limit
-> Sort
Sort Key: test_with_rescan.dm
-> Gather
Number of Workers: 1
-> Parallel Seq Scan on test_with_rescan
Filter: (sj_dm < 10)
-> Hash Join
Hash Cond: (t1.dm = t2.sj_dm)
-> WorkTable Scan on t_result t1
-> Hash
-> Gather
Number of Workers: 1
-> Parallel Seq Scan on test_with_rescan t2
(17 rows)
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t order by 1,2,3,4;
dm | sj_dm | name | level
----+-------+-------------------------------------------------------------------------------------------------------------------------------------+-------
3 | 2 | sun | 1
4 | 3 | earth | 1
4 | 3 | earth > sun | 2
5 | 4 | asia | 1
5 | 4 | asia > earth | 2
5 | 4 | asia > earth > sun | 3
6 | 5 | China | 1
6 | 5 | China > asia | 2
6 | 5 | China > asia > earth | 3
6 | 5 | China > asia > earth > sun | 4
7 | 6 | shaanxi | 1
7 | 6 | shaanxi > China | 2
7 | 6 | shaanxi > China > asia | 3
7 | 6 | shaanxi > China > asia > earth | 4
7 | 6 | shaanxi > China > asia > earth > sun | 5
8 | 7 | xian | 1
8 | 7 | xian > shaanxi | 2
8 | 7 | xian > shaanxi > China | 3
8 | 7 | xian > shaanxi > China > asia | 4
8 | 7 | xian > shaanxi > China > asia > earth | 5
8 | 7 | xian > shaanxi > China > asia > earth > sun | 6
9 | 8 | huawei > xian | 2
9 | 8 | huawei > xian > shaanxi | 3
9 | 8 | huawei > xian > shaanxi > China | 4
9 | 8 | huawei > xian > shaanxi > China > asia | 5
9 | 8 | huawei > xian > shaanxi > China > asia > earth | 6
9 | 8 | huawei > xian > shaanxi > China > asia > earth > sun | 7
10 | 9 | v10 > huawei > xian | 3
10 | 9 | v10 > huawei > xian > shaanxi | 4
10 | 9 | v10 > huawei > xian > shaanxi > China | 5
10 | 9 | v10 > huawei > xian > shaanxi > China > asia | 6
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth | 7
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth > sun | 8
11 | 10 | v10-3L > v10 > huawei > xian | 4
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi | 5
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China | 6
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia | 7
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 8
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 9
12 | 11 | gauss > v10-3L > v10 > huawei > xian | 5
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi | 6
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 7
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 8
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 9
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 10
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian | 6
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 7
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 8
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 9
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 10
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 11
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian | 7
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 8
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 9
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 10
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 11
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 12
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian | 8
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 9
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 10
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 11
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 12
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 13
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 9
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 10
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 11
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 12
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 13
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 14
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 10
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 11
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 12
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 13
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 14
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 15
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 11
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 12
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 13
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 14
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 15
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 16
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 12
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 13
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 14
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 15
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 16
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 17
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 13
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 14
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 15
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 16
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 17
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 18
(93 rows)
--increase cpu_tuple_cost, disable seqscan, test parallel index scan
set enable_seqscan=off;
set cpu_tuple_cost=1000;
explain (costs off)
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
CTE Scan on t_result t
CTE t_result
-> Recursive Union
-> Limit
-> Sort
Sort Key: test_with_rescan.dm
-> Gather
Number of Workers: 1
-> Parallel Index Scan using test_with_rescan_sj_dm_idx on test_with_rescan
Index Cond: (sj_dm < 10)
-> Merge Join
Merge Cond: (t1.dm = t2.sj_dm)
-> Sort
Sort Key: t1.dm
-> WorkTable Scan on t_result t1
-> Sort
Sort Key: t2.sj_dm
-> Gather
Number of Workers: 1
-> Parallel Index Scan using test_with_rescan_sj_dm_idx on test_with_rescan t2
(20 rows)
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t order by 1,2,3,4;
dm | sj_dm | name | level
----+-------+-------------------------------------------------------------------------------------------------------------------------------------+-------
3 | 2 | sun | 1
4 | 3 | earth | 1
4 | 3 | earth > sun | 2
5 | 4 | asia | 1
5 | 4 | asia > earth | 2
5 | 4 | asia > earth > sun | 3
6 | 5 | China | 1
6 | 5 | China > asia | 2
6 | 5 | China > asia > earth | 3
6 | 5 | China > asia > earth > sun | 4
7 | 6 | shaanxi | 1
7 | 6 | shaanxi > China | 2
7 | 6 | shaanxi > China > asia | 3
7 | 6 | shaanxi > China > asia > earth | 4
7 | 6 | shaanxi > China > asia > earth > sun | 5
8 | 7 | xian | 1
8 | 7 | xian > shaanxi | 2
8 | 7 | xian > shaanxi > China | 3
8 | 7 | xian > shaanxi > China > asia | 4
8 | 7 | xian > shaanxi > China > asia > earth | 5
8 | 7 | xian > shaanxi > China > asia > earth > sun | 6
9 | 8 | huawei > xian | 2
9 | 8 | huawei > xian > shaanxi | 3
9 | 8 | huawei > xian > shaanxi > China | 4
9 | 8 | huawei > xian > shaanxi > China > asia | 5
9 | 8 | huawei > xian > shaanxi > China > asia > earth | 6
9 | 8 | huawei > xian > shaanxi > China > asia > earth > sun | 7
10 | 9 | v10 > huawei > xian | 3
10 | 9 | v10 > huawei > xian > shaanxi | 4
10 | 9 | v10 > huawei > xian > shaanxi > China | 5
10 | 9 | v10 > huawei > xian > shaanxi > China > asia | 6
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth | 7
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth > sun | 8
11 | 10 | v10-3L > v10 > huawei > xian | 4
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi | 5
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China | 6
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia | 7
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 8
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 9
12 | 11 | gauss > v10-3L > v10 > huawei > xian | 5
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi | 6
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 7
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 8
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 9
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 10
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian | 6
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 7
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 8
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 9
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 10
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 11
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian | 7
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 8
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 9
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 10
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 11
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 12
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian | 8
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 9
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 10
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 11
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 12
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 13
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 9
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 10
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 11
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 12
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 13
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 14
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 10
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 11
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 12
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 13
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 14
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 15
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 11
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 12
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 13
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 14
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 15
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 16
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 12
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 13
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 14
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 15
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 16
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 17
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 13
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 14
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 15
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 16
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 17
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 18
(93 rows)
--disable indexscan, test parallel bitmap scan
set enable_indexscan to off;
explain (costs off)
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t;
QUERY PLAN
---------------------------------------------------------------------------------------
CTE Scan on t_result t
CTE t_result
-> Recursive Union
-> Limit
-> Sort
Sort Key: test_with_rescan.dm
-> Gather
Number of Workers: 1
-> Parallel Bitmap Heap Scan on test_with_rescan
Recheck Cond: (sj_dm < 10)
-> Bitmap Index Scan on test_with_rescan_sj_dm_idx
Index Cond: (sj_dm < 10)
-> Nested Loop
-> WorkTable Scan on t_result t1
-> Bitmap Heap Scan on test_with_rescan t2
Recheck Cond: (sj_dm = t1.dm)
-> Bitmap Index Scan on test_with_rescan_sj_dm_idx
Index Cond: (sj_dm = t1.dm)
(18 rows)
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t order by 1,2,3,4;
dm | sj_dm | name | level
----+-------+-------------------------------------------------------------------------------------------------------------------------------------+-------
3 | 2 | sun | 1
4 | 3 | earth | 1
4 | 3 | earth > sun | 2
5 | 4 | asia | 1
5 | 4 | asia > earth | 2
5 | 4 | asia > earth > sun | 3
6 | 5 | China | 1
6 | 5 | China > asia | 2
6 | 5 | China > asia > earth | 3
6 | 5 | China > asia > earth > sun | 4
7 | 6 | shaanxi | 1
7 | 6 | shaanxi > China | 2
7 | 6 | shaanxi > China > asia | 3
7 | 6 | shaanxi > China > asia > earth | 4
7 | 6 | shaanxi > China > asia > earth > sun | 5
8 | 7 | xian | 1
8 | 7 | xian > shaanxi | 2
8 | 7 | xian > shaanxi > China | 3
8 | 7 | xian > shaanxi > China > asia | 4
8 | 7 | xian > shaanxi > China > asia > earth | 5
8 | 7 | xian > shaanxi > China > asia > earth > sun | 6
9 | 8 | huawei > xian | 2
9 | 8 | huawei > xian > shaanxi | 3
9 | 8 | huawei > xian > shaanxi > China | 4
9 | 8 | huawei > xian > shaanxi > China > asia | 5
9 | 8 | huawei > xian > shaanxi > China > asia > earth | 6
9 | 8 | huawei > xian > shaanxi > China > asia > earth > sun | 7
10 | 9 | v10 > huawei > xian | 3
10 | 9 | v10 > huawei > xian > shaanxi | 4
10 | 9 | v10 > huawei > xian > shaanxi > China | 5
10 | 9 | v10 > huawei > xian > shaanxi > China > asia | 6
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth | 7
10 | 9 | v10 > huawei > xian > shaanxi > China > asia > earth > sun | 8
11 | 10 | v10-3L > v10 > huawei > xian | 4
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi | 5
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China | 6
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia | 7
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 8
11 | 10 | v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 9
12 | 11 | gauss > v10-3L > v10 > huawei > xian | 5
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi | 6
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 7
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 8
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 9
12 | 11 | gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 10
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian | 6
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 7
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 8
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 9
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 10
13 | 12 | test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 11
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian | 7
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 8
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 9
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 10
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 11
14 | 13 | test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 12
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian | 8
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 9
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 10
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 11
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 12
15 | 14 | test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 13
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 9
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 10
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 11
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 12
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 13
16 | 15 | test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 14
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 10
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 11
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 12
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 13
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 14
17 | 16 | test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 15
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 11
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 12
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 13
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 14
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 15
18 | 17 | test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 16
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 12
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 13
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 14
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 15
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 16
19 | 18 | test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 17
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian | 13
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi | 14
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China | 15
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia | 16
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth | 17
20 | 19 | test > test > test > test > test > test > test > test > gauss > v10-3L > v10 > huawei > xian > shaanxi > China > asia > earth > sun | 18
(93 rows)
drop table test_with_rescan;
reset enable_seqscan;
reset enable_indexscan;
reset cpu_tuple_cost;
--parallel plan for seq scan
set enable_bitmapscan to off;
set enable_indexscan to off;
explain (costs off) select count(*) from parallel_t1;
QUERY PLAN
----------------------------------------------
@ -174,6 +699,100 @@ explain (costs off,analyse on,verbose on) select count(*) from parallel_t1;
--? Total runtime: [0-9]*\.[0-9]*\.\.[0-9]*\.[0-9]* ms
(10 rows)
reset enable_indexscan;
reset enable_bitmapscan;
--parallel plan for bitmap scan
--onepage case
CREATE TABLE onepage1 (val int, val2 int);
ALTER TABLE onepage1 ADD PRIMARY KEY(val, val2);
NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "onepage1_pkey" for table "onepage1"
insert into onepage1 (select * from generate_series(1, 5) a, generate_series(1, 5) b);
CREATE TABLE onepage2 as select * from onepage1;
explain select * from onepage2 natural join onepage1 where onepage2.val > 2 and onepage1.val < 4;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Gather (cost=15.04..32.69 rows=1 width=8)
Number of Workers: 2
-> Parallel Hash Join (cost=15.04..32.69 rows=1 width=8)
Hash Cond: ((onepage2.val = onepage1.val) AND (onepage2.val2 = onepage1.val2))
-> Parallel Seq Scan on onepage2 (cost=0.00..17.60 rows=5 width=8)
Filter: ((val > 2) AND (val < 4))
-> Parallel Hash (cost=14.97..14.97 rows=5 width=8)
-> Parallel Bitmap Heap Scan on onepage1 (cost=4.36..14.97 rows=5 width=8)
Recheck Cond: ((val < 4) AND (val > 2))
-> Bitmap Index Scan on onepage1_pkey (cost=0.00..4.36 rows=11 width=0)
Index Cond: ((val < 4) AND (val > 2))
(11 rows)
select * from onepage2 natural join onepage1 where onepage2.val > 2 and onepage1.val < 4 order by 1,2;
val | val2
-----+------
3 | 1
3 | 2
3 | 3
3 | 4
3 | 5
(5 rows)
drop table onepage1;
drop table onepage2;
set enable_seqscan to off;
set enable_indexscan to off;
explain (costs off) select count(*) from parallel_t1 where a > 5000;
QUERY PLAN
--------------------------------------------------------
Aggregate
-> Gather
Number of Workers: 2
-> Parallel Bitmap Heap Scan on parallel_t1
Recheck Cond: (a > 5000)
-> Bitmap Index Scan on idx_parallel_t1
Index Cond: (a > 5000)
(7 rows)
explain (costs off) select count(*) from parallel_t1 where a < 5000;
QUERY PLAN
--------------------------------------------------------
Aggregate
-> Gather
Number of Workers: 2
-> Parallel Bitmap Heap Scan on parallel_t1
Recheck Cond: (a < 5000)
-> Bitmap Index Scan on idx_parallel_t1
Index Cond: (a < 5000)
(7 rows)
explain (costs off) select count(*) from parallel_t1 where a <> 5000;
QUERY PLAN
--------------------------------------------------------
Aggregate
-> Gather
Number of Workers: 2
-> Parallel Bitmap Heap Scan on parallel_t1
Filter: (a <> 5000)
-> Bitmap Index Scan on idx_parallel_t1
(6 rows)
select count(*) from parallel_t1 where a > 5000;
count
-------
95000
(1 row)
select count(*) from parallel_t1 where a < 5000;
count
-------
4999
(1 row)
select count(*) from parallel_t1 where a <> 5000;
count
-------
99999
(1 row)
reset enable_seqscan;
reset enable_indexscan;
--clean up
reset force_parallel_mode;
reset parallel_setup_cost;
@ -442,6 +1061,57 @@ select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
reset enable_hashjoin;
reset enable_nestloop;
reset enable_indexscan;
--parallel with subplan
drop table subplan_tb1;
ERROR: table "subplan_tb1" does not exist
create table subplan_tb1(a int, b varchar);
insert into subplan_tb1 values (0, NULL);
insert into subplan_tb1 values (1, NULL);
insert into subplan_tb1 values (2, NULL);
insert into subplan_tb1 values (3, NULL);
explain (verbose,costs off) select boo_1.a
from subplan_tb1 boo_1 inner join subplan_tb1 boo_2
on ( case when boo_1.a in (
select boo_3.a from subplan_tb1 boo_3 ) then boo_1.a end ) in
(select max( boo.a ) column_009 from subplan_tb1 boo);
QUERY PLAN
---------------------------------------------------------------------------------------------------
Merge Join
Output: boo_1.a
Merge Cond: ((max(boo.a)) = (CASE WHEN (hashed SubPlan 1) THEN boo_1.a ELSE NULL::integer END))
-> Sort
Output: (max(boo.a))
Sort Key: (max(boo.a))
-> Nested Loop
Output: (max(boo.a))
-> Aggregate
Output: max(boo.a)
-> Gather
Output: boo.a
Number of Workers: 2
-> Parallel Seq Scan on public.subplan_tb1 boo
Output: boo.a
-> Gather
Output: boo_2.a, boo_2.b
Number of Workers: 2
-> Parallel Seq Scan on public.subplan_tb1 boo_2
Output: boo_2.a, boo_2.b
-> Sort
Output: boo_1.a, (CASE WHEN (hashed SubPlan 1) THEN boo_1.a ELSE NULL::integer END)
Sort Key: (CASE WHEN (hashed SubPlan 1) THEN boo_1.a ELSE NULL::integer END)
-> Gather
Output: boo_1.a, CASE WHEN (hashed SubPlan 1) THEN boo_1.a ELSE NULL::integer END
Number of Workers: 2
-> Parallel Seq Scan on public.subplan_tb1 boo_1
Output: boo_1.a
SubPlan 1
-> Gather
Output: boo_3.a
Number of Workers: 2
-> Parallel Seq Scan on public.subplan_tb1 boo_3
Output: boo_3.a
(34 rows)
--clean up
drop table parallel_t1;
drop table parallel_t2;
View File
@ -29,8 +29,10 @@ test: misc
test: stats
test: alter_system_set
# parallel query
test: parallel_query parallel_nested_loop parallel_hashjoin parallel_append parallel_create_index
# parallel query; don't put more than 2 parallel query test cases into one test group
test: parallel_query parallel_nested_loop
test: parallel_hashjoin parallel_append
test: parallel_create_index
#dispatch from 13
test: function
View File
@ -26,5 +26,7 @@ test: upsert_grammer_test_02 upsert_restriction upsert_composite
test: upsert_trigger_test upsert_explain
test: upsert_clean
# test parallel query
test: parallel_query parallel_nested_loop parallel_hashjoin parallel_append parallel_create_index
# test parallel query; don't put more than 2 parallel query test cases into one test group
test: parallel_query parallel_nested_loop
test: parallel_hashjoin parallel_append
test: parallel_create_index
View File
@ -14,15 +14,149 @@ select count(*) from parallel_t1 where a > 5000;
select count(*) from parallel_t1 where a < 5000;
select count(*) from parallel_t1 where a <> 5000;
--normal plan for bitmapscan
create index idx_parallel_t1 on parallel_t1(a);
analyze parallel_t1;
set enable_seqscan to off;
set enable_indexscan to off;
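-- With seqscan and indexscan off, bitmap scan is the only path left; the
-- parallel GUCs are not set yet, so these plans should be the serial baseline.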
explain (costs off) select count(*) from parallel_t1 where a > 5000;
explain (costs off) select count(*) from parallel_t1 where a < 5000;
explain (costs off) select count(*) from parallel_t1 where a <> 5000;
select count(*) from parallel_t1 where a > 5000;
select count(*) from parallel_t1 where a < 5000;
select count(*) from parallel_t1 where a <> 5000;
reset enable_seqscan;
reset enable_indexscan;
--set parallel parameters
set force_parallel_mode=on;
set parallel_setup_cost=0;
set parallel_tuple_cost=0.000005;
set max_parallel_workers_per_gather=2;
set min_parallel_table_scan_size=0;
set min_parallel_index_scan_size=0;
set parallel_leader_participation=on;
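-- Note: with setup/tuple costs near zero and both size thresholds at 0, the
-- planner should pick parallel plans (up to 2 workers) even for tiny tables.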
--rescan case
create table test_with_rescan(dm int, sj_dm int, name text);
insert into test_with_rescan values(1,0,'universe');
insert into test_with_rescan values(2,1,'galaxy');
insert into test_with_rescan values(3,2,'sun');
insert into test_with_rescan values(4,3,'earth');
insert into test_with_rescan values(5,4,'asia');
insert into test_with_rescan values(6,5,'China');
insert into test_with_rescan values(7,6,'shaanxi');
insert into test_with_rescan values(8,7,'xian');
insert into test_with_rescan values(9,8,'huawei');
insert into test_with_rescan values(10,9,'v10');
insert into test_with_rescan values(11,10,'v10-3L');
insert into test_with_rescan values(12,11,'gauss');
insert into test_with_rescan values(13,12,'test');
insert into test_with_rescan values(14,13,'test');
insert into test_with_rescan values(15,14,'test');
insert into test_with_rescan values(16,15,'test');
insert into test_with_rescan values(17,16,'test');
insert into test_with_rescan values(18,17,'test');
insert into test_with_rescan values(19,18,'test');
insert into test_with_rescan values(20,19,'test');
create index on test_with_rescan(dm);
create index on test_with_rescan(sj_dm);
create index on test_with_rescan(name);
analyze test_with_rescan;
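-- The recursive CTE below joins back to test_with_rescan on every recursion
-- level, so the inner scan is rescanned repeatedly; this should exercise the
-- rescan path for each scan type forced below.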
explain (costs off)
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t;
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t order by 1,2,3,4;
--increase cpu_tuple_cost, disable seqscan, test parallel index scan
set enable_seqscan=off;
set cpu_tuple_cost=1000;
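-- An inflated cpu_tuple_cost (with seqscan already off) should steer the
-- planner toward a (parallel) index scan for the recursive member.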
explain (costs off)
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t;
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t order by 1,2,3,4;
--disable indexscan, test parallel bitmap scan
set enable_indexscan to off;
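-- With seqscan and indexscan both off, the remaining choice should be a
-- (parallel) bitmap heap scan, rescanned once per recursion level.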
explain (costs off)
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t;
WITH recursive t_result AS (
select * from(
SELECT dm,sj_dm,name,1 as level
FROM test_with_rescan
WHERE sj_dm < 10 order by dm limit 6 offset 2)
UNION all
SELECT t2.dm,t2.sj_dm,t2.name||' > '||t1.name,t1.level+1
FROM t_result t1
JOIN test_with_rescan t2 ON t2.sj_dm = t1.dm
)
SELECT *
FROM t_result t order by 1,2,3,4;
drop table test_with_rescan;
reset enable_seqscan;
reset enable_indexscan;
reset cpu_tuple_cost;
--parallel plan for seq scan
set enable_bitmapscan to off;
set enable_indexscan to off;
explain (costs off) select count(*) from parallel_t1;
explain (costs off) select count(*) from parallel_t1 where a = 5000;
explain (costs off) select count(*) from parallel_t1 where a > 5000;
@ -34,6 +168,29 @@ select count(*) from parallel_t1 where a > 5000;
select count(*) from parallel_t1 where a < 5000;
select count(*) from parallel_t1 where a <> 5000;
explain (costs off,analyse on,verbose on) select count(*) from parallel_t1;
reset enable_indexscan;
reset enable_bitmapscan;
--parallel plan for bitmap scan
--onepage case
CREATE TABLE onepage1 (val int, val2 int);
ALTER TABLE onepage1 ADD PRIMARY KEY(val, val2);
insert into onepage1 (select * from generate_series(1, 5) a, generate_series(1, 5) b);
CREATE TABLE onepage2 as select * from onepage1;
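-- Assumption: at 25 rows each, these tables fit on a single heap page, so the
-- bitmap built over onepage1 should stay in its one-page (TBM_ONE_PAGE)
-- representation instead of spilling into the page table.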
explain select * from onepage2 natural join onepage1 where onepage2.val > 2 and onepage1.val < 4;
select * from onepage2 natural join onepage1 where onepage2.val > 2 and onepage1.val < 4 order by 1,2;
drop table onepage1;
drop table onepage2;
set enable_seqscan to off;
set enable_indexscan to off;
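-- Same three predicates as the serial baseline above, now expected to show
-- Parallel Bitmap Heap Scan under Gather.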
explain (costs off) select count(*) from parallel_t1 where a > 5000;
explain (costs off) select count(*) from parallel_t1 where a < 5000;
explain (costs off) select count(*) from parallel_t1 where a <> 5000;
select count(*) from parallel_t1 where a > 5000;
select count(*) from parallel_t1 where a < 5000;
select count(*) from parallel_t1 where a <> 5000;
reset enable_seqscan;
reset enable_indexscan;
--clean up
reset force_parallel_mode;
@ -121,6 +278,20 @@ reset enable_hashjoin;
reset enable_nestloop;
reset enable_indexscan;
--parallel with subplan
drop table subplan_tb1;
create table subplan_tb1(a int, b varchar);
insert into subplan_tb1 values (0, NULL);
insert into subplan_tb1 values (1, NULL);
insert into subplan_tb1 values (2, NULL);
insert into subplan_tb1 values (3, NULL);
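-- The CASE ... IN (subquery) join clause should plan as a hashed SubPlan
-- evaluated under Gather, checking that subplans work inside parallel workers.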
explain (verbose,costs off) select boo_1.a
from subplan_tb1 boo_1 inner join subplan_tb1 boo_2
on ( case when boo_1.a in (
select boo_3.a from subplan_tb1 boo_3 ) then boo_1.a end ) in
(select max( boo.a ) column_009 from subplan_tb1 boo);
--clean up
drop table parallel_t1;
drop table parallel_t2;