MOT code refactoring

This commit is contained in:
Vinoth Veeraraghavan 2020-12-16 12:24:39 +08:00
parent 088fefe2a8
commit 11a1a2ffc7
29 changed files with 2297 additions and 1774 deletions

View File

@ -60,7 +60,7 @@ bool OccTransactionManager::Init()
return result;
}
bool OccTransactionManager::QuickVersionCheck(const Access* access)
bool OccTransactionManager::CheckVersion(const Access* access)
{
// We always validate on committed rows!
const Row* row = access->GetRowFromHeader();
@ -71,7 +71,7 @@ bool OccTransactionManager::QuickHeaderValidation(const Access* access)
{
if (access->m_type != INS) {
// For WR/DEL/RD_FOR_UPDATE lets verify CSN
return QuickVersionCheck(access);
return CheckVersion(access);
} else {
// Lets verify the inserts
// For upgrade we verify the row
@ -157,59 +157,70 @@ RC OccTransactionManager::LockRows(TxnManager* txMan, uint32_t& numRowsLock)
return rc;
}
RC OccTransactionManager::LockHeaders(TxnManager* txMan, uint32_t& numSentinelsLock)
bool OccTransactionManager::LockHeadersNoWait(TxnManager* txMan, uint32_t& numSentinelsLock)
{
RC rc = RC_OK;
uint64_t sleepTime = 1;
uint64_t thdId = txMan->GetThdId();
TxnOrderedSet_t& orderedSet = txMan->m_accessMgr->GetOrderedRowSet();
numSentinelsLock = 0;
if (m_validationNoWait) {
while (numSentinelsLock != m_writeSetSize) {
for (const auto& raPair : orderedSet) {
const Access* ac = raPair.second;
if (ac->m_type == RD) {
continue;
}
Sentinel* sent = ac->m_origSentinel;
if (!sent->TryLock(thdId)) {
break;
}
numSentinelsLock++;
if (ac->m_params.IsPrimaryUpgrade()) {
ac->m_auxRow->m_rowHeader.Lock();
}
// New insert row is already committed!
// Check if row has changed in sentinel
if (!QuickHeaderValidation(ac)) {
rc = RC_ABORT;
goto final;
}
while (numSentinelsLock != m_writeSetSize) {
for (const auto& raPair : orderedSet) {
const Access* ac = raPair.second;
if (ac->m_type == RD) {
continue;
}
Sentinel* sent = ac->m_origSentinel;
if (!sent->TryLock(thdId)) {
break;
}
numSentinelsLock++;
if (ac->m_params.IsPrimaryUpgrade()) {
ac->m_auxRow->m_rowHeader.Lock();
}
// New insert row is already committed!
// Check if row has changed in sentinel
if (!QuickHeaderValidation(ac)) {
return false;
}
}
if (numSentinelsLock != m_writeSetSize) {
ReleaseHeaderLocks(txMan, numSentinelsLock);
numSentinelsLock = 0;
if (m_preAbort) {
for (const auto& acPair : orderedSet) {
const Access* ac = acPair.second;
if (!QuickHeaderValidation(ac)) {
return RC_ABORT;
}
if (numSentinelsLock != m_writeSetSize) {
ReleaseHeaderLocks(txMan, numSentinelsLock);
numSentinelsLock = 0;
if (m_preAbort) {
for (const auto& acPair : orderedSet) {
const Access* ac = acPair.second;
if (!QuickHeaderValidation(ac)) {
return false;
}
}
if (sleepTime > LOCK_TIME_OUT) {
rc = RC_ABORT;
goto final;
} else {
if (IsHighContention() == false) {
CpuCyclesLevelTime::Sleep(5);
} else {
usleep(m_dynamicSleep);
}
sleepTime = sleepTime << 1;
}
}
if (sleepTime > LOCK_TIME_OUT) {
return false;
} else {
if (IsHighContention() == false) {
CpuCyclesLevelTime::Sleep(5);
} else {
usleep(m_dynamicSleep);
}
sleepTime = sleepTime << 1;
}
}
}
return true;
}
RC OccTransactionManager::LockHeaders(TxnManager* txMan, uint32_t& numSentinelsLock)
{
RC rc = RC_OK;
uint64_t thdId = txMan->GetThdId();
TxnOrderedSet_t& orderedSet = txMan->m_accessMgr->GetOrderedRowSet();
numSentinelsLock = 0;
if (m_validationNoWait) {
if (!LockHeadersNoWait(txMan, numSentinelsLock)) {
rc = RC_ABORT;
goto final;
}
} else {
for (const auto& raPair : orderedSet) {
@ -223,8 +234,8 @@ RC OccTransactionManager::LockHeaders(TxnManager* txMan, uint32_t& numSentinelsL
if (ac->m_params.IsPrimaryUpgrade()) {
ac->m_auxRow->m_rowHeader.Lock();
}
// New insert row is already commited!
// Check if row has chainged in sentinel
// New insert row is already committed!
// Check if row has changed in sentinel
if (!QuickHeaderValidation(ac)) {
rc = RC_ABORT;
goto final;
@ -235,30 +246,34 @@ final:
return rc;
}
RC OccTransactionManager::ValidateOcc(TxnManager* txMan)
bool OccTransactionManager::PreAllocStableRow(TxnManager* txMan)
{
uint32_t numSentinelLock = 0;
m_rowsLocked = false;
int isolationLevel = txMan->GetTxnIsoLevel();
TxnAccess* tx = txMan->m_accessMgr.Get();
RC rc = RC_OK;
const uint32_t rowCount = tx->m_rowCnt;
if (GetGlobalConfiguration().m_enableCheckpoint) {
GetCheckpointManager()->BeginCommit(txMan);
m_writeSetSize = 0;
m_rowsSetSize = 0;
m_deleteSetSize = 0;
m_insertSetSize = 0;
m_txnCounter++;
if (rowCount == 0) {
// READONLY
return rc;
TxnOrderedSet_t& orderedSet = txMan->m_accessMgr->GetOrderedRowSet();
for (const auto& raPair : orderedSet) {
const Access* access = raPair.second;
if (access->m_type == RD) {
continue;
}
if (access->m_params.IsPrimarySentinel()) {
if (!GetCheckpointManager()->PreAllocStableRow(txMan, access->GetRowFromHeader(), access->m_type)) {
GetCheckpointManager()->FreePreAllocStableRows(txMan);
GetCheckpointManager()->EndCommit(txMan);
return false;
}
}
}
}
return true;
}
uint32_t readSetSize = 0;
TxnOrderedSet_t& orderedSet = tx->GetOrderedRowSet();
MOT_ASSERT(rowCount == orderedSet.size());
/* 1.Perform Quick Version check */
bool OccTransactionManager::QuickVersionCheck(TxnManager* txMan, uint32_t& readSetSize)
{
int isolationLevel = txMan->GetTxnIsoLevel();
TxnOrderedSet_t& orderedSet = txMan->m_accessMgr->GetOrderedRowSet();
readSetSize = 0;
for (const auto& raPair : orderedSet) {
const Access* ac = raPair.second;
if (ac->m_params.IsPrimarySentinel()) {
@ -290,11 +305,41 @@ RC OccTransactionManager::ValidateOcc(TxnManager* txMan)
if (m_preAbort) {
if (!QuickHeaderValidation(ac)) {
rc = RC_ABORT;
goto final;
return false;
}
}
}
return true;
}
RC OccTransactionManager::ValidateOcc(TxnManager* txMan)
{
uint32_t numSentinelLock = 0;
m_rowsLocked = false;
TxnAccess* tx = txMan->m_accessMgr.Get();
RC rc = RC_OK;
const uint32_t rowCount = tx->m_rowCnt;
m_writeSetSize = 0;
m_rowsSetSize = 0;
m_deleteSetSize = 0;
m_insertSetSize = 0;
m_txnCounter++;
if (rowCount == 0) {
// READONLY
return rc;
}
uint32_t readSetSize = 0;
TxnOrderedSet_t& orderedSet = tx->GetOrderedRowSet();
MOT_ASSERT(rowCount == orderedSet.size());
/* Perform Quick Version check */
if (!QuickVersionCheck(txMan, readSetSize)) {
rc = RC_ABORT;
goto final;
}
MOT_LOG_DEBUG("Validate OCC rowCnt=%u RD=%u WR=%u\n", tx->m_rowCnt, tx->m_rowCnt - m_writeSetSize, m_writeSetSize);
rc = LockHeaders(txMan, numSentinelLock);
@ -302,35 +347,23 @@ RC OccTransactionManager::ValidateOcc(TxnManager* txMan)
goto final;
}
// validate rows in the read set and write set
// Validate rows in the read set and write set
if (readSetSize > 0) {
if (!ValidateReadSet(txMan)) {
rc = RC_ABORT;
goto final;
}
}
if (!ValidateWriteSet(txMan)) {
rc = RC_ABORT;
goto final;
}
if (GetGlobalConfiguration().m_enableCheckpoint) {
GetCheckpointManager()->BeginCommit(txMan);
for (const auto& raPair : orderedSet) {
const Access* access = raPair.second;
if (access->m_type == RD) {
continue;
}
if (access->m_params.IsPrimarySentinel()) {
if (!GetCheckpointManager()->PreAllocStableRow(txMan, access->GetRowFromHeader(), access->m_type)) {
GetCheckpointManager()->FreePreAllocStableRows(txMan);
GetCheckpointManager()->EndCommit(txMan);
rc = RC_MEMORY_ALLOCATION_ERROR;
goto final;
}
}
}
// Pre-allocate stable row according to the checkpoint state.
if (!PreAllocStableRow(txMan)) {
rc = RC_MEMORY_ALLOCATION_ERROR;
goto final;
}
final:
@ -352,19 +385,10 @@ void OccTransactionManager::RollbackInserts(TxnManager* txMan)
return txMan->UndoInserts();
}
void OccTransactionManager::WriteChanges(TxnManager* txMan)
void OccTransactionManager::ApplyWrite(TxnManager* txMan)
{
if (m_writeSetSize == 0 && m_insertSetSize == 0) {
return;
}
LockRows(txMan, m_rowsSetSize);
MOTConfiguration& cfg = GetGlobalConfiguration();
TxnOrderedSet_t& orderedSet = txMan->m_accessMgr->GetOrderedRowSet();
// Stable rows for checkpoint needs to be created (copied from original row) before modifying the global rows.
if (cfg.m_enableCheckpoint) {
if (GetGlobalConfiguration().m_enableCheckpoint) {
TxnOrderedSet_t& orderedSet = txMan->m_accessMgr->GetOrderedRowSet();
for (const auto& raPair : orderedSet) {
const Access* access = raPair.second;
if (access->m_type == RD) {
@ -377,6 +401,20 @@ void OccTransactionManager::WriteChanges(TxnManager* txMan)
}
}
}
}
void OccTransactionManager::WriteChanges(TxnManager* txMan)
{
if (m_writeSetSize == 0 && m_insertSetSize == 0) {
return;
}
LockRows(txMan, m_rowsSetSize);
// Stable rows for checkpoint needs to be created (copied from original row) before modifying the global rows.
ApplyWrite(txMan);
TxnOrderedSet_t& orderedSet = txMan->m_accessMgr->GetOrderedRowSet();
// Update CSN with all relevant information on global rows
// For deletes invalidate sentinels - rows still locked!

View File

@ -149,22 +149,32 @@ private:
* true, cc may fail during the verification (i.e. it may produce
* false positive reports).
*/
bool QuickVersionCheck(const Access* access);
bool CheckVersion(const Access* access);
/** @brief Validate Header for insert */
/** @brief Validate Header for insert */
bool QuickHeaderValidation(const Access* access);
bool QuickVersionCheck(TxnManager* txMan, uint32_t& readSetSize);
bool LockHeadersNoWait(TxnManager* txMan, uint32_t& numSentinelsLock);
void ReleaseHeaderLocks(TxnManager* txMan, uint32_t numOfLocks);
/** release all the locked rows */
/** @brief Release all the locked rows */
void ReleaseRowsLocks(TxnManager* txMan, uint32_t numOfLocks);
/** @var validate the read set */
/** @brief Validate the read set */
bool ValidateReadSet(TxnManager* txMan);
/** @brief validate the write set */
/** @brief Validate the write set */
bool ValidateWriteSet(TxnManager* txMan);
/** @brief Pre-allocates stable row according to the checkpoint state. */
bool PreAllocStableRow(TxnManager* txMan);
/** @brief Sets stable row according to the checkpoint state. */
void ApplyWrite(TxnManager* txMan);
/** @var transaction counter */
uint32_t m_txnCounter;

View File

@ -56,12 +56,94 @@ static bool ParseCmdLineSectionName(const mot_string& line, mot_string& sectionP
return result;
}
#define REPORT_PARSE_ERROR(errorCode, format, ...) \
do { \
#define CMDLINE_REPORT_PARSE_ERROR_AND_BREAK(errorCode, format, ...) \
{ \
MOT_REPORT_ERROR(errorCode, "Load Configuration", format, ##__VA_ARGS__); \
parseError = true; \
break; \
} while (0);
}
/**
 * @brief Retrieves the configuration section registered under a full section name, creating and
 * registering a new section when the name is not yet present in the section map.
 * @param sectionFullName The full section name, used as the lookup key in the section map.
 * @param[in,out] parsedSections List that receives any newly created section.
 * @param[in,out] sectionMap Map from full section name to section; receives any newly created section.
 * @return The existing or newly created section, or null pointer if failed (an error is reported).
 */
static ConfigSection* GetCmdLineConfigSection(
    const mot_string& sectionFullName, mot_list<ConfigSection*>& parsedSections, ConfigSectionMap& sectionMap)
{
    // section already registered by a previous argument - nothing to create
    ConfigSectionMap::iterator pos = sectionMap.find(sectionFullName);
    if (pos != sectionMap.end()) {
        return pos->second;
    }

    // break the full name into the two parts required for creating the section
    mot_string pathPart;
    mot_string namePart;
    if (!ConfigFileParser::BreakSectionName(sectionFullName, pathPart, namePart, CMDLINE_SEP)) {
        MOT_REPORT_ERROR(MOT_ERROR_INTERNAL, "Load Configuration", "Failed to parse section name");
        return nullptr;
    }

    ConfigSection* newSection = ConfigSection::CreateConfigSection(pathPart.c_str(), namePart.c_str());
    if (newSection == nullptr) {
        MOT_REPORT_ERROR(MOT_ERROR_OOM, "Load Configuration", "Failed to allocate memory for configuration section");
        return nullptr;
    }

    // record the new section in the list of parsed sections
    // NOTE(review): on failure the new section is not released here - confirm whether this leaks
    if (!parsedSections.push_back(newSection)) {
        MOT_REPORT_ERROR(MOT_ERROR_OOM, "Load Configuration", "Failed to insert parsed section");
        return nullptr;
    }

    // register the section by its full name and return the mapped section
    pos = sectionMap.insert(ConfigSectionMap::value_type(sectionFullName, newSection)).first;
    return pos->second;
}
/**
 * @brief Appends one array item to the configuration array named by @p key in the given section.
 * The array is created and added to the section when the section does not yet have it.
 * @param currentSection The section holding (or about to hold) the configuration array.
 * @param sectionFullName The full section name, passed on when creating the array and the item.
 * @param key The name of the configuration array.
 * @param value The raw value of the new array item.
 * @param arrayIndex The index of the new item; must be equal to the current item count of the array.
 * @return True if the item was added, otherwise false (an error is reported).
 */
static bool AddCmdLineArrayConfigItem(ConfigSection* currentSection, const mot_string& sectionFullName,
    const mot_string& key, const mot_string& value, uint64_t arrayIndex)
{
    // look up the target array, creating it on first use
    ConfigArray* targetArray = currentSection->ModifyConfigArray(key.c_str());
    if (targetArray == nullptr) {
        targetArray = ConfigArray::CreateConfigArray(sectionFullName.c_str(), key.c_str());
        if (targetArray == nullptr) {
            MOT_REPORT_ERROR(MOT_ERROR_OOM, "Load Configuration", "Failed to allocate memory for configuration array");
            return false;
        }
        // NOTE(review): the new array is not released here when adding fails - confirm ownership
        if (!currentSection->AddConfigItem(targetArray)) {
            MOT_REPORT_ERROR(
                MOT_ERROR_OOM, "Load Configuration", "Failed to add configuration array to parent section");
            return false;
        }
    }

    // items can only be appended: the incoming index must be the next free slot
    const auto nextIndex = targetArray->GetConfigItemCount();
    if (arrayIndex != nextIndex) {
        MOT_REPORT_ERROR(MOT_ERROR_INVALID_CFG,
            "Load Configuration",
            "Failed to parse command line arguments: array %s items not well-ordered, expecting %u, got %" PRIu64,
            targetArray->GetName(),
            nextIndex,
            arrayIndex);
        return false;
    }

    // build the item from the raw value
    ConfigItem* newItem = ConfigFileParser::MakeArrayConfigValue(sectionFullName, arrayIndex, value);
    if (newItem == nullptr) {
        MOT_REPORT_ERROR(MOT_ERROR_OOM,
            "Load Configuration",
            "Failed to create array configuration value from raw value: %s",
            value.c_str());
        return false;
    }

    // append the item to the array
    // NOTE(review): the new item is not released here when adding fails - confirm ownership
    if (!targetArray->AddConfigItem(newItem)) {
        MOT_REPORT_ERROR(MOT_ERROR_OOM,
            "Load Configuration",
            "Failed to add %" PRIu64 "th item to configuration array %s",
            arrayIndex,
            targetArray->GetName());
        return false;
    }

    return true;
}
ConfigTree* CmdLineConfigLoader::ParseCmdLine(char** argv, int argc)
{
@ -76,8 +158,6 @@ ConfigTree* CmdLineConfigLoader::ParseCmdLine(char** argv, int argc)
mot_string line;
mot_string sectionFullName;
mot_string keyValuePart;
mot_string sectionPath;
mot_string sectionName;
ConfigSection* currentSection = nullptr;
mot_list<ConfigSection*> parsedSections;
ConfigSectionMap sectionMap;
@ -89,86 +169,49 @@ ConfigTree* CmdLineConfigLoader::ParseCmdLine(char** argv, int argc)
for (int i = 0; i < argc && !parseError; ++i) {
if (!line.assign(argv[i])) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM, "Failed to allocate memory for next command line argument");
CMDLINE_REPORT_PARSE_ERROR_AND_BREAK(
MOT_ERROR_OOM, "Failed to allocate memory for next command line argument");
}
if ((line.length() <= 2) || line[0] != '-' || line[1] != '-') {
MOT_LOG_TRACE("Skipping ill-formed command line argument: %s", argv[i]);
} else {
if (!ParseCmdLineSectionName(line, sectionFullName, keyValuePart)) {
REPORT_PARSE_ERROR(MOT_ERROR_INTERNAL, "Failed to parse environment variable");
} else {
// get the configuration section
ConfigSectionMap::iterator itr = sectionMap.find(sectionFullName);
if (itr == sectionMap.end()) {
if (!ConfigFileParser::BreakSectionName(sectionFullName, sectionPath, sectionName, CMDLINE_SEP)) {
REPORT_PARSE_ERROR(MOT_ERROR_INTERNAL, "Failed to parse section name");
}
currentSection = ConfigSection::CreateConfigSection(sectionPath.c_str(), sectionName.c_str());
if (currentSection == nullptr) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM, "Failed to allocate memory for configuration section");
}
if (!parsedSections.push_back(currentSection)) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM, "Failed to insert parsed section");
}
itr = sectionMap.insert(ConfigSectionMap::value_type(sectionFullName, currentSection)).first;
}
currentSection = itr->second;
continue;
}
// parse the key-value part
if (!ConfigFileParser::ParseKeyValue(
keyValuePart, sectionFullName, key, value, arrayIndex, hasArrayIndex)) {
// key-value line malformed
parseError = true;
} else {
// check for array item
if (!hasArrayIndex) {
ConfigItem* configItem = ConfigFileParser::MakeConfigValue(sectionFullName, key, value);
if (configItem == nullptr) {
REPORT_PARSE_ERROR(MOT_ERROR_INTERNAL,
"Failed to create configuration value from raw key/value: %s/%s",
key.c_str(),
value.c_str());
} else if (!currentSection->AddConfigItem(configItem)) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM, "Failed to add configuration value to parent section");
}
} else {
// create array if not created yet
ConfigArray* configArray = currentSection->ModifyConfigArray(key.c_str());
if (configArray == nullptr) {
configArray = ConfigArray::CreateConfigArray(sectionFullName.c_str(), key.c_str());
if (configArray == nullptr) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM, "Failed to allocate memory for configuration array");
} else if (!currentSection->AddConfigItem(configArray)) {
REPORT_PARSE_ERROR(
MOT_ERROR_OOM, "Failed to add configuration array to parent section");
}
}
// create item and add it to array
if (arrayIndex != configArray->GetConfigItemCount()) {
// array items must be ordered
REPORT_PARSE_ERROR(MOT_ERROR_INVALID_CFG,
"Failed to parse command line arguments: array %s items not well-ordered, "
"expecting %u, got %" PRIu64 " (command line argument: %s)",
configArray->GetName(),
configArray->GetConfigItemCount(),
arrayIndex,
argv[i]);
} else {
ConfigItem* configItem =
ConfigFileParser::MakeArrayConfigValue(sectionFullName, arrayIndex, value);
if (configItem == nullptr) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM,
"Failed to create array configuration value from raw value: %s",
value.c_str());
} else if (!configArray->AddConfigItem(configItem)) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM,
"Failed to add %" PRIu64 "th item to configuration array %s",
arrayIndex,
configArray->GetName());
}
}
}
}
if (!ParseCmdLineSectionName(line, sectionFullName, keyValuePart)) {
CMDLINE_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_INTERNAL, "Failed to parse command line argument");
}
// get the configuration section
currentSection = GetCmdLineConfigSection(sectionFullName, parsedSections, sectionMap);
if (currentSection == nullptr) {
CMDLINE_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_INTERNAL, "Failed to get configuration section");
}
// parse the key-value part
if (!ConfigFileParser::ParseKeyValue(keyValuePart, sectionFullName, key, value, arrayIndex, hasArrayIndex)) {
// key-value line malformed
CMDLINE_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_INTERNAL, "Failed to parse key/value");
}
// check for array item
if (!hasArrayIndex) {
ConfigItem* configItem = ConfigFileParser::MakeConfigValue(sectionFullName, key, value);
if (configItem == nullptr) {
CMDLINE_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_INTERNAL,
"Failed to create configuration value from raw key/value: %s/%s",
key.c_str(),
value.c_str());
} else if (!currentSection->AddConfigItem(configItem)) {
CMDLINE_REPORT_PARSE_ERROR_AND_BREAK(
MOT_ERROR_OOM, "Failed to add configuration value to parent section");
}
} else {
if (!AddCmdLineArrayConfigItem(currentSection, sectionFullName, key, value, arrayIndex)) {
CMDLINE_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_INTERNAL,
"Failed to add array item with arrayIndex %lu (command line argument: %s)",
arrayIndex,
line.c_str());
}
}
}

View File

@ -123,12 +123,12 @@ ConfigItem* ConfigFileParser::MakeConfigValue(
return result;
}
ConfigItem* ConfigFileParser::MakeArrayConfigValue(const mot_string& path, int arrayIndex, const mot_string& value)
ConfigItem* ConfigFileParser::MakeArrayConfigValue(const mot_string& path, uint64_t arrayIndex, const mot_string& value)
{
ConfigItem* result = nullptr;
mot_string key;
if (!key.format("%d", arrayIndex)) {
MOT_REPORT_ERROR(MOT_ERROR_INTERNAL, "Load Configuration", "Failed to format array index %d", arrayIndex);
if (!key.format("%lu", arrayIndex)) {
MOT_REPORT_ERROR(MOT_ERROR_INTERNAL, "Load Configuration", "Failed to format array index %lu", arrayIndex);
} else {
result = MakeConfigValue(path, key, value);
}

View File

@ -77,7 +77,7 @@ public:
* @param value The value of the configuration item.
* @return The resulting configuration item, or null pointer if failed.
*/
static ConfigItem* MakeArrayConfigValue(const mot_string& path, int arrayIndex, const mot_string& value);
static ConfigItem* MakeArrayConfigValue(const mot_string& path, uint64_t arrayIndex, const mot_string& value);
/**
* @brief Creates a signed integer configuration value.

View File

@ -60,12 +60,93 @@ static bool ParsePropsSectionName(const mot_string& line, mot_string& sectionPat
return result;
}
#define REPORT_PARSE_ERROR(errorCode, format, ...) \
do { \
#define PROPS_REPORT_PARSE_ERROR_AND_BREAK(errorCode, format, ...) \
{ \
MOT_REPORT_ERROR(errorCode, "Load Configuration", format, ##__VA_ARGS__); \
parseError = true; \
break; \
} while (0);
}
/**
 * @brief Retrieves the configuration section registered under a full section name, creating and
 * registering a new section when the name is not yet present in the section map.
 * @param sectionFullName The full section name, used as the lookup key in the section map.
 * @param[in,out] parsedSections List that receives any newly created section.
 * @param[in,out] sectionMap Map from full section name to section; receives any newly created section.
 * @return The existing or newly created section, or null pointer if failed (an error is reported).
 */
static ConfigSection* GetPropsConfigSection(const mot_string& sectionFullName, mot_list<ConfigSection*>& parsedSections,
    ConfigSectionMap& sectionMap)
{
    // section already registered by a previous line - nothing to create
    mot_map<mot_string, ConfigSection*>::iterator pos = sectionMap.find(sectionFullName);
    if (pos != sectionMap.end()) {
        return pos->second;
    }

    // break the full name into the two parts required for creating the section
    mot_string pathPart;
    mot_string namePart;
    if (!ConfigFileParser::BreakSectionName(sectionFullName, pathPart, namePart)) {
        MOT_REPORT_ERROR(MOT_ERROR_INTERNAL, "Load Configuration", "Failed to parse section name");
        return nullptr;
    }

    ConfigSection* newSection = ConfigSection::CreateConfigSection(pathPart.c_str(), namePart.c_str());
    if (newSection == nullptr) {
        MOT_REPORT_ERROR(MOT_ERROR_OOM, "Load Configuration", "Failed to allocate configuration section");
        return nullptr;
    }

    // record the new section in the list of parsed sections
    // NOTE(review): on failure the new section is not released here - confirm whether this leaks
    if (!parsedSections.push_back(newSection)) {
        MOT_REPORT_ERROR(MOT_ERROR_OOM, "Load Configuration", "Failed to add parsed section");
        return nullptr;
    }

    // register the section by its full name and return the mapped section
    pos = sectionMap.insert(ConfigSectionMap::value_type(sectionFullName, newSection)).first;
    return pos->second;
}
static bool AddPropsArrayConfigItem(const char* configFilePath, const FileLineReader& reader,
const mot_string& line, ConfigSection* currentSection, const mot_string& sectionFullName, const mot_string& key,
const mot_string& value, uint64_t arrayIndex)
{
ConfigArray* configArray = currentSection->ModifyConfigArray(key.c_str());
if (configArray == nullptr) {
configArray = ConfigArray::CreateConfigArray(sectionFullName.c_str(), key.c_str());
if (configArray == nullptr) {
MOT_REPORT_ERROR(MOT_ERROR_OOM, "Load Configuration", "Failed to allocate configuration array");
return false;
}
if (!currentSection->AddConfigItem(configArray)) {
MOT_REPORT_ERROR(
MOT_ERROR_OOM, "Load Configuration", "Failed to add configuration array to parent section");
return false;
}
}
if (arrayIndex != configArray->GetConfigItemCount()) {
// array items must be ordered
MOT_REPORT_ERROR(MOT_ERROR_INVALID_CFG,
"Load Configuration",
"Failed to parse configuration file %s at line %u: %s (array %s items not "
"well-ordered, expecting %u, got %" PRIu64 ")",
configFilePath,
reader.GetLineNumber(),
line.c_str(),
configArray->GetName(),
configArray->GetConfigItemCount(),
arrayIndex);
return false;
}
ConfigItem* configItem = ConfigFileParser::MakeArrayConfigValue(sectionFullName, arrayIndex, value);
if (configItem == nullptr) {
MOT_REPORT_ERROR(MOT_ERROR_INTERNAL, "Load Configuration", "Failed to create configuration array value");
return false;
}
if (!configArray->AddConfigItem(configItem)) {
MOT_REPORT_ERROR(MOT_ERROR_OOM,
"Load Configuration",
"Failed to add %" PRIu64 "th item to configuration array %s",
arrayIndex,
configArray->GetName());
return false;
}
return true;
}
ConfigTree* PropsConfigFileLoader::LoadConfigFile(const char* configFilePath)
{
@ -79,8 +160,6 @@ ConfigTree* PropsConfigFileLoader::LoadConfigFile(const char* configFilePath)
mot_string line;
mot_string sectionFullName;
mot_string keyValuePart;
mot_string sectionPath;
mot_string sectionName;
ConfigSection* currentSection = nullptr;
mot_list<ConfigSection*> parsedSections;
ConfigSectionMap sectionMap;
@ -101,93 +180,59 @@ ConfigTree* PropsConfigFileLoader::LoadConfigFile(const char* configFilePath)
while (!reader.Eof() && !parseError) {
// parse next non-empty line
if (!line.assign(reader.GetLine().c_str())) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM, "Failed to allocate memory for next line");
PROPS_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_OOM, "Failed to allocate memory for next line");
}
line.trim();
if (line.length() && line[0] != '#') {
// break tokens to section and key-value by last separator
MOT_LOG_DEBUG("Parsing config line: %s", line.c_str());
if (!ParsePropsSectionName(line, sectionFullName, keyValuePart)) {
REPORT_PARSE_ERROR(MOT_ERROR_INVALID_CFG,
"Failed to parse configuration file %s at line %d: %s (section name line malformed)",
PROPS_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_INVALID_CFG,
"Failed to parse configuration file %s at line %u: %s (section name line malformed)",
configFilePath,
reader.GetLineNumber(),
line.c_str());
} else {
// get the configuration section
mot_map<mot_string, ConfigSection*>::iterator itr = sectionMap.find(sectionFullName);
if (itr == sectionMap.end()) {
if (!ConfigFileParser::BreakSectionName(sectionFullName, sectionPath, sectionName)) {
REPORT_PARSE_ERROR(MOT_ERROR_INTERNAL, "Failed to parse section name");
}
currentSection = ConfigSection::CreateConfigSection(sectionPath.c_str(), sectionName.c_str());
if (currentSection == nullptr) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM, "Failed to allocate configuration section");
}
if (!parsedSections.push_back(currentSection)) {
REPORT_PARSE_ERROR(MOT_ERROR_INTERNAL, "Failed to add parsed section");
}
itr = sectionMap.insert(ConfigSectionMap::value_type(sectionFullName, currentSection)).first;
}
currentSection = itr->second;
}
// parse the key-value part
if (!ConfigFileParser::ParseKeyValue(
keyValuePart, sectionFullName, key, value, arrayIndex, hasArrayIndex)) {
// key-value line malformed
REPORT_PARSE_ERROR(MOT_ERROR_INVALID_CFG,
"Failed to parse configuration file %s at line %d: %s (key/value malformed)",
// get the configuration section
currentSection = GetPropsConfigSection(sectionFullName, parsedSections, sectionMap);
if (currentSection == nullptr) {
PROPS_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_INTERNAL, "Failed to get configuration section");
}
// parse the key-value part
if (!ConfigFileParser::ParseKeyValue(
keyValuePart, sectionFullName, key, value, arrayIndex, hasArrayIndex)) {
// key-value line malformed
PROPS_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_INVALID_CFG,
"Failed to parse configuration file %s at line %u: %s (key/value malformed)",
configFilePath,
reader.GetLineNumber(),
line.c_str());
}
// check for array item
if (!hasArrayIndex) {
ConfigItem* configItem = ConfigFileParser::MakeConfigValue(sectionFullName, key, value);
if (configItem == nullptr) {
PROPS_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_INTERNAL,
"Failed to parse configuration file %s at line %u: %s (invalid type specification?)",
configFilePath,
reader.GetLineNumber(),
line.c_str());
} else if (!currentSection->AddConfigItem(configItem, true)) {
PROPS_REPORT_PARSE_ERROR_AND_BREAK(
MOT_ERROR_OOM, "Failed to add configuration item to parent section");
}
} else {
if (!AddPropsArrayConfigItem(
configFilePath, reader, line, currentSection, sectionFullName, key, value, arrayIndex)) {
PROPS_REPORT_PARSE_ERROR_AND_BREAK(MOT_ERROR_OOM,
"Failed to add array item with arrayIndex %lu in configuration file %s at line %u: %s",
arrayIndex,
configFilePath,
reader.GetLineNumber(),
line.c_str());
} else {
// check for array item
if (!hasArrayIndex) {
ConfigItem* configItem = ConfigFileParser::MakeConfigValue(sectionFullName, key, value);
if (configItem == nullptr) {
REPORT_PARSE_ERROR(MOT_ERROR_INTERNAL,
"Failed to parse configuration file %s at line %d: %s (invalid type specification?)",
configFilePath,
reader.GetLineNumber(),
line.c_str());
} else if (!currentSection->AddConfigItem(configItem, true)) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM, "Failed to add configuration item to parent section");
}
} else {
ConfigArray* configArray = currentSection->ModifyConfigArray(key.c_str());
if (configArray == nullptr) {
configArray = ConfigArray::CreateConfigArray(sectionFullName.c_str(), key.c_str());
if (configArray == nullptr) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM, "Failed to allocate configuration array");
} else if (!currentSection->AddConfigItem(configArray)) {
REPORT_PARSE_ERROR(
MOT_ERROR_OOM, "Failed to add configuration array to parent section");
}
}
if (arrayIndex != configArray->GetConfigItemCount()) {
// array items must be ordered
REPORT_PARSE_ERROR(MOT_ERROR_INVALID_CFG,
"Failed to parse configuration file %s at line %d: %s (array %s items not "
"well-ordered, expecting %u, got %" PRIu64 ")",
configFilePath,
reader.GetLineNumber(),
line.c_str(),
configArray->GetName(),
configArray->GetConfigItemCount(),
arrayIndex);
} else {
ConfigItem* configItem =
ConfigFileParser::MakeArrayConfigValue(sectionFullName, arrayIndex, value);
if (configItem == nullptr) {
REPORT_PARSE_ERROR(MOT_ERROR_INTERNAL, "Failed to create configuration array value");
} else if (!configArray->AddConfigItem(configItem)) {
REPORT_PARSE_ERROR(MOT_ERROR_OOM,
"Failed to add %" PRIu64 "th item to configuration array %s",
arrayIndex,
configArray->GetName());
}
}
}
}
}
}

View File

@ -741,6 +741,56 @@ static void MakeChunkResident(MemRawChunkHeader* chunk)
}
}
/**
 * @brief Allocates a single aligned raw chunk for a chunk pool. The allocation routine is selected by
 * the allocation type of the pool and by the globally configured chunk allocation policy. On success
 * the reserved-chunk statistics are updated with the allocated size.
 * @param chunkPool The chunk pool for which the chunk is allocated (its allocation type and node are used).
 * @param allocSize The allocation size in bytes.
 * @param align The required alignment in bytes.
 * @return The allocated chunk, or null pointer if allocation failed or the configured policy is invalid.
 */
static MemRawChunkHeader* AllocateChunkFromKernel(MemRawChunkPool* chunkPool, size_t allocSize, size_t align)
{
    MemRawChunkHeader* chunk = nullptr;

    // any pool that is not global: either native allocation or allocation on the pool's node
    if (chunkPool->m_allocType != MEM_ALLOC_GLOBAL) {
        if (g_memGlobalCfg.m_chunkAllocPolicy != MEM_ALLOC_POLICY_NATIVE) {
            chunk = (MemRawChunkHeader*)MemNumaAllocAlignedLocal(allocSize, align, chunkPool->m_node);
        } else {
            int res = posix_memalign((void**)&chunk, align, allocSize);
            if (res != 0) {
                MOT_REPORT_SYSTEM_ERROR_CODE(
                    res, posix_memalign, "Chunk Allocation", "Failed to allocate aligned 2MB chunk");
                chunk = nullptr;
            }
        }
        if (chunk != nullptr) {
            MemoryStatisticsProvider::m_provider->AddLocalChunksReserved(allocSize);
            DetailedMemoryStatisticsProvider::m_provider->AddLocalChunksReserved(chunkPool->m_node, allocSize);
        }
        return chunk;
    }

    // global pool: dispatch by the configured chunk allocation policy
    switch (g_memGlobalCfg.m_chunkAllocPolicy) {
        case MEM_ALLOC_POLICY_LOCAL:
            // allocate from specific local node
            chunk = (MemRawChunkHeader*)MemNumaAllocAlignedLocal(allocSize, align, chunkPool->m_node);
            break;

        case MEM_ALLOC_POLICY_CHUNK_INTERLEAVED:
            // allocate chunk from next node (round robin)
            chunk = (MemRawChunkHeader*)NumaAllocInterleavedChunk(allocSize, align);
            break;

        case MEM_ALLOC_POLICY_PAGE_INTERLEAVED:
            // allocate chunk from all nodes (interleaved on page boundary)
            chunk = (MemRawChunkHeader*)MemNumaAllocAlignedGlobal(allocSize, align);
            break;

        case MEM_ALLOC_POLICY_NATIVE: {
            // allocate through posix_memalign()
            int res = posix_memalign((void**)&chunk, align, allocSize);
            if (res != 0) {
                MOT_REPORT_SYSTEM_ERROR_CODE(
                    res, posix_memalign, "Chunk Allocation", "Failed to allocate aligned 2MB chunk");
                chunk = nullptr;
            }
            break;
        }

        default:
            // unknown policy: report and bail out without touching statistics
            MOT_REPORT_ERROR(MOT_ERROR_INTERNAL,
                "Chunk Allocation",
                "Invalid chunk allocation policy: %u",
                (unsigned)g_memGlobalCfg.m_chunkAllocPolicy);
            return nullptr;
    }

    if (chunk != nullptr) {
        MemoryStatisticsProvider::m_provider->AddGlobalChunksReserved(allocSize);
    }
    return chunk;
}
static MemRawChunkHeader* AllocateChunk(MemRawChunkPool* chunkPool)
{
if (chunkPool->m_asyncReserveData) {
@ -765,50 +815,7 @@ static MemRawChunkHeader* AllocateChunk(MemRawChunkPool* chunkPool)
"Allocating chunk for chunk pool %s on node %d from kernel", chunkPool->m_poolName, chunkPool->m_node);
size_t allocSize = MEM_CHUNK_SIZE_MB * MEGA_BYTE;
size_t align = allocSize;
if (chunkPool->m_allocType == MEM_ALLOC_GLOBAL) {
if (g_memGlobalCfg.m_chunkAllocPolicy == MEM_ALLOC_POLICY_LOCAL) {
// allocate from specific local node
chunk = (MemRawChunkHeader*)MemNumaAllocAlignedLocal(allocSize, align, chunkPool->m_node);
} else if (g_memGlobalCfg.m_chunkAllocPolicy == MEM_ALLOC_POLICY_CHUNK_INTERLEAVED) {
// allocate chunk from next node (round robin)
chunk = (MemRawChunkHeader*)NumaAllocInterleavedChunk(allocSize, align);
} else if (g_memGlobalCfg.m_chunkAllocPolicy == MEM_ALLOC_POLICY_PAGE_INTERLEAVED) {
// allocate chunk from all nodes (interleaved on page boundary)
chunk = (MemRawChunkHeader*)MemNumaAllocAlignedGlobal(allocSize, align);
} else if (g_memGlobalCfg.m_chunkAllocPolicy == MEM_ALLOC_POLICY_NATIVE) {
// allocate from kernel using malloc()
int res = posix_memalign((void**)&chunk, align, allocSize);
if (res != 0) {
MOT_REPORT_SYSTEM_ERROR_CODE(
res, posix_memalign, "Chunk Allocation", "Failed to allocate aligned 2MB chunk");
chunk = NULL;
}
} else {
MOT_REPORT_ERROR(MOT_ERROR_INTERNAL,
"Chunk Allocation",
"Invalid chunk allocation policy: %u",
(unsigned)g_memGlobalCfg.m_chunkAllocPolicy);
return NULL;
}
if (chunk) {
MemoryStatisticsProvider::m_provider->AddGlobalChunksReserved(allocSize);
}
} else {
if (g_memGlobalCfg.m_chunkAllocPolicy == MEM_ALLOC_POLICY_NATIVE) {
int res = posix_memalign((void**)&chunk, align, allocSize);
if (res != 0) {
MOT_REPORT_SYSTEM_ERROR_CODE(
res, posix_memalign, "Chunk Allocation", "Failed to allocate aligned 2MB chunk");
chunk = NULL;
}
} else {
chunk = (MemRawChunkHeader*)MemNumaAllocAlignedLocal(allocSize, align, chunkPool->m_node);
}
if (chunk) {
MemoryStatisticsProvider::m_provider->AddLocalChunksReserved(allocSize);
DetailedMemoryStatisticsProvider::m_provider->AddLocalChunksReserved(chunkPool->m_node, allocSize);
}
}
chunk = AllocateChunkFromKernel(chunkPool, allocSize, align);
if (chunk) {
if (chunkPool->m_reserveMode == MEM_RESERVE_PHYSICAL) {
MakeChunkResident(chunk);

View File

@ -864,60 +864,117 @@ static bool HasAvailableMemory()
return false;
}
/**
 * @brief Appends to the string buffer a human-readable reason for an EACCES mapping failure.
 * @details The checks are made in this order, and the first one that matches is reported:
 *  - the file descriptor refers to a non-regular file;
 *  - MAP_PRIVATE was requested, but the descriptor is open write-only;
 *  - PROT_WRITE with MAP_SHARED was requested, but the descriptor is not open O_RDWR;
 *  - PROT_WRITE was requested, but the descriptor is open with O_APPEND.
 * If none matches, a generic message is appended.
 * @param stringBuffer The buffer receiving the message.
 * @param prot The requested memory protection (PROT_* bits).
 * @param flags The requested mapping flags (MAP_* bits).
 * @param fd The file descriptor that was passed to the mapping call.
 */
static void EAccessToString(StringBuffer* stringBuffer, int prot, int flags, int fd)
{
    struct stat st;

    // a failing fstat() (e.g. invalid descriptor) simply skips this check
    if (fstat(fd, &st) == 0 && !S_ISREG(st.st_mode)) {
        StringBufferAppend(stringBuffer, "File descriptor %d refers to non-regular file", fd);
        return;
    }

    // check if MAP_PRIVATE was requested, but fd is not open for reading.
#ifdef MAP_PRIVATE
    if (flags & MAP_PRIVATE) {
        // F_GETFL (file status flags) carries the access mode; F_GETFD would only return FD_CLOEXEC
        const int statusFlags = fcntl(fd, F_GETFL);
        if (statusFlags >= 0 && (statusFlags & O_ACCMODE) == O_WRONLY) {
            StringBufferAppend(
                stringBuffer, "MAP_PRIVATE was requested, but the file descriptor is not open for reading");
            return;
        }
    }
#endif

    // Check if MAP_SHARED was requested and PROT_WRITE is set, but fd is not open in read/write (O_RDWR) mode.
    // Or just PROT_WRITE is set, but the file is append-only.
#ifdef PROT_WRITE
    if (prot & PROT_WRITE) {
        // F_GETFL is required here as well: both O_ACCMODE and O_APPEND are file status flags
        const int statusFlags = fcntl(fd, F_GETFL);
        if (statusFlags >= 0) {
#ifdef MAP_SHARED
            if ((flags & MAP_SHARED) && ((statusFlags & O_ACCMODE) != O_RDWR)) {
                StringBufferAppend(stringBuffer, "The file descriptor is not open for both reading and writing");
                return;
            }
#endif
            // check if PROT_WRITE is set, but the file is append-only
            if (statusFlags & O_APPEND) {
                StringBufferAppend(stringBuffer, "The file descriptor is open for append");
                return;
            }
        }
    }
#endif

    StringBufferAppend(stringBuffer, "Unknown access error");
}
/**
 * @brief Appends to the string buffer a human-readable reason for an EINVAL mapping failure.
 * @details The checks are made in this order, and the first one that matches is reported:
 *  - the mapped length is zero;
 *  - the flags do not contain exactly one of MAP_PRIVATE and MAP_SHARED;
 *  - the address, the length or the offset is not a multiple of the system page size.
 * If none matches, a generic message is appended.
 * @param stringBuffer The buffer receiving the message.
 * @param address The requested mapping address.
 * @param length The requested mapping length in bytes.
 * @param prot The requested memory protection (not inspected).
 * @param flags The requested mapping flags (MAP_* bits).
 * @param offset The requested file offset.
 */
static void EInvalToString(StringBuffer* stringBuffer, void* address, size_t length, int prot, int flags, off_t offset)
{
    (void)prot;  // kept for signature symmetry with the other error helpers

    if (length == 0) {
        StringBufferAppend(stringBuffer, "The mapped length is zero");
        return;
    }

    // both "neither" and "both" produce the same message, so a single equality test covers them
    // NOTE(review): on Linux MAP_SHARED_VALIDATE is believed to have both bits set - confirm callers never pass it
    const bool isPrivate = (flags & MAP_PRIVATE) != 0;
    const bool isShared = (flags & MAP_SHARED) != 0;
    if (isPrivate == isShared) {
        StringBufferAppend(stringBuffer, "You must specify exactly one of MAP_PRIVATE or MAP_SHARED");
        return;
    }

    // sysconf() returns long; keep the full width for the mask computation
    const long pageSize = sysconf(_SC_PAGESIZE);
    if (pageSize > 0) {
        const uintptr_t mask = (uintptr_t)pageSize - 1;
        if ((uintptr_t)address & mask) {
            StringBufferAppend(
                stringBuffer, "Address %p is not aligned to page size %d", address, (int)pageSize);
            return;
        }
        if ((uintptr_t)length & mask) {
            StringBufferAppend(
                stringBuffer, "Mapped length %zu is not aligned to page size %d", length, (int)pageSize);
            return;
        }
        if ((unsigned long long)offset & mask) {
            // off_t is a signed type whose width is platform dependent, so print it through long long
            StringBufferAppend(
                stringBuffer, "Offset %lld is not aligned to page size %d", (long long)offset, (int)pageSize);
            return;
        }
    }

    StringBufferAppend(stringBuffer, "Unknown invalid value error");
}
/**
 * @brief Appends to the string buffer a human-readable reason for an ETXTBSY mapping failure.
 * @details When MAP_DENYWRITE is defined and requested, and the file descriptor is open for writing
 * (O_WRONLY or O_RDWR), the conflict between the two is reported. Otherwise a generic message is appended.
 * @param stringBuffer The buffer receiving the message.
 * @param flags The requested mapping flags (MAP_* bits).
 * @param fd The file descriptor that was passed to the mapping call.
 */
static void ETxtBsyToString(StringBuffer* stringBuffer, int flags, int fd)
{
#ifdef MAP_DENYWRITE
    if (flags & MAP_DENYWRITE) {
        // F_GETFL (file status flags) carries the access mode; F_GETFD would only return FD_CLOEXEC
        const int statusFlags = fcntl(fd, F_GETFL);
        if (statusFlags >= 0) {
            const int accessMode = statusFlags & O_ACCMODE;
            if ((accessMode == O_WRONLY) || (accessMode == O_RDWR)) {
                StringBufferAppend(stringBuffer,
                    "The mapping flag MAP_DENYWRITE is incompatible with the open mode of the file descriptor");
                return;
            }
        }
    }
#endif
    StringBufferAppend(stringBuffer, "Unknown text file busy error");
}
static void MotSysNumaMMapErrorToString(
StringBuffer* stringBuffer, int errorCode, void* address, size_t length, int prot, int flags, int fd, off_t offset)
{
struct stat st;
int pageSize = -1;
int openMode = -1;
switch (errorCode) {
case EACCES:
// Check if a file descriptor refers to a non-regular file
// Or MAP_PRIVATE was requested, but fd is not open for reading
// Or MAP_SHARED was requested and PROT_WRITE is set, but fd is not open in read/write (O_RDWR) mode.
// Or PROT_WRITE is set, but the file is append-only.
if (fstat(fd, &st) == 0 && !S_ISREG(st.st_mode)) {
StringBufferAppend(stringBuffer, "File descriptor %d refers to non-regular file", fd);
break;
}
// check if MAP_PRIVATE was requested, but fd is not open for reading.
#ifdef MAP_PRIVATE
if (flags & MAP_PRIVATE) {
openMode = fcntl(fd, F_GETFD, 0);
if (openMode >= 0 && (openMode & O_ACCMODE) == O_WRONLY) {
StringBufferAppend(
stringBuffer, "MAP_PRIVATE was requested, but the file descriptor is not open for reading");
break;
}
}
#endif
// Check if MAP_SHARED was requested and PROT_WRITE is set, but fd is not open in read/write (O_RDWR) mode.
// Or just PROT_WRITE is set, but the file is append-only.
#ifdef PROT_WRITE
if (prot & PROT_WRITE) {
openMode = fcntl(fd, F_GETFD, 0);
if (openMode >= 0) {
#ifdef MAP_SHARED
if (flags & MAP_SHARED) {
if ((openMode & O_ACCMODE) != O_RDWR) {
StringBufferAppend(
stringBuffer, "The file descriptor is not open for both reading and writing");
break;
}
}
#endif
// check if PROT_WRITE is set, but the file is append-only
if (openMode & O_APPEND) {
StringBufferAppend(stringBuffer, "The file descriptor is open for append");
break;
}
}
}
#endif
StringBufferAppend(stringBuffer, "Unknown access error");
EAccessToString(stringBuffer, prot, flags, fd);
break;
case EAGAIN:
@ -932,37 +989,7 @@ static void MotSysNumaMMapErrorToString(
break;
case EINVAL:
if (length == 0) {
StringBufferAppend(stringBuffer, "The mapped length is zero");
break;
}
if ((flags & MAP_PRIVATE) && (flags & MAP_SHARED)) {
StringBufferAppend(stringBuffer, "You must specify exactly one of MAP_PRIVATE or MAP_SHARED");
break;
}
if (!(flags & MAP_PRIVATE) && !(flags & MAP_SHARED)) {
StringBufferAppend(stringBuffer, "You must specify exactly one of MAP_PRIVATE or MAP_SHARED");
break;
}
pageSize = sysconf(_SC_PAGESIZE);
if (pageSize > 0) {
unsigned mask = (pageSize - 1);
if ((uintptr_t)address & mask) {
StringBufferAppend(stringBuffer, "Address %p is not aligned to page size %d", address, pageSize);
break;
}
if ((unsigned long)length & mask) {
StringBufferAppend(
stringBuffer, "Mapped length %zu is not aligned to page size %d", length, pageSize);
break;
}
if ((unsigned long)offset & mask) {
StringBufferAppend(stringBuffer, "Offset %zu is not aligned to page size %d", offset, pageSize);
break;
}
}
StringBufferAppend(stringBuffer, "Unknown invalid value error");
EInvalToString(stringBuffer, address, length, prot, flags, offset);
break;
case ENFILE:
@ -1000,19 +1027,7 @@ static void MotSysNumaMMapErrorToString(
break;
case ETXTBSY:
#ifdef MAP_DENYWRITE
if (flags & MAP_DENYWRITE) {
openMode = fcntl(fd, F_GETFD, 0);
if (openMode >= 0) {
if (((openMode & O_ACCMODE) == O_WRONLY) || ((openMode & O_ACCMODE) == O_RDWR)) {
StringBufferAppend(stringBuffer,
"The mapping flag MAP_DENYWRITE is incompatible with the open mode of the file descriptor");
return;
}
}
}
#endif
StringBufferAppend(stringBuffer, "Unknown text file busy error");
ETxtBsyToString(stringBuffer, flags, fd);
break;
default:

View File

@ -131,30 +131,27 @@ int CheckpointWorkerPool::Checkpoint(Buffer* buffer, Sentinel* sentinel, int fd,
int wrote = 0;
isDeleted = false;
if (mainRow != nullptr) {
bool headerLocked = sentinel->TryLock(threadId);
if (headerLocked == false) {
if (mainRow->GetTwoPhaseMode() == true) {
MOT_LOG_DEBUG("checkpoint: row %p is 2pc", mainRow);
return wrote;
}
sentinel->Lock(threadId);
}
stableRow = sentinel->GetStable();
if (mainRow->IsRowDeleted()) {
if (stableRow) {
// Truly deleted and was not removed by txn manager
isDeleted = true;
} else {
MOT_LOG_DEBUG("Detected Deleted row without Stable Row!");
}
}
} else {
if (mainRow == nullptr) {
return 0;
}
if (unlikely(stableRow == nullptr)) {
stableRow = sentinel->GetStable();
bool headerLocked = sentinel->TryLock(threadId);
if (headerLocked == false) {
if (mainRow->GetTwoPhaseMode() == true) {
MOT_LOG_DEBUG("checkpoint: row %p is 2pc", mainRow);
return wrote;
}
sentinel->Lock(threadId);
}
stableRow = sentinel->GetStable();
if (mainRow->IsRowDeleted()) {
if (stableRow) {
// Truly deleted and was not removed by txn manager
isDeleted = true;
} else {
MOT_LOG_DEBUG("Detected Deleted row without Stable Row!");
}
}
bool statusBit = sentinel->GetStableStatus();
@ -166,29 +163,24 @@ int CheckpointWorkerPool::Checkpoint(Buffer* buffer, Sentinel* sentinel, int fd,
if (statusBit == !m_na) { /* has stable version */
if (stableRow == nullptr) {
break;
} else {
if (!Write(buffer, stableRow, fd)) {
wrote = -1;
} else {
if (isDeleted == false) {
CheckpointUtils::DestroyStableRow(stableRow);
sentinel->SetStable(nullptr);
}
wrote = 1;
}
break;
}
if (!Write(buffer, stableRow, fd)) {
wrote = -1;
} else {
if (isDeleted == false) {
CheckpointUtils::DestroyStableRow(stableRow);
sentinel->SetStable(nullptr);
}
wrote = 1;
}
break;
} else { /* no stable version */
if (stableRow == nullptr) {
if (deleted) {
wrote = 0;
break;
}
if (mainRow == nullptr) {
MOT_LOG_ERROR("CheckpointWorkerPool::checkpoint - null main row!");
wrote = 0;
break;
}
sentinel->SetStableStatus(!m_na);
if (!Write(buffer, mainRow, fd)) {
wrote = -1; // we failed to write, set error
@ -196,10 +188,11 @@ int CheckpointWorkerPool::Checkpoint(Buffer* buffer, Sentinel* sentinel, int fd,
wrote = 1;
}
break;
} else { /* should not happen! */
wrote = -1;
m_cpManager.OnError(ErrCodes::CALC, "Calc logic error - stable row");
}
/* should not happen! */
wrote = -1;
m_cpManager.OnError(ErrCodes::CALC, "Calc logic error - stable row");
}
} while (0);
@ -208,9 +201,7 @@ int CheckpointWorkerPool::Checkpoint(Buffer* buffer, Sentinel* sentinel, int fd,
sentinel->SetStable(nullptr);
}
if (mainRow != nullptr) {
sentinel->Release();
}
sentinel->Release();
return wrote;
}
@ -464,6 +455,17 @@ CheckpointWorkerPool::ErrCodes CheckpointWorkerPool::WriteTableMetadataFile(Tabl
return ErrCodes::SUCCESS;
}
/**
 * @brief Writes whatever is pending in the buffer to the file and resets the buffer.
 * @param fd The file descriptor to write to.
 * @param buffer The buffer to flush.
 * @return False if the write did not transfer the whole buffer content (the buffer is left untouched in
 * that case), otherwise true. An empty buffer is a successful no-op.
 */
bool CheckpointWorkerPool::FlushBuffer(int fd, Buffer* buffer)
{
    // nothing pending - nothing to write
    if (!(buffer->Size() > 0)) {
        return true;
    }

    const bool fullyWritten =
        (CheckpointUtils::WriteFile(fd, (char*)buffer->Data(), buffer->Size()) == buffer->Size());
    if (!fullyWritten) {
        return false;
    }

    buffer->Reset();
    return true;
}
CheckpointWorkerPool::ErrCodes CheckpointWorkerPool::WriteTableDataFile(Table* table, Buffer* buffer,
Sentinel** deletedList, GcManager* gcSession, uint16_t threadId, uint32_t& maxSegId, uint64_t& numOps)
{
@ -515,16 +517,15 @@ CheckpointWorkerPool::ErrCodes CheckpointWorkerPool::WriteTableDataFile(Table* t
currFileOps++;
curSegLen += table->GetTupleSize() + sizeof(CheckpointUtils::EntryHeader);
if (m_checkpointSegsize > 0 && curSegLen >= m_checkpointSegsize) {
if (buffer->Size() > 0) { // there is data in the buffer that needs to be written
if (CheckpointUtils::WriteFile(fd, (char*)buffer->Data(), buffer->Size()) != buffer->Size()) {
MOT_LOG_ERROR(
"CheckpointWorkerPool::WriteTableDataFile: failed to write data file %u for table %u",
maxSegId,
tableId);
errCode = ErrCodes::FILE_IO;
break;
}
buffer->Reset();
if (!FlushBuffer(fd, buffer)) {
MOT_LOG_ERROR(
"CheckpointWorkerPool::WriteTableDataFile: failed to write remaining buffer data (%u bytes) to "
"data file %u for table %u",
buffer->Size(),
maxSegId,
tableId);
errCode = ErrCodes::FILE_IO;
break;
}
/* FinishFile will reset the fd to -1 on success. */
@ -575,16 +576,15 @@ CheckpointWorkerPool::ErrCodes CheckpointWorkerPool::WriteTableDataFile(Table* t
return errCode;
}
if (buffer->Size() > 0) { // there is data in the buffer that needs to be written
if (CheckpointUtils::WriteFile(fd, (char*)buffer->Data(), buffer->Size()) != buffer->Size()) {
MOT_LOG_ERROR(
"CheckpointWorkerPool::WriteTableDataFile: failed to write (remaining) data file %u for table %u",
maxSegId,
tableId);
(void)CheckpointUtils::CloseFile(fd);
return ErrCodes::FILE_IO;
}
buffer->Reset();
if (!FlushBuffer(fd, buffer)) {
MOT_LOG_ERROR(
"CheckpointWorkerPool::WriteTableDataFile: failed to write remaining buffer data (%u bytes) to "
"data file %u for table %u",
buffer->Size(),
maxSegId,
tableId);
(void)CheckpointUtils::CloseFile(fd);
return ErrCodes::FILE_IO;
}
/* FinishFile will reset the fd to -1 on success. */

View File

@ -174,6 +174,8 @@ private:
ErrCodes WriteTableDataFile(Table* table, Buffer* buffer, Sentinel** deletedList, GcManager* gcSession,
uint16_t threadId, uint32_t& maxSegId, uint64_t& numOps);
/**
 * @brief Writes any data pending in the buffer to the given file and resets the buffer.
 * @param fd The file descriptor to write to.
 * @param buffer The buffer to flush.
 * @return False if the pending data could not be written in full, otherwise true (also when the buffer
 * is empty).
 */
bool FlushBuffer(int fd, Buffer* buffer);
// Workers
void* m_workers;

View File

@ -56,6 +56,71 @@ bool SessionManager::Initialize(uint32_t nodeCount, uint32_t threadCount)
void SessionManager::Destroy()
{}
/**
 * @brief Undoes the session initialization steps after session creation has failed.
 * @details The switch is entered at the label matching @p initPhase and deliberately falls through all the
 * labels below it, so the cleanup steps of that phase and of every phase listed after it are executed in
 * order. The thread-level NUMA node identifier and thread identifier are intentionally left initialized.
 * @param connectionId The connection identifier to release.
 * @param sessionId The session identifier (only verified by assertion against the current session id).
 * @param initPhase The initialization phase at which the cleanup starts. Must not be SESSION_INIT_DONE.
 * @param[in,out] sessionContext The session context. It is destroyed, freed and set to nullptr when the
 * cleanup passes through ALLOC_SESSION_BUFFER_PHASE.
 */
void SessionManager::CleanupFailedSessionContext(
ConnectionId connectionId, SessionId sessionId, SessionInitPhase initPhase, SessionContext*& sessionContext)
{
MOT_ASSERT(initPhase != SESSION_INIT_DONE);
MOT_LOG_ERROR("Failed to create session object for thread id %" PRIu16, MOTCurrThreadId);
switch (initPhase) {
case RESERVE_THREAD_SLOT_PHASE:
StatisticsManager::GetInstance().UnreserveThreadSlot();
// fall through
case INIT_SESSION_PHASE:
// sessionContext is dereferenced without a null check - presumably always valid in this phase; verify caller
// Remove from link list
sessionContext->GetTxnManager()->GcRemoveSession();
// remove from session map
if (!m_sessionContextMap.remove(MOT_GET_CURRENT_SESSION_ID())) {
MOT_LOG_WARN("Failed to remove session %u from global session map - not found",
MOT_GET_CURRENT_SESSION_ID());
}
// fall through
case ALLOC_SESSION_BUFFER_PHASE:
// run the destructor explicitly, then release the raw storage with the matching deallocator
sessionContext->~SessionContext();
#ifdef MEM_SESSION_ACTIVE
MemSessionFree(sessionContext);
#else
free(sessionContext);
#endif
// the caller receives the pointer by reference, so it observes the reset
sessionContext = nullptr;
// fall through
case RESERVE_SESSION_MEMORY_PHASE:
#ifdef MEM_SESSION_ACTIVE
MemSessionUnreserve();
#endif
// fall through
case SETUP_MASSTREE_INFO_PHASE:
DestroyMasstreeThreadinfo();
// fall through
// we keep thread-level attribute of NUMA node identifier initialized, since on thread-pooled envelope
// other sessions might still use this worker thread
case SETUP_NODE_ID_PHASE:
// fall through
case ALLOC_CONNECTION_ID_PHASE:
FreeConnectionId(connectionId);
MOT_ASSERT(connectionId == MOT_GET_CURRENT_CONNECTION_ID());
MOT_ASSERT(sessionId == MOT_GET_CURRENT_SESSION_ID());
// invalidate the current connection and session identifiers only after they were verified above
MOT_SET_CURRENT_CONNECTION_ID(INVALID_CONNECTION_ID);
MOT_SET_CURRENT_SESSION_ID(INVALID_SESSION_ID);
// fall through
// we keep thread-level attribute of thread identifier initialized, since on thread-pooled envelope
// other sessions might still use this worker thread
case ALLOC_THREAD_ID_PHASE:
// fall through
case SESSION_INIT_START:
// NOTE(review): SessionInitPhase also declares ALLOC_SESSION_ID_PHASE, which has no label in this switch and
// therefore ends up in the default branch without releasing the connection id - confirm that this phase can
// never be reported here
default:
break;
}
}
SessionContext* SessionManager::CreateSessionContext(bool isLightSession /* = false */,
uint64_t reserveMemoryKb /* = 0 */, void* userData /* = nullptr */,
ConnectionId connectionId /* = INVALID_CONNECTION_ID */)
@ -71,19 +136,7 @@ SessionContext* SessionManager::CreateSessionContext(bool isLightSession /* = fa
}
// session initialization is complex, so let's divide it into phases, and cleanup once at the end if required
enum SessionInitPhase {
SESSION_INIT_START,
ALLOC_THREAD_ID_PHASE,
ALLOC_CONNECTION_ID_PHASE,
ALLOC_SESSION_ID_PHASE,
SETUP_NODE_ID_PHASE,
SETUP_MASSTREE_INFO_PHASE,
RESERVE_SESSION_MEMORY_PHASE,
ALLOC_SESSION_BUFFER_PHASE,
INIT_SESSION_PHASE,
RESERVE_THREAD_SLOT_PHASE,
SESSION_INIT_DONE
} initPhase = SESSION_INIT_START;
SessionInitPhase initPhase = SESSION_INIT_START;
SessionId sessionId = INVALID_SESSION_ID;
do { // instead of goto
@ -224,65 +277,8 @@ SessionContext* SessionManager::CreateSessionContext(bool isLightSession /* = fa
// cleanup if initialization failed
if (initPhase != SESSION_INIT_DONE) {
MOT_LOG_ERROR("Failed to create session object for thread id %" PRIu16, MOTCurrThreadId);
switch (initPhase) {
case RESERVE_THREAD_SLOT_PHASE:
StatisticsManager::GetInstance().UnreserveThreadSlot();
// fall through
case INIT_SESSION_PHASE:
// Remove from link list
sessionContext->GetTxnManager()->GcRemoveSession();
// remove from session map
if (!m_sessionContextMap.remove(MOT_GET_CURRENT_SESSION_ID())) {
MOT_LOG_WARN("Failed to remove session %u from global session map - not found",
MOT_GET_CURRENT_SESSION_ID());
}
// fall through
case ALLOC_SESSION_BUFFER_PHASE:
sessionContext->~SessionContext();
#ifdef MEM_SESSION_ACTIVE
MemSessionFree(sessionContext);
#else
free(sessionContext);
#endif
sessionContext = nullptr;
// fall through
case RESERVE_SESSION_MEMORY_PHASE:
#ifdef MEM_SESSION_ACTIVE
MemSessionUnreserve();
#endif
// fall through
case SETUP_MASSTREE_INFO_PHASE:
DestroyMasstreeThreadinfo();
// fall through
// we keep thread-level attribute of NUMA node identifier initialized, since on thread-pooled envelope
// other sessions might still use this worker thread
case SETUP_NODE_ID_PHASE:
// fall through
case ALLOC_CONNECTION_ID_PHASE:
FreeConnectionId(connectionId);
MOT_ASSERT(connectionId == MOT_GET_CURRENT_CONNECTION_ID());
MOT_ASSERT(sessionId == MOT_GET_CURRENT_SESSION_ID());
MOT_SET_CURRENT_CONNECTION_ID(INVALID_CONNECTION_ID);
MOT_SET_CURRENT_SESSION_ID(INVALID_SESSION_ID);
// fall through
// we keep thread-level attribute of thread identifier initialized, since on thread-pooled envelope
// other sessions might still use this worker thread
case ALLOC_THREAD_ID_PHASE:
// fall through
case SESSION_INIT_START:
default:
break;
}
// sessionContext will be freed and set to nullptr in CleanupFailedSessionContext
CleanupFailedSessionContext(connectionId, sessionId, initPhase, sessionContext);
}
return sessionContext;

View File

@ -108,6 +108,31 @@ public:
void ReportActiveSessions();
private:
/**
 * @enum SessionInitPhase
 * @brief Session initialization phases, used to decide where cleanup starts when session creation fails
 * (see CleanupFailedSessionContext()).
 */
enum SessionInitPhase {
SESSION_INIT_START,
ALLOC_THREAD_ID_PHASE,
ALLOC_CONNECTION_ID_PHASE,
// NOTE(review): the cleanup switch in CleanupFailedSessionContext() has no case for this phase - confirm
ALLOC_SESSION_ID_PHASE,
SETUP_NODE_ID_PHASE,
SETUP_MASSTREE_INFO_PHASE,
RESERVE_SESSION_MEMORY_PHASE,
ALLOC_SESSION_BUFFER_PHASE,
INIT_SESSION_PHASE,
RESERVE_THREAD_SLOT_PHASE,
// no cleanup is expected for this value (asserted by CleanupFailedSessionContext())
SESSION_INIT_DONE
};
/**
* @brief Helper to clean up if session initialization is failed.
* @param connectionId Connection ID.
* @param sessionId Session ID.
* @param initPhase Initialization phase.
* @param[in,out] sessionContext Session context, freed and set to nullptr if it was allocated.
*/
void CleanupFailedSessionContext(
ConnectionId connectionId, SessionId sessionId, SessionInitPhase initPhase, SessionContext*& sessionContext);
/** @typedef session map */
typedef ConcurrentMap<SessionId, SessionContext*> SessionContextMap;

View File

@ -1357,6 +1357,64 @@ uint64_t MOTConfiguration::ParseMemoryUnit(const char* memoryValue, uint64_t def
return memoryValueBytes;
}
/** @brief Checks via compare_no_case() whether the unit suffix is one of "d", "days" or "day". */
static inline bool IsTimeUnitDays(mot_string& suffix)
{
    return (suffix.compare_no_case("d") == 0) || (suffix.compare_no_case("days") == 0) ||
           (suffix.compare_no_case("day") == 0);
}
/** @brief Checks via compare_no_case() whether the unit suffix is one of "h", "hours" or "hour". */
static inline bool IsTimeUnitHours(mot_string& suffix)
{
    return (suffix.compare_no_case("h") == 0) || (suffix.compare_no_case("hours") == 0) ||
           (suffix.compare_no_case("hour") == 0);
}
/** @brief Checks via compare_no_case() whether the unit suffix is one of "m", "mins", "minutes", "min" or "minute". */
static inline bool IsTimeUnitMinutes(mot_string& suffix)
{
    return (suffix.compare_no_case("m") == 0) || (suffix.compare_no_case("mins") == 0) ||
           (suffix.compare_no_case("minutes") == 0) || (suffix.compare_no_case("min") == 0) ||
           (suffix.compare_no_case("minute") == 0);
}
/** @brief Checks via compare_no_case() whether the unit suffix is one of "s", "secs", "seconds", "sec" or "second". */
static inline bool IsTimeUnitSeconds(mot_string& suffix)
{
    return (suffix.compare_no_case("s") == 0) || (suffix.compare_no_case("secs") == 0) ||
           (suffix.compare_no_case("seconds") == 0) || (suffix.compare_no_case("sec") == 0) ||
           (suffix.compare_no_case("second") == 0);
}
/**
 * @brief Checks via compare_no_case() whether the unit suffix is one of "ms", "millis", "milliseconds",
 * "milli" or "millisecond".
 */
static inline bool IsTimeUnitMilliSeconds(mot_string& suffix)
{
    return (suffix.compare_no_case("ms") == 0) || (suffix.compare_no_case("millis") == 0) ||
           (suffix.compare_no_case("milliseconds") == 0) || (suffix.compare_no_case("milli") == 0) ||
           (suffix.compare_no_case("millisecond") == 0);
}
/**
 * @brief Checks via compare_no_case() whether the unit suffix is one of "us", "micros", "microseconds",
 * "micro" or "microsecond".
 */
static inline bool IsTimeUnitMicroSeconds(mot_string& suffix)
{
    return (suffix.compare_no_case("us") == 0) || (suffix.compare_no_case("micros") == 0) ||
           (suffix.compare_no_case("microseconds") == 0) || (suffix.compare_no_case("micro") == 0) ||
           (suffix.compare_no_case("microsecond") == 0);
}
uint64_t MOTConfiguration::ParseTimeValueMicros(const char* timeValue, uint64_t defaultValue, const char* cfgPath)
{
uint64_t timeValueMicors = defaultValue;
@ -1367,35 +1425,25 @@ uint64_t MOTConfiguration::ParseTimeValueMicros(const char* timeValue, uint64_t
} else if (*endptr == 0) {
MOT_LOG_WARN("Invalid %s time value format: %s (expecting unit type after value)", cfgPath, timeValue);
} else {
// get unit type and convert to mega-bytes
// get unit type and convert to micro-seconds
mot_string suffix(endptr);
suffix.trim();
if ((suffix.compare_no_case("d") == 0) || (suffix.compare_no_case("days") == 0) ||
(suffix.compare_no_case("day") == 0)) {
if (IsTimeUnitDays(suffix)) {
MOT_LOG_TRACE("Loaded %s: %u days", cfgPath, value);
timeValueMicors = ((uint64_t)value) * 24ull * 60ull * 60ull * 1000ull * 1000ull;
} else if ((suffix.compare_no_case("h") == 0) || (suffix.compare_no_case("hours") == 0) ||
(suffix.compare_no_case("hour") == 0)) {
} else if (IsTimeUnitHours(suffix)) {
MOT_LOG_TRACE("Loaded %s: %u hours", cfgPath, value);
timeValueMicors = ((uint64_t)value) * 60ull * 60ull * 1000ull * 1000ull;
} else if ((suffix.compare_no_case("m") == 0) || (suffix.compare_no_case("mins") == 0) ||
(suffix.compare_no_case("minutes") == 0) || (suffix.compare_no_case("min") == 0) ||
(suffix.compare_no_case("minute") == 0)) {
} else if (IsTimeUnitMinutes(suffix)) {
MOT_LOG_TRACE("Loaded %s: %u minutes", cfgPath, value);
timeValueMicors = ((uint64_t)value) * 60ull * 1000ull * 1000ull;
} else if ((suffix.compare_no_case("s") == 0) || (suffix.compare_no_case("secs") == 0) ||
(suffix.compare_no_case("seconds") == 0) || (suffix.compare_no_case("sec") == 0) ||
(suffix.compare_no_case("second") == 0)) {
} else if (IsTimeUnitSeconds(suffix)) {
MOT_LOG_TRACE("Loaded %s: %u seconds", cfgPath, value);
timeValueMicors = ((uint64_t)value) * 1000ull * 1000ull;
} else if ((suffix.compare_no_case("ms") == 0) || (suffix.compare_no_case("millis") == 0) ||
(suffix.compare_no_case("milliseconds") == 0) || (suffix.compare_no_case("milli") == 0) ||
(suffix.compare_no_case("millisecond") == 0)) {
} else if (IsTimeUnitMilliSeconds(suffix)) {
MOT_LOG_TRACE("Loaded %s: %u milli-seconds", cfgPath, value);
timeValueMicors = ((uint64_t)value) * 1000ull;
} else if ((suffix.compare_no_case("us") == 0) || (suffix.compare_no_case("micros") == 0) ||
(suffix.compare_no_case("microseconds") == 0) || (suffix.compare_no_case("micro") == 0) ||
(suffix.compare_no_case("microsecond") == 0)) {
} else if (IsTimeUnitMicroSeconds(suffix)) {
MOT_LOG_TRACE("Loaded %s: %u micro-seconds", cfgPath, value);
timeValueMicors = ((uint64_t)value);
} else {

View File

@ -49,14 +49,14 @@ bool CheckpointRecovery::Recover()
if (CheckpointControlFile::GetCtrlFile()->GetId() == CheckpointControlFile::invalidId) {
m_checkpointId = CheckpointControlFile::invalidId; // no mot control was found.
} else {
if (IsCheckpointValid(CheckpointControlFile::GetCtrlFile()->GetId())) {
m_checkpointId = CheckpointControlFile::GetCtrlFile()->GetId();
m_lsn = CheckpointControlFile::GetCtrlFile()->GetLsn();
m_lastReplayLsn = CheckpointControlFile::GetCtrlFile()->GetLastReplayLsn();
} else {
MOT_LOG_ERROR("CheckpointRecovery:: no valid checkpoint exist");
if (!IsCheckpointValid(CheckpointControlFile::GetCtrlFile()->GetId())) {
MOT_LOG_ERROR("CheckpointRecovery: no valid checkpoint exist");
return false;
}
m_checkpointId = CheckpointControlFile::GetCtrlFile()->GetId();
m_lsn = CheckpointControlFile::GetCtrlFile()->GetLsn();
m_lastReplayLsn = CheckpointControlFile::GetCtrlFile()->GetLastReplayLsn();
}
if (m_checkpointId != CheckpointControlFile::invalidId) {
@ -93,32 +93,9 @@ bool CheckpointRecovery::Recover()
return false;
}
MOT_LOG_INFO("CheckpointRecovery: starting to recover %lu tables from checkpoint id: %lu",
m_tableIds.size(),
m_checkpointId);
for (auto it = m_tableIds.begin(); it != m_tableIds.end(); ++it) {
if (!RecoverTableMetadata(*it)) {
MOT_LOG_ERROR("CheckpointRecovery: recovery of table %lu's metadata failed", *it);
return false;
}
}
std::vector<std::thread> threadPool;
for (uint32_t i = 0; i < m_numWorkers; ++i) {
threadPool.push_back(std::thread(CheckpointRecoveryWorker, this));
}
MOT_LOG_DEBUG("CheckpointRecovery:: waiting for all tasks to finish");
while (HaveTasks() && m_stopWorkers == false) {
sleep(1);
}
MOT_LOG_DEBUG("CheckpointRecovery: tasks finished (%s)", m_errorSet ? "error" : "ok");
for (auto& worker : threadPool) {
if (worker.joinable()) {
worker.join();
}
if (!PerformRecovery()) {
MOT_LOG_ERROR("CheckpointRecovery: perform checkpoint recovery failed");
return false;
}
if (m_errorSet) {
@ -147,6 +124,39 @@ bool CheckpointRecovery::Recover()
return true;
}
/**
 * @brief Recovers the metadata of every table in the checkpoint and then runs the recovery workers.
 * @details Table metadata is recovered on the calling thread first. Afterwards m_numWorkers worker threads
 * are started, and the calling thread polls once per second until there are no more tasks or the workers
 * were told to stop, and finally joins all the workers.
 * @return False if the metadata of some table could not be recovered (no worker is started in that case),
 * otherwise true. Errors raised while the workers run are not reflected in the return value; they are only
 * logged from m_errorSet.
 */
bool CheckpointRecovery::PerformRecovery()
{
    MOT_LOG_INFO("CheckpointRecovery: starting to recover %lu tables from checkpoint id: %lu",
        m_tableIds.size(),
        m_checkpointId);

    // stage 1: table metadata, serially
    for (const auto& tableId : m_tableIds) {
        if (!RecoverTableMetadata(tableId)) {
            MOT_LOG_ERROR("CheckpointRecovery: failed to recover table metadata for table %u", tableId);
            return false;
        }
    }

    // stage 2: launch the workers
    std::vector<std::thread> workers;
    for (uint32_t workerIdx = 0; workerIdx < m_numWorkers; ++workerIdx) {
        workers.emplace_back(CheckpointRecoveryWorker, this);
    }

    // stage 3: wait until the task list drains or a stop is requested
    MOT_LOG_DEBUG("CheckpointRecovery: waiting for all tasks to finish");
    while (HaveTasks() && !m_stopWorkers) {
        sleep(1);
    }
    MOT_LOG_DEBUG("CheckpointRecovery: tasks finished (%s)", m_errorSet ? "error" : "ok");

    // stage 4: collect the workers
    for (std::thread& worker : workers) {
        if (worker.joinable()) {
            worker.join();
        }
    }
    return true;
}
int CheckpointRecovery::FillTasksFromMapFile()
{
if (m_checkpointId == CheckpointControlFile::invalidId) {

View File

@ -133,6 +133,8 @@ private:
*/
uint32_t HaveTasks();
/**
 * @brief Recovers the metadata of all the tables in the checkpoint, then starts the recovery workers
 * and waits for them to finish.
 * @return False if the metadata of a table could not be recovered, otherwise true.
 */
bool PerformRecovery();
/**
* @brief Recovers the in process two phase commit related transactions
* from the checkpoint data file.

View File

@ -841,6 +841,33 @@ void TxnInsertAction::ReportError(RC rc, InsItem* currentItem)
}
}
/**
 * @brief Cleans up the insert item on which an optimistic insert failed.
 * @details The row of a primary-index item is destroyed through its table. If the item was already
 * inserted into the index and not mapped to the access cache, the sentinel reference count is decremented;
 * when that decrement reports RC_INDEX_DELETE, the key is removed from the index and the removed sentinel
 * is recorded for reclamation by the GC session.
 * @param currentItem The insert item being cleaned up.
 * @param pIndexInsertResult The sentinel returned by the index insertion (used only if isInserted is
 * true and isMappedToCache is false).
 * @param isInserted Whether the item was inserted into the index.
 * @param isMappedToCache Whether the item was mapped to the access cache.
 */
void TxnInsertAction::CleanupOptimisticInsert(
    InsItem* currentItem, Sentinel* pIndexInsertResult, bool isInserted, bool isMappedToCache)
{
    // fetch the table before the row is (possibly) destroyed
    Table* owningTable = currentItem->m_row->GetTable();
    if (currentItem->getIndexOrder() == IndexOrder::INDEX_ORDER_PRIMARY) {
        // the local row of a primary index item goes back to the table
        owningTable->DestroyRow(currentItem->m_row);
    }

    // the sentinel needs attention only if it was inserted and is not tracked by the access cache
    if (!isInserted || isMappedToCache) {
        return;
    }

    const RC refCountRc = pIndexInsertResult->RefCountUpdate(DEC, m_manager->GetThdId());
    MOT::Index* sentinelIndex = pIndexInsertResult->GetIndex();
    if (refCountRc != RC::RC_INDEX_DELETE) {
        return;
    }

    // Memory reclamation need to release the key from the primary sentinel back to the pool
    MOT_ASSERT(pIndexInsertResult->GetCounter() == 0);
    Sentinel* removedSentinel = sentinelIndex->IndexRemove(currentItem->m_key, m_manager->GetThdId());
    MOT_ASSERT(removedSentinel != nullptr);
    m_manager->GcSessionRecordRcu(
        sentinelIndex->GetIndexId(), removedSentinel, nullptr, Index::SentinelDtor, SENTINEL_SIZE(sentinelIndex));
    m_manager->m_accessMgr->IncreaseTableStat(owningTable);
}
RC TxnInsertAction::ExecuteOptimisticInsert(Row* row)
{
Sentinel* pIndexInsertResult = nullptr;
@ -926,28 +953,7 @@ RC TxnInsertAction::ExecuteOptimisticInsert(Row* row)
end:
if ((rc != RC_OK) && (currentItem != EndCursor())) {
// Clean current aborted row and clean secondary indexes that were not inserts
// Clean first Object! - wither primary or secondary!
// Return Local Row to pull for PI
Table* table = currentItem->m_row->GetTable();
if (currentItem->getIndexOrder() == IndexOrder::INDEX_ORDER_PRIMARY) {
table->DestroyRow(currentItem->m_row);
}
if (isInserted == true) {
if (isMappedToCache == false) {
RC rc = pIndexInsertResult->RefCountUpdate(DEC, m_manager->GetThdId());
MOT::Index* index_ = pIndexInsertResult->GetIndex();
if (rc == RC::RC_INDEX_DELETE) {
// Memory reclamation need to release the key from the primary sentinel back to the pool
MOT_ASSERT(pIndexInsertResult->GetCounter() == 0);
Sentinel* outputSen = index_->IndexRemove(currentItem->m_key, m_manager->GetThdId());
MOT_ASSERT(outputSen != nullptr);
m_manager->GcSessionRecordRcu(
index_->GetIndexId(), outputSen, nullptr, Index::SentinelDtor, SENTINEL_SIZE(index_));
m_manager->m_accessMgr->IncreaseTableStat(table);
}
}
}
CleanupOptimisticInsert(currentItem, pIndexInsertResult, isInserted, isMappedToCache);
}
// Clean keys

View File

@ -180,6 +180,12 @@ private:
void ShrinkInsertSet();
/**
 * @brief Cleans up the current aborted row after a failed optimistic insert.
 * @param currentItem The insert item on which the insert failed.
 * @param pIndexInsertResult The sentinel returned by the index insertion.
 * @param isInserted Whether the item was inserted into the index.
 * @param isMappedToCache Whether the item was mapped to the access cache.
 */
void CleanupOptimisticInsert(
InsItem* currentItem, Sentinel* pIndexInsertResult, bool isInserted, bool isMappedToCache);
// class non-copy-able, non-assignable, non-movable
/** @cond EXCLUDE_DOC */
TxnInsertAction(const TxnInsertAction&) = delete;

View File

@ -280,65 +280,70 @@ void RedoLog::WriteToLog()
}
}
RC RedoLog::SerializeTransaction()
RC RedoLog::SerializeDropIndex(TxnDDLAccess::DDLAccess* ddlAccess, bool hasDML, IdxDDLAccessMap& idxDDLMap)
{
RC status = RC_OK;
MOT::Index* index;
std::map<uint64_t, TxnDDLAccess::DDLAccess*> uixMap;
RC status = RC_ERROR;
MOT::Index* index = (MOT::Index*)ddlAccess->GetEntry();
if (!hasDML || !(!index->IsPrimaryKey() && index->IsUnique())) {
status = DropIndex(index);
} else {
IdxDDLAccessMap::iterator it = idxDDLMap.find(index->GetExtId());
if (it != idxDDLMap.end()) {
// we create and drop no need for both them
MOT_LOG_DEBUG("Erase create index: %s %lu", index->GetName().c_str(), index->GetExtId());
idxDDLMap.erase(it);
status = RC_OK;
} else {
idxDDLMap[index->GetExtId()] = ddlAccess;
status = DropIndex(index);
}
}
return status;
}
RC RedoLog::SerializeTransactionDDLs(IdxDDLAccessMap& idxDDLMap)
{
MOT::Index* index = nullptr;
bool hasDML = (m_txn->m_accessMgr->m_rowCnt > 0 && !m_txn->m_isLightSession);
TxnDDLAccess* transactionDDLAccess = m_txn->m_txnDdlAccess;
if (transactionDDLAccess != nullptr && transactionDDLAccess->Size() > 0) {
RC status = RC_ERROR;
for (uint16_t i = 0; i < transactionDDLAccess->Size(); i++) {
Table* truncatedTable = nullptr;
TxnDDLAccess::DDLAccess* DDLAccess = transactionDDLAccess->Get(i);
if (DDLAccess == nullptr) {
TxnDDLAccess::DDLAccess* ddlAccess = transactionDDLAccess->Get(i);
if (ddlAccess == nullptr) {
return RC_ERROR;
}
DDLAccessType accessType = DDLAccess->GetDDLAccessType();
DDLAccessType accessType = ddlAccess->GetDDLAccessType();
switch (accessType) {
case DDL_ACCESS_CREATE_TABLE:
status = CreateTable((Table*)DDLAccess->GetEntry());
status = CreateTable((Table*)ddlAccess->GetEntry());
break;
case DDL_ACCESS_DROP_TABLE:
status = DropTable((Table*)DDLAccess->GetEntry());
status = DropTable((Table*)ddlAccess->GetEntry());
break;
case DDL_ACCESS_CREATE_INDEX:
index = (MOT::Index*)DDLAccess->GetEntry();
index = (MOT::Index*)ddlAccess->GetEntry();
if (!hasDML || !(!index->IsPrimaryKey() && index->IsUnique())) {
status = CreateIndex(index);
} else {
// in case of unique secondary skip and send it after DML
MOT_LOG_DEBUG("Defer create index: %s %lu", index->GetName().c_str(), index->GetExtId());
uixMap[index->GetExtId()] = DDLAccess;
idxDDLMap[index->GetExtId()] = ddlAccess;
status = RC_OK;
}
break;
case DDL_ACCESS_DROP_INDEX:
index = (MOT::Index*)DDLAccess->GetEntry();
if (!hasDML || !(!index->IsPrimaryKey() && index->IsUnique())) {
status = DropIndex(index);
} else {
std::map<uint64_t, TxnDDLAccess::DDLAccess*>::iterator it = uixMap.find(index->GetExtId());
if (it != uixMap.end()) {
// we create and drop no need for both them
MOT_LOG_DEBUG("Erase create index: %s %lu", index->GetName().c_str(), index->GetExtId());
uixMap.erase(it);
status = RC_OK;
} else {
uixMap[index->GetExtId()] = DDLAccess;
status = DropIndex(index);
}
}
status = SerializeDropIndex(ddlAccess, hasDML, idxDDLMap);
break;
case DDL_ACCESS_TRUNCATE_TABLE:
// In case of truncate table, the DDLAccess entry holds the
// old indexes. We need to serialize the tableId. In this
// case we take it from the ddl access Oid.
truncatedTable = GetTableManager()->GetTableByExternal(DDLAccess->GetOid());
truncatedTable = GetTableManager()->GetTableByExternal(ddlAccess->GetOid());
if (truncatedTable == nullptr) {
// This should not happen. Truncate table is protected
// by lock. While doing truncate table, the table cannot
@ -358,11 +363,12 @@ RC RedoLog::SerializeTransaction()
}
}
if (m_txn->m_isLightSession) {
MOT_LOG_DEBUG("Serialize DDL light session finished");
return RC_OK;
}
return RC_OK;
}
RC RedoLog::SerializeTransactionDMLs()
{
RC status = RC_OK;
for (uint32_t index = 0; index < m_txn->m_accessMgr->m_rowCnt; index++) {
Access* access = m_txn->m_accessMgr->GetAccessPtr(index);
if (access != nullptr) {
@ -398,16 +404,40 @@ RC RedoLog::SerializeTransaction()
}
}
return RC_OK;
}
RC RedoLog::SerializeTransaction()
{
IdxDDLAccessMap idxDDLMap;
RC status = SerializeTransactionDDLs(idxDDLMap);
if (status != RC_OK) {
MOT_LOG_ERROR("Failed to serialize DDLs: %d", status);
return status;
}
if (m_txn->m_isLightSession) {
MOT_LOG_DEBUG("Serialize DDL light session finished");
return RC_OK;
}
status = SerializeTransactionDMLs();
if (status != RC_OK) {
MOT_LOG_ERROR("Failed to serialize DMLs: %d", status);
return status;
}
// create operations for unique indexes
std::map<uint64_t, TxnDDLAccess::DDLAccess*>::iterator it = uixMap.begin();
while (it != uixMap.end()) {
IdxDDLAccessMap::iterator it = idxDDLMap.begin();
while (it != idxDDLMap.end()) {
DDLAccessType accessType = it->second->GetDDLAccessType();
switch (accessType) {
case DDL_ACCESS_CREATE_INDEX:
index = (MOT::Index*)it->second->GetEntry();
case DDL_ACCESS_CREATE_INDEX: {
MOT::Index* index = (MOT::Index*)it->second->GetEntry();
MOT_LOG_DEBUG("Send create index: %s %lu", index->GetName().c_str(), index->GetExtId());
status = CreateIndex(index);
break;
}
default:
break;
}
@ -418,7 +448,7 @@ RC RedoLog::SerializeTransaction()
it++;
}
uixMap.clear();
idxDDLMap.clear();
return RC_OK;
}

View File

@ -27,6 +27,7 @@
#include "redo_log_buffer.h"
#include "bitmapset.h"
#include "txn_ddl_access.h"
namespace MOT {
class MOTConfiguration;
@ -182,6 +183,9 @@ public:
}
private:
/* Map of Index ID to DDLAccess. */
typedef std::map<uint64_t, TxnDDLAccess::DDLAccess*> IdxDDLAccessMap;
/**
* @brief When the buffer is full, flush its contents to the log
*/
@ -192,6 +196,21 @@ private:
*/
void ResetBuffer();
/**
* @brief Writes the whole transaction's DDLs into the redo buffer
* @param[out] idxDDLMap Map of Index ID to DDLAccess.
* @return The status of the operation.
*/
RC SerializeTransactionDDLs(IdxDDLAccessMap& idxDDLMap);
RC SerializeDropIndex(TxnDDLAccess::DDLAccess* ddlAccess, bool hasDML, IdxDDLAccessMap& idxDDLMap);
/**
* @brief Writes the whole transaction's DMLs into the redo buffer
* @return The status of the operation.
*/
RC SerializeTransactionDMLs();
/* Member variables */
RedoLogHandler* m_redoLogHandler;
RedoLogBuffer* m_redoBuffer;

View File

@ -965,6 +965,51 @@ static void MOTBeginForeignModify(
festate->m_currTxn->SetTxnIsoLevel(u_sess->utils_cxt.XactIsoLevel);
}
/*
 * Serves the "stop at first" path of the foreign scan: initializes the expression state, builds the
 * key buffer, reads the sentinel from the chosen index and looks the row up through the current
 * transaction.
 *
 * Returns the given slot, filled with the unpacked row, when a row was found. Returns nullptr when no
 * row was found; if the lookup also ended with a non-OK return code, the query states are cleaned and
 * the error is reported through report_pg_error before returning.
 */
static TupleTableSlot* IterateForeignScanStopAtFirst(
    ForeignScanState* node, MOTFdwStateSt* festate, TupleTableSlot* slot)
{
    MOT::RC rc = MOT::RC_OK;
    ForeignScan* fscan = (ForeignScan*)node->ss.ps.plan;

    // prepare expression state/context and the key buffer used for the lookup
    festate->m_execExprs = (List*)ExecInitExpr((Expr*)fscan->fdw_exprs, (PlanState*)node);
    festate->m_econtext = node->ss.ps.ps_ExprContext;
    MOTAdaptor::CreateKeyBuffer(node->ss.ss_currentRelation, festate, 0);

    MOT::Sentinel* sentinel =
        festate->m_bestIx->m_ix->IndexReadSentinel(&festate->m_stateKey[0], festate->m_currTxn->GetThdId());
    MOT::Row* row = festate->m_currTxn->RowLookup(festate->m_internalCmdOper, sentinel, rc);

    if (row == nullptr) {
        // nothing found: either the key does not exist (rc is OK) or the lookup itself failed
        if (rc != MOT::RC_OK) {
            if (MOT_IS_SEVERE()) {
                MOT_REPORT_ERROR(MOT_ERROR_INTERNAL, "MOTIterateForeignScan", "Failed to lookup row");
                MOT_LOG_ERROR_STACK("Failed to lookup row");
            }
            CleanQueryStatesOnError(festate->m_currTxn);
            // the index name is passed inline so the string returned by GetName() is alive during the call
            report_pg_error(rc,
                (void*)(festate->m_currTxn->m_errIx != nullptr ? festate->m_currTxn->m_errIx->GetName().c_str()
                                                               : "unknown"),
                (void*)festate->m_currTxn->m_errMsgBuf);
        }
        return nullptr;
    }

    // row found: unpack it into the slot and mark the scan as finished
    MOTAdaptor::UnpackRow(slot, festate->m_table, festate->m_attrsUsed, const_cast<uint8_t*>(row->GetData()));
    node->ss.is_scan_end = true;
    fscan->scan.scan_qual_optimized = true;
    ExecStoreVirtualTuple(slot);

    if (festate->m_ctidNum > 0) {
        // store the row's primary sentinel pointer in the tuple's t_self via the MOTRecConvertSt conversion
        HeapTuple tuple = ExecFetchSlotTuple(slot);
        MOTRecConvertSt converter;
        converter.m_u.m_ptr = (uint64_t)row->GetPrimarySentinel();
        tuple->t_self = converter.m_u.m_self;
        HeapTupleSetXmin(tuple, InvalidTransactionId);
        HeapTupleSetXmax(tuple, InvalidTransactionId);
        HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId);
    }

    festate->m_rowsFound++;
    return slot;
}
/*
*
*/
@ -985,45 +1030,7 @@ static TupleTableSlot* MOTIterateForeignScan(ForeignScanState* node)
(void)ExecClearTuple(slot);
if (stopAtFirst) {
ForeignScan* fscan = (ForeignScan*)node->ss.ps.plan;
festate->m_execExprs = (List*)ExecInitExpr((Expr*)fscan->fdw_exprs, (PlanState*)node);
festate->m_econtext = node->ss.ps.ps_ExprContext;
MOTAdaptor::CreateKeyBuffer(node->ss.ss_currentRelation, festate, 0);
MOT::Sentinel* Sentinel =
festate->m_bestIx->m_ix->IndexReadSentinel(&festate->m_stateKey[0], festate->m_currTxn->GetThdId());
currRow = festate->m_currTxn->RowLookup(festate->m_internalCmdOper, Sentinel, rc);
if (currRow != NULL) {
MOTAdaptor::UnpackRow(
slot, festate->m_table, festate->m_attrsUsed, const_cast<uint8_t*>(currRow->GetData()));
node->ss.is_scan_end = true;
fscan->scan.scan_qual_optimized = true;
ExecStoreVirtualTuple(slot);
if (festate->m_ctidNum > 0) {
HeapTuple resultTup = ExecFetchSlotTuple(slot);
MOTRecConvertSt cv;
cv.m_u.m_ptr = (uint64_t)currRow->GetPrimarySentinel();
resultTup->t_self = cv.m_u.m_self;
HeapTupleSetXmin(resultTup, InvalidTransactionId);
HeapTupleSetXmax(resultTup, InvalidTransactionId);
HeapTupleHeaderSetCmin(resultTup->t_data, InvalidTransactionId);
}
festate->m_rowsFound++;
return slot;
}
if (rc != MOT::RC_OK) {
if (MOT_IS_SEVERE()) {
MOT_REPORT_ERROR(MOT_ERROR_INTERNAL, "MOTIterateForeignScan", "Failed to lookup row");
MOT_LOG_ERROR_STACK("Failed to lookup row");
}
CleanQueryStatesOnError(festate->m_currTxn);
report_pg_error(rc,
(void*)(festate->m_currTxn->m_errIx != nullptr ? festate->m_currTxn->m_errIx->GetName().c_str()
: "unknown"),
(void*)festate->m_currTxn->m_errMsgBuf);
return nullptr;
}
return nullptr;
return IterateForeignScanStopAtFirst(node, festate, slot);
}
if (!festate->m_cursorOpened) {

View File

@ -909,23 +909,12 @@ void MOTAdaptor::OpenCursor(Relation rel, MOTFdwStateSt* festate)
} while (0);
}
static MOT::RC TableFieldType(const ColumnDef* colDef, MOT::MOT_CATALOG_FIELD_TYPES& type, int16* typeLen, bool& isBlob)
static void VarLenFieldType(
Form_pg_type typeDesc, Oid typoid, int32_t colLen, int16* typeLen, bool& isBlob, MOT::RC& res)
{
MOT::RC res = MOT::RC_OK;
Oid typoid;
Type tup;
Form_pg_type typeDesc;
int32_t colLen;
if (colDef->typname->arrayBounds != nullptr)
return MOT::RC_UNSUPPORTED_COL_TYPE_ARR;
tup = typenameType(nullptr, colDef->typname, &colLen);
typeDesc = ((Form_pg_type)GETSTRUCT(tup));
typoid = HeapTupleGetOid(tup);
*typeLen = typeDesc->typlen;
if (*typeLen < 0) {
isBlob = false;
res = MOT::RC_OK;
if (typeDesc->typlen < 0) {
*typeLen = colLen;
switch (typeDesc->typstorage) {
case 'p':
@ -952,6 +941,27 @@ static MOT::RC TableFieldType(const ColumnDef* colDef, MOT::MOT_CATALOG_FIELD_TY
break;
}
}
}
static MOT::RC TableFieldType(const ColumnDef* colDef, MOT::MOT_CATALOG_FIELD_TYPES& type, int16* typeLen, bool& isBlob)
{
MOT::RC res = MOT::RC_OK;
Oid typoid;
Type tup;
Form_pg_type typeDesc;
int32_t colLen;
if (colDef->typname->arrayBounds != nullptr) {
return MOT::RC_UNSUPPORTED_COL_TYPE_ARR;
}
tup = typenameType(nullptr, colDef->typname, &colLen);
typeDesc = ((Form_pg_type)GETSTRUCT(tup));
typoid = HeapTupleGetOid(tup);
*typeLen = typeDesc->typlen;
// Get variable-length field length.
VarLenFieldType(typeDesc, typoid, colLen, typeLen, isBlob, res);
switch (typoid) {
case CHAROID:
@ -1020,97 +1030,101 @@ static MOT::RC TableFieldType(const ColumnDef* colDef, MOT::MOT_CATALOG_FIELD_TY
res = MOT::RC_UNSUPPORTED_COL_TYPE;
}
if (tup)
if (tup) {
ReleaseSysCache(tup);
}
return res;
}
MOT::RC MOTAdaptor::CreateIndex(IndexStmt* index, ::TransactionId tid)
void MOTAdaptor::ValidateCreateIndex(IndexStmt* stmt, MOT::Table* table, MOT::TxnManager* txn)
{
MOT::RC res;
EnsureSafeThreadAccessInline();
MOT::TxnManager* txn = GetSafeTxn(__FUNCTION__);
txn->SetTransactionId(tid);
MOT::Table* table = txn->GetTableByExternalId(index->relation->foreignOid);
if (table == nullptr) {
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_UNDEFINED_TABLE),
errmsg("Table not found for oid %u", index->relation->foreignOid)));
return MOT::RC_ERROR;
}
if (index->primary) {
if (stmt->primary) {
if (!table->IsTableEmpty(txn->GetThdId())) {
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_FDW_ERROR),
errmsg(
"Table %s is not empty, create primary index is not allowed", table->GetTableName().c_str())));
return MOT::RC_ERROR;
return;
}
} else if (table->GetNumIndexes() == MAX_NUM_INDEXES) {
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_FDW_TOO_MANY_INDEXES),
errmsg("Can not create index, max number of indexes %u reached", MAX_NUM_INDEXES)));
return MOT::RC_ERROR;
return;
}
elog(LOG,
"creating %s index %s (OID: %u), for table: %s",
(index->primary ? "PRIMARY" : "SECONDARY"),
index->idxname,
index->indexOid,
index->relation->relname);
uint64_t keyLength = 0;
MOT::Index* ix = nullptr;
MOT::IndexOrder index_order = MOT::IndexOrder::INDEX_ORDER_SECONDARY;
MOT::IndexingMethod indexing_method;
MOT::IndexTreeFlavor flavor;
if (strcmp(index->accessMethod, "btree") == 0) {
// Use the default index tree flavor from configuration file
indexing_method = MOT::IndexingMethod::INDEXING_METHOD_TREE;
flavor = MOT::GetGlobalConfiguration().m_indexTreeFlavor;
} else {
if (strcmp(stmt->accessMethod, "btree") != 0) {
ereport(ERROR, (errmodule(MOD_MOT), errmsg("MOT supports indexes of type BTREE only (btree or btree_art)")));
return MOT::RC_ERROR;
return;
}
if (list_length(index->indexParams) > (int)MAX_KEY_COLUMNS) {
if (list_length(stmt->indexParams) > (int)MAX_KEY_COLUMNS) {
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_FDW_TOO_MANY_INDEX_COLUMNS),
errmsg("Can't create index"),
errdetail(
"Number of columns exceeds %d max allowed %u", list_length(index->indexParams), MAX_KEY_COLUMNS)));
"Number of columns exceeds %d max allowed %u", list_length(stmt->indexParams), MAX_KEY_COLUMNS)));
return;
}
}
MOT::RC MOTAdaptor::CreateIndex(IndexStmt* stmt, ::TransactionId tid)
{
MOT::RC res;
EnsureSafeThreadAccessInline();
MOT::TxnManager* txn = GetSafeTxn(__FUNCTION__);
txn->SetTransactionId(tid);
MOT::Table* table = txn->GetTableByExternalId(stmt->relation->foreignOid);
if (table == nullptr) {
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_UNDEFINED_TABLE),
errmsg("Table not found for oid %u", stmt->relation->foreignOid)));
return MOT::RC_ERROR;
}
ValidateCreateIndex(stmt, table, txn);
elog(LOG,
"creating %s index %s (OID: %u), for table: %s",
(stmt->primary ? "PRIMARY" : "SECONDARY"),
stmt->idxname,
stmt->indexOid,
stmt->relation->relname);
uint64_t keyLength = 0;
MOT::Index* index = nullptr;
MOT::IndexOrder index_order = MOT::IndexOrder::INDEX_ORDER_SECONDARY;
// Use the default index tree flavor from configuration file
MOT::IndexingMethod indexing_method = MOT::IndexingMethod::INDEXING_METHOD_TREE;
MOT::IndexTreeFlavor flavor = MOT::GetGlobalConfiguration().m_indexTreeFlavor;
// check if we have primary and delete previous definition
if (index->primary) {
if (stmt->primary) {
index_order = MOT::IndexOrder::INDEX_ORDER_PRIMARY;
}
ix = MOT::IndexFactory::CreateIndex(index_order, indexing_method, flavor);
if (ix == nullptr) {
index = MOT::IndexFactory::CreateIndex(index_order, indexing_method, flavor);
if (index == nullptr) {
report_pg_error(MOT::RC_ABORT);
return MOT::RC_ABORT;
}
ix->SetExtId(index->indexOid);
ix->SetNumTableFields((uint32_t)table->GetFieldCount());
index->SetExtId(stmt->indexOid);
index->SetNumTableFields((uint32_t)table->GetFieldCount());
int count = 0;
ListCell* lc = nullptr;
foreach (lc, index->indexParams) {
foreach (lc, stmt->indexParams) {
IndexElem* ielem = (IndexElem*)lfirst(lc);
uint64_t colid = table->GetFieldId((ielem->name != nullptr ? ielem->name : ielem->indexcolname));
if (colid == (uint64_t)-1) { // invalid column
delete ix;
delete index;
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
@ -1123,7 +1137,7 @@ MOT::RC MOTAdaptor::CreateIndex(IndexStmt* index, ::TransactionId tid)
// Temp solution for NULLs, do not allow index creation on column that does not carry not null flag
if (!MOT::GetGlobalConfiguration().m_allowIndexOnNullableColumn && !col->m_isNotNull) {
delete ix;
delete index;
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_FDW_INDEX_ON_NULLABLE_COLUMN_NOT_ALLOWED),
@ -1134,7 +1148,7 @@ MOT::RC MOTAdaptor::CreateIndex(IndexStmt* index, ::TransactionId tid)
// Temp solution, we have to support DECIMAL and NUMERIC indexes as well
if (col->m_type == MOT::MOT_CATALOG_FIELD_TYPES::MOT_TYPE_DECIMAL) {
delete ix;
delete index;
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@ -1143,7 +1157,7 @@ MOT::RC MOTAdaptor::CreateIndex(IndexStmt* index, ::TransactionId tid)
return MOT::RC_ERROR;
}
if (col->m_keySize > MAX_KEY_SIZE) {
delete ix;
delete index;
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
@ -1153,21 +1167,21 @@ MOT::RC MOTAdaptor::CreateIndex(IndexStmt* index, ::TransactionId tid)
}
keyLength += col->m_keySize;
ix->SetLenghtKeyFields(count, colid, col->m_keySize);
index->SetLenghtKeyFields(count, colid, col->m_keySize);
count++;
}
ix->SetNumIndexFields(count);
index->SetNumIndexFields(count);
if ((res = ix->IndexInit(keyLength, index->unique, index->idxname, nullptr)) != MOT::RC_OK) {
delete ix;
if ((res = index->IndexInit(keyLength, stmt->unique, stmt->idxname, nullptr)) != MOT::RC_OK) {
delete index;
report_pg_error(res);
return res;
}
res = txn->CreateIndex(table, ix, index->primary);
res = txn->CreateIndex(table, index, stmt->primary);
if (res != MOT::RC_OK) {
delete ix;
delete index;
if (res == MOT::RC_TABLE_EXCEEDS_MAX_INDEXES) {
ereport(ERROR,
(errmodule(MOD_MOT),
@ -1175,7 +1189,7 @@ MOT::RC MOTAdaptor::CreateIndex(IndexStmt* index, ::TransactionId tid)
errmsg("Can not create index, max number of indexes %u reached", MAX_NUM_INDEXES)));
return MOT::RC_TABLE_EXCEEDS_MAX_INDEXES;
} else {
report_pg_error(txn->m_err, index->idxname, txn->m_errMsgBuf);
report_pg_error(txn->m_err, stmt->idxname, txn->m_errMsgBuf);
return MOT::RC_UNIQUE_VIOLATION;
}
}
@ -1183,26 +1197,111 @@ MOT::RC MOTAdaptor::CreateIndex(IndexStmt* index, ::TransactionId tid)
return MOT::RC_OK;
}
MOT::RC MOTAdaptor::CreateTable(CreateForeignTableStmt* table, ::TransactionId tid)
/*
 * Adds one column to the table for every column definition in tableElts.
 *
 * table     - table object being populated.
 * tableElts - list of ColumnDef nodes.
 * hasBlob   - [out] reset to false, then set to true if TableFieldType flags any column as a blob.
 *
 * On any failure the table object is deleted, the error is reported (ereport / report_pg_error) and
 * processing of further columns stops.
 * NOTE(review): 'table' is received by value, so resetting it to nullptr here is not visible to the
 * caller. This is only safe if ereport(ERROR) / report_pg_error never return to this function - confirm.
 */
void MOTAdaptor::AddTableColumns(MOT::Table* table, List* tableElts, bool& hasBlob)
{
    hasBlob = false;

    ListCell* columnCell = nullptr;
    foreach (columnCell, tableElts) {
        int16 typeLen = 0;
        bool isBlob = false;
        MOT::MOT_CATALOG_FIELD_TYPES colType;
        ColumnDef* colDef = (ColumnDef*)lfirst(columnCell);

        // an incomplete column definition cannot be mapped to a MOT column
        if (colDef == nullptr || colDef->typname == nullptr) {
            delete table;
            table = nullptr;
            ereport(ERROR,
                (errmodule(MOD_MOT),
                    errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
                    errmsg("Column definition is not complete"),
                    errdetail("target table is a foreign table")));
            break;
        }

        MOT::RC res = TableFieldType(colDef, colType, &typeLen, isBlob);
        if (res != MOT::RC_OK) {
            delete table;
            table = nullptr;
            report_pg_error(res, colDef, (void*)(int64)typeLen);
            break;
        }
        hasBlob |= isBlob;

        // DECIMAL with explicit integer type modifiers: derive the column length from them
        if ((colType == MOT::MOT_CATALOG_FIELD_TYPES::MOT_TYPE_DECIMAL) &&
            (list_length(colDef->typname->typmods) > 0)) {
            bool allIntConsts = true;
            int precision = 0;
            int scale = 0;
            int modIndex = 0;
            ListCell* modCell = nullptr;
            foreach (modCell, colDef->typname->typmods) {
                Node* modNode = (Node*)lfirst(modCell);
                if (!IsA(modNode, A_Const) || (((A_Const*)modNode)->val.type != T_Integer)) {
                    allIntConsts = false;
                    break;
                }
                // the first modifier is the precision, any later one overwrites the scale
                int& target = (modIndex == 0) ? precision : scale;
                target = ((A_Const*)modNode)->val.val.ival;
                modIndex++;
            }

            if (allIntConsts) {
                // digit groups needed for the fractional part plus those for the integer part
                const int integerDigits = precision - scale;
                int len = (scale / DEC_DIGITS) + ((scale % DEC_DIGITS > 0) ? 1 : 0);
                len += (integerDigits / DEC_DIGITS) + ((integerDigits % DEC_DIGITS > 0) ? 1 : 0);
                typeLen = sizeof(MOT::DecimalSt) + len * sizeof(NumericDigit);
            }
        }

        res = table->AddColumn(colDef->colname, typeLen, colType, colDef->is_not_null);
        if (res != MOT::RC_OK) {
            delete table;
            table = nullptr;
            report_pg_error(res, colDef, (void*)(int64)typeLen);
            break;
        }
    }
}
MOT::RC MOTAdaptor::CreateTable(CreateForeignTableStmt* stmt, ::TransactionId tid)
{
bool hasBlob = false;
MOT::Index* primaryIdx = nullptr;
EnsureSafeThreadAccessInline();
MOT::TxnManager* txn = GetSafeTxn(__FUNCTION__, tid);
MOT::Table* currentTable = nullptr;
MOT::Table* table = nullptr;
MOT::RC res = MOT::RC_ERROR;
std::string tname("");
char* dbname = NULL;
do {
currentTable = new (std::nothrow) MOT::Table();
if (currentTable == nullptr) {
table = new (std::nothrow) MOT::Table();
if (table == nullptr) {
ereport(ERROR,
(errmodule(MOD_MOT), errcode(ERRCODE_OUT_OF_MEMORY), errmsg("Allocation of table metadata failed")));
break;
}
uint32_t columnCount = list_length(table->base.tableElts);
uint32_t columnCount = list_length(stmt->base.tableElts);
// once the columns have been counted, we add one more for the nullable columns
++columnCount;
@ -1210,8 +1309,8 @@ MOT::RC MOTAdaptor::CreateTable(CreateForeignTableStmt* table, ::TransactionId t
// prepare table name
dbname = get_database_name(u_sess->proc_cxt.MyDatabaseId);
if (dbname == nullptr) {
delete currentTable;
currentTable = nullptr;
delete table;
table = nullptr;
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_UNDEFINED_DATABASE),
@ -1220,183 +1319,103 @@ MOT::RC MOTAdaptor::CreateTable(CreateForeignTableStmt* table, ::TransactionId t
}
tname.append(dbname);
tname.append("_");
if (table->base.relation->schemaname != nullptr) {
tname.append(table->base.relation->schemaname);
if (stmt->base.relation->schemaname != nullptr) {
tname.append(stmt->base.relation->schemaname);
} else {
tname.append("#");
}
tname.append("_");
tname.append(table->base.relation->relname);
tname.append(stmt->base.relation->relname);
if (!currentTable->Init(
table->base.relation->relname, tname.c_str(), columnCount, table->base.relation->foreignOid)) {
delete currentTable;
currentTable = nullptr;
if (!table->Init(
stmt->base.relation->relname, tname.c_str(), columnCount, stmt->base.relation->foreignOid)) {
delete table;
table = nullptr;
report_pg_error(MOT::RC_MEMORY_ALLOCATION_ERROR);
break;
}
// the null fields are copied verbatim because we have to give them back at some point
res = currentTable->AddColumn(
res = table->AddColumn(
"null_bytes", BITMAPLEN(columnCount - 1), MOT::MOT_CATALOG_FIELD_TYPES::MOT_TYPE_NULLBYTES);
if (res != MOT::RC_OK) {
delete currentTable;
currentTable = nullptr;
delete table;
table = nullptr;
report_pg_error(MOT::RC_MEMORY_ALLOCATION_ERROR);
break;
}
ListCell* cell = nullptr;
foreach (cell, table->base.tableElts) {
int16 typeLen = 0;
bool isBlob = false;
MOT::MOT_CATALOG_FIELD_TYPES colType;
ColumnDef* colDef = (ColumnDef*)lfirst(cell);
/*
* Add all the columns.
* NOTE: On failure, table object will be deleted and ereport will be done in AddTableColumns.
*/
AddTableColumns(table, stmt->base.tableElts, hasBlob);
if (colDef == nullptr || colDef->typname == nullptr) {
delete currentTable;
currentTable = nullptr;
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
errmsg("Column definition is not complete"),
errdetail("target table is a foreign table")));
break;
}
table->SetFixedLengthRow(!hasBlob);
res = TableFieldType(colDef, colType, &typeLen, isBlob);
if (res != MOT::RC_OK) {
delete currentTable;
currentTable = nullptr;
report_pg_error(res, colDef, (void*)(int64)typeLen);
break;
}
hasBlob |= isBlob;
if (colType == MOT::MOT_CATALOG_FIELD_TYPES::MOT_TYPE_DECIMAL) {
if (list_length(colDef->typname->typmods) > 0) {
bool canMakeShort = true;
int precision = 0;
int scale = 0;
int count = 0;
ListCell* c = nullptr;
foreach (c, colDef->typname->typmods) {
Node* d = (Node*)lfirst(c);
if (!IsA(d, A_Const)) {
canMakeShort = false;
break;
}
A_Const* ac = (A_Const*)d;
if (ac->val.type != T_Integer) {
canMakeShort = false;
break;
}
if (count == 0) {
precision = ac->val.val.ival;
} else {
scale = ac->val.val.ival;
}
count++;
}
if (canMakeShort) {
int len = 0;
len += scale / DEC_DIGITS;
len += (scale % DEC_DIGITS > 0 ? 1 : 0);
precision -= scale;
len += precision / DEC_DIGITS;
len += (precision % DEC_DIGITS > 0 ? 1 : 0);
typeLen = sizeof(MOT::DecimalSt) + len * sizeof(NumericDigit);
}
}
}
res = currentTable->AddColumn(colDef->colname, typeLen, colType, colDef->is_not_null);
if (res != MOT::RC_OK) {
delete currentTable;
currentTable = nullptr;
report_pg_error(res, colDef, (void*)(int64)typeLen);
break;
}
}
if (res != MOT::RC_OK) {
break;
}
currentTable->SetFixedLengthRow(!hasBlob);
uint32_t tupleSize = currentTable->GetTupleSize();
uint32_t tupleSize = table->GetTupleSize();
if (tupleSize > (unsigned int)MAX_TUPLE_SIZE) {
delete currentTable;
currentTable = nullptr;
delete table;
table = nullptr;
ereport(ERROR,
(errmodule(MOD_MOT),
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("Un-support feature"),
errdetail("MOT: Table %s tuple size %u exceeds MAX_TUPLE_SIZE=%u !!!",
table->base.relation->relname,
stmt->base.relation->relname,
tupleSize,
(unsigned int)MAX_TUPLE_SIZE)));
}
if (!currentTable->InitRowPool()) {
delete currentTable;
currentTable = nullptr;
if (!table->InitRowPool()) {
delete table;
table = nullptr;
report_pg_error(MOT::RC_MEMORY_ALLOCATION_ERROR);
break;
}
elog(LOG,
"creating table %s (OID: %u), num columns: %u, tuple: %u",
currentTable->GetLongTableName().c_str(),
table->base.relation->foreignOid,
table->GetLongTableName().c_str(),
stmt->base.relation->foreignOid,
columnCount,
tupleSize);
res = txn->CreateTable(currentTable);
res = txn->CreateTable(table);
if (res != MOT::RC_OK) {
delete currentTable;
currentTable = nullptr;
delete table;
table = nullptr;
report_pg_error(res);
break;
}
// add default PK index
MOT::RC rc = MOT::RC_OK;
primaryIdx = MOT::IndexFactory::CreatePrimaryIndexEx(MOT::IndexingMethod::INDEXING_METHOD_TREE,
DEFAULT_TREE_FLAVOR,
8,
currentTable->GetLongTableName(),
rc,
table->GetLongTableName(),
res,
nullptr);
if (rc != MOT::RC_OK) {
delete currentTable;
currentTable = nullptr;
report_pg_error(rc);
if (res != MOT::RC_OK) {
delete table;
table = nullptr;
report_pg_error(res);
break;
}
primaryIdx->SetExtId(table->base.relation->foreignOid + 1);
primaryIdx->SetExtId(stmt->base.relation->foreignOid + 1);
primaryIdx->SetNumTableFields(columnCount);
primaryIdx->SetNumIndexFields(1);
primaryIdx->SetLenghtKeyFields(0, -1, 8);
primaryIdx->SetFakePrimary(true);
// Add default primary index
res = txn->CreateIndex(currentTable, primaryIdx, true);
res = txn->CreateIndex(table, primaryIdx, true);
} while (0);
if (res != MOT::RC_OK) {
if (currentTable != nullptr) {
txn->DropTable(currentTable);
if (table != nullptr) {
txn->DropTable(table);
}
if (primaryIdx != nullptr) {
delete primaryIdx;

View File

@ -305,6 +305,17 @@ public:
static bool m_callbacks_initialized;
private:
/**
* @brief Adds all the columns.
* @param table Table object being created.
* @param tableElts Column definitions list.
* @param[out] hasBlob Whether any column is a blob.
* NOTE: On failure, table object will be deleted and ereport will be done.
*/
static void AddTableColumns(MOT::Table* table, List *tableElts, bool& hasBlob);
static void ValidateCreateIndex(IndexStmt* index, MOT::Table* table, MOT::TxnManager* txn);
static void VarcharToMOTKey(MOT::Column* col, ExprState* expr, Datum datum, Oid type, uint8_t* data, size_t len,
KEY_OPER oper, uint8_t fill);
static void FloatToMOTKey(MOT::Column* col, ExprState* expr, Datum datum, uint8_t* data);
@ -376,6 +387,7 @@ inline void CleanQueryStatesOnError(MOT::TxnManager* txn)
CleanCursors(state);
}
}
txn->m_queryState.clear();
}
}

View File

@ -289,6 +289,118 @@ extern bool ReFetchIndices(JitContext* jitContext)
return true;
}
/*
 * Prepares the JOIN-specific reusable objects of a JIT context: the inner search key, the inner
 * end-iterator key and the outer row copy. Each object is created only if it is still null.
 * Does nothing (and succeeds) for non-JOIN commands.
 * Returns false if any allocation fails; objects prepared so far stay in the context.
 */
static bool PrepareJitContextJoinData(JitContext* jitContext)
{
    // all the data below is required only by JOIN commands
    if (!IsJoinCommand(jitContext->m_commandType)) {
        return true;
    }

    // inner loop search key
    if (jitContext->m_innerSearchKey == nullptr) {
        MOT_LOG_TRACE(
            "Preparing inner search key for JOIN command from index %s", jitContext->m_innerIndex->GetName().c_str());
        jitContext->m_innerSearchKey = PrepareJitSearchKey(jitContext, jitContext->m_innerIndex);
        if (jitContext->m_innerSearchKey == nullptr) {
            MOT_LOG_TRACE(
                "Failed to allocate reusable inner search key for JIT context, aborting jitted code execution");
            return false;  // safe cleanup during destroy
        }
        MOT_LOG_TRACE("Prepared inner search key %p (%u bytes) for JOIN command from index %s",
            jitContext->m_innerSearchKey,
            jitContext->m_innerSearchKey->GetKeyLength(),
            jitContext->m_innerIndex->GetName().c_str());
    }

    // inner loop end-iterator search key
    if (jitContext->m_innerEndIteratorKey == nullptr) {
        MOT_LOG_TRACE("Preparing inner end iterator key for JOIN command from index %s",
            jitContext->m_innerIndex->GetName().c_str());
        jitContext->m_innerEndIteratorKey = PrepareJitSearchKey(jitContext, jitContext->m_innerIndex);
        if (jitContext->m_innerEndIteratorKey == nullptr) {
            MOT_LOG_TRACE(
                "Failed to allocate reusable inner end iterator key for JIT context, aborting jitted code execution");
            return false;  // safe cleanup during destroy
        }
        MOT_LOG_TRACE("Prepared inner end iterator key %p (%u bytes) for JOIN command from index %s",
            jitContext->m_innerEndIteratorKey,
            jitContext->m_innerEndIteratorKey->GetKeyLength(),
            jitContext->m_innerIndex->GetName().c_str());
    }

    // outer row copy
    if (jitContext->m_outerRowCopy == nullptr) {
        MOT_LOG_TRACE("Preparing outer row copy for JOIN command");
        jitContext->m_outerRowCopy = jitContext->m_table->CreateNewRow();
        if (jitContext->m_outerRowCopy == nullptr) {
            MOT_LOG_TRACE("Failed to allocate reusable outer row copy for JIT context, aborting jitted code execution");
            return false;  // safe cleanup during destroy
        }
    }

    return true;
}
/*
 * Prepares the per-sub-query data of a JIT context: tuple descriptor, result slot, search key and,
 * for JIT_COMMAND_AGGREGATE_RANGE_SELECT sub-queries, the end-iterator key. Each item is created only
 * if it is still null. While preparing, CurrentMemoryContext is switched to the session top memory
 * context and it is restored before returning.
 * Returns false as soon as any item fails to be prepared; items prepared so far stay in the context.
 */
static bool PrepareJitContextSubQueryData(JitContext* jitContext)
{
    // allocate sub-query search keys and generate tuple table slot array using session top memory context
    MemoryContext savedCtx = CurrentMemoryContext;
    CurrentMemoryContext = u_sess->top_mem_cxt;

    bool prepared = true;
    for (uint32_t i = 0; i < jitContext->m_subQueryCount; ++i) {
        JitContext::SubQueryData& subQuery = jitContext->m_subQueryData[i];

        // tuple descriptor, built from the sub-query target list
        if (subQuery.m_tupleDesc == nullptr) {
            MOT_LOG_TRACE("Preparing sub-query %u tuple descriptor", i);
            List* targetList = GetSubQueryTargetList(jitContext->m_queryString, i);
            if (targetList == nullptr) {
                MOT_LOG_TRACE("Failed to locate sub-query %u target list", i);
                prepared = false;  // safe cleanup during destroy
                break;
            }
            subQuery.m_tupleDesc = ExecCleanTypeFromTL(targetList, false);
            if (subQuery.m_tupleDesc == nullptr) {
                MOT_LOG_TRACE("Failed to create sub-query %u tuple descriptor from target list", i);
                prepared = false;  // safe cleanup during destroy
                break;
            }
        }

        // result slot, based on the tuple descriptor
        if (subQuery.m_slot == nullptr) {
            MOT_ASSERT(subQuery.m_tupleDesc != nullptr);
            MOT_LOG_TRACE("Preparing sub-query %u result slot", i);
            subQuery.m_slot = MakeSingleTupleTableSlot(subQuery.m_tupleDesc);
            if (subQuery.m_slot == nullptr) {
                MOT_LOG_TRACE("Failed to generate sub-query %u tuple table slot", i);
                prepared = false;  // safe cleanup during destroy
                break;
            }
        }

        // search key
        if (subQuery.m_searchKey == nullptr) {
            MOT_LOG_TRACE(
                "Preparing sub-query %u search key from index %s", i, subQuery.m_index->GetName().c_str());
            subQuery.m_searchKey = PrepareJitSearchKey(jitContext, subQuery.m_index);
            if (subQuery.m_searchKey == nullptr) {
                MOT_LOG_TRACE("Failed to generate sub-query %u search key", i);
                prepared = false;  // safe cleanup during destroy
                break;
            }
        }

        // end-iterator key, required only by aggregate range select sub-queries
        if ((subQuery.m_commandType == JIT_COMMAND_AGGREGATE_RANGE_SELECT) &&
            (subQuery.m_endIteratorKey == nullptr)) {
            MOT_LOG_TRACE("Preparing sub-query %u end-iterator search key from index %s",
                i,
                subQuery.m_index->GetName().c_str());
            subQuery.m_endIteratorKey = PrepareJitSearchKey(jitContext, subQuery.m_index);
            if (subQuery.m_endIteratorKey == nullptr) {
                MOT_LOG_TRACE("Failed to generate sub-query %u end-iterator search key", i);
                prepared = false;  // safe cleanup during destroy
                break;
            }
        }
    }

    // single restore point for the memory context, on success and on failure alike
    CurrentMemoryContext = savedCtx;
    return prepared;
}
extern bool PrepareJitContext(JitContext* jitContext)
{
// allocate argument-is-null array
@ -316,12 +428,12 @@ extern bool PrepareJitContext(JitContext* jitContext)
if (jitContext->m_searchKey == nullptr) {
MOT_LOG_TRACE("Failed to allocate reusable search key for JIT context, aborting jitted code execution");
return false; // safe cleanup during destroy
} else {
MOT_LOG_TRACE("Prepared search key %p (%u bytes) from index %s",
jitContext->m_searchKey,
jitContext->m_searchKey->GetKeyLength(),
jitContext->m_index->GetName().c_str());
}
MOT_LOG_TRACE("Prepared search key %p (%u bytes) from index %s",
jitContext->m_searchKey,
jitContext->m_searchKey->GetKeyLength(),
jitContext->m_index->GetName().c_str());
}
// allocate bitmap-set object for incremental-redo when executing UPDATE command
@ -356,115 +468,25 @@ extern bool PrepareJitContext(JitContext* jitContext)
MOT_LOG_TRACE(
"Failed to allocate reusable end iterator key for JIT context, aborting jitted code execution");
return false; // safe cleanup during destroy
} else {
MOT_LOG_TRACE("Prepared end iterator key %p (%u bytes) for range update/select command from index %s",
jitContext->m_endIteratorKey,
jitContext->m_endIteratorKey->GetKeyLength(),
jitContext->m_index->GetName().c_str());
}
MOT_LOG_TRACE("Prepared end iterator key %p (%u bytes) for range update/select command from index %s",
jitContext->m_endIteratorKey,
jitContext->m_endIteratorKey->GetKeyLength(),
jitContext->m_index->GetName().c_str());
}
// allocate inner loop search key for JOIN commands
if ((jitContext->m_innerSearchKey == nullptr) && IsJoinCommand(jitContext->m_commandType)) {
MOT_LOG_TRACE(
"Preparing inner search key for JOIN command from index %s", jitContext->m_innerIndex->GetName().c_str());
jitContext->m_innerSearchKey = PrepareJitSearchKey(jitContext, jitContext->m_innerIndex);
if (jitContext->m_innerSearchKey == nullptr) {
MOT_LOG_TRACE(
"Failed to allocate reusable inner search key for JIT context, aborting jitted code execution");
return false; // safe cleanup during destroy
} else {
MOT_LOG_TRACE("Prepared inner search key %p (%u bytes) for JOIN command from index %s",
jitContext->m_innerSearchKey,
jitContext->m_innerSearchKey->GetKeyLength(),
jitContext->m_innerIndex->GetName().c_str());
}
}
// allocate inner loop end-iterator search key for JOIN commands
if ((jitContext->m_innerEndIteratorKey == nullptr) && IsJoinCommand(jitContext->m_commandType)) {
MOT_LOG_TRACE("Preparing inner end iterator key for JOIN command from index %s",
jitContext->m_innerIndex->GetName().c_str());
jitContext->m_innerEndIteratorKey = PrepareJitSearchKey(jitContext, jitContext->m_innerIndex);
if (jitContext->m_innerEndIteratorKey == nullptr) {
MOT_LOG_TRACE(
"Failed to allocate reusable inner end iterator key for JIT context, aborting jitted code execution");
return false; // safe cleanup during destroy
} else {
MOT_LOG_TRACE("Prepared inner end iterator key %p (%u bytes) for JOIN command from index %s",
jitContext->m_innerEndIteratorKey,
jitContext->m_innerEndIteratorKey->GetKeyLength(),
jitContext->m_innerIndex->GetName().c_str());
}
}
// preparing outer row copy for JOIN commands
if ((jitContext->m_outerRowCopy == nullptr) && IsJoinCommand(jitContext->m_commandType)) {
MOT_LOG_TRACE("Preparing outer row copy for JOIN command");
jitContext->m_outerRowCopy = jitContext->m_table->CreateNewRow();
if (jitContext->m_outerRowCopy == nullptr) {
MOT_LOG_TRACE("Failed to allocate reusable outer row copy for JIT context, aborting jitted code execution");
return false; // safe cleanup during destroy
}
if (!PrepareJitContextJoinData(jitContext)) {
MOT_LOG_TRACE("Failed to allocate join related data for JIT context, aborting jitted code execution");
return false; // safe cleanup during destroy
}
// prepare sub-query data for COMPOUND commands
if (jitContext->m_commandType == JIT_COMMAND_COMPOUND_SELECT) {
// allocate sub-query search keys and generate tuple table slot array using session top memory context
MemoryContext oldCtx = CurrentMemoryContext;
CurrentMemoryContext = u_sess->top_mem_cxt;
for (uint32_t i = 0; i < jitContext->m_subQueryCount; ++i) {
JitContext::SubQueryData* subQueryData = &jitContext->m_subQueryData[i];
if (subQueryData->m_tupleDesc == nullptr) {
MOT_LOG_TRACE("Preparing sub-query %u tuple descriptor", i);
List* targetList = GetSubQueryTargetList(jitContext->m_queryString, i);
if (targetList == nullptr) {
MOT_LOG_TRACE("Failed to locate sub-query %u target list", i);
CurrentMemoryContext = oldCtx;
return false; // safe cleanup during destroy
} else {
subQueryData->m_tupleDesc = ExecCleanTypeFromTL(targetList, false);
if (subQueryData->m_tupleDesc == nullptr) {
MOT_LOG_TRACE("Failed to create sub-query %u tuple descriptor from target list", i);
CurrentMemoryContext = oldCtx;
return false; // safe cleanup during destroy
}
}
}
if (subQueryData->m_slot == nullptr) {
MOT_ASSERT(subQueryData->m_tupleDesc != nullptr);
MOT_LOG_TRACE("Preparing sub-query %u result slot", i);
subQueryData->m_slot = MakeSingleTupleTableSlot(subQueryData->m_tupleDesc);
if (subQueryData->m_slot == nullptr) {
MOT_LOG_TRACE("Failed to generate sub-query %u tuple table slot", i);
CurrentMemoryContext = oldCtx;
return false; // safe cleanup during destroy
}
}
if (subQueryData->m_searchKey == nullptr) {
MOT_LOG_TRACE(
"Preparing sub-query %u search key from index %s", i, subQueryData->m_index->GetName().c_str());
subQueryData->m_searchKey = PrepareJitSearchKey(jitContext, subQueryData->m_index);
if (subQueryData->m_searchKey == nullptr) {
MOT_LOG_TRACE("Failed to generate sub-query %u search key", i);
CurrentMemoryContext = oldCtx;
return false; // safe cleanup during destroy
}
}
if ((subQueryData->m_commandType == JIT_COMMAND_AGGREGATE_RANGE_SELECT) &&
(subQueryData->m_endIteratorKey == nullptr)) {
MOT_LOG_TRACE("Preparing sub-query %u end-iterator search key from index %s",
i,
subQueryData->m_index->GetName().c_str());
subQueryData->m_endIteratorKey = PrepareJitSearchKey(jitContext, subQueryData->m_index);
if (subQueryData->m_endIteratorKey == nullptr) {
MOT_LOG_TRACE("Failed to generate sub-query %u end-iterator search key", i);
CurrentMemoryContext = oldCtx;
return false; // safe cleanup during destroy
}
}
if (!PrepareJitContextSubQueryData(jitContext)) {
MOT_LOG_TRACE("Failed to sub-query data for JIT context, aborting jitted code execution");
return false; // safe cleanup during destroy
}
CurrentMemoryContext = oldCtx;
}
return true;

View File

@ -50,6 +50,58 @@ static llvm::Value* ProcessExpr(
static llvm::Value* ProcessExpr(JitLlvmCodeGenContext* ctx, llvm::Value* row, JitExpr* expr, int* max_arg);
/*--------------------------- Helpers to generate compound LLVM code ---------------------------*/
/**
 * @brief Creates a jitted function for code generation. Builds prototype and entry block.
 * @param ctx The LLVM code-generation context. On return it holds the generated function
 * (m_jittedQuery), the value of every function argument, and the per-sub-query values.
 * @param function_name The name given to the generated function.
 */
void CreateJittedFunction(JitLlvmCodeGenContext* ctx, const char* function_name)
{
    // Declare the prototype of the jitted function (returns INT32). The order of the addArgument()
    // calls must match the order in which the argument values are fetched further below.
    GsCodeGen::FnPrototype prototype(ctx->_code_gen, function_name, ctx->INT32_T);
    prototype.addArgument(GsCodeGen::NamedVariable("table", ctx->TableType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("index", ctx->IndexType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("key", ctx->KeyType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("bitmap", ctx->BitmapSetType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("params", ctx->ParamListInfoDataType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("slot", ctx->TupleTableSlotType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("tp_processed", ctx->INT64_T->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("scan_ended", ctx->INT32_T->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("isNewScan", ctx->INT32_T));
    prototype.addArgument(GsCodeGen::NamedVariable("end_iterator_key", ctx->KeyType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("inner_table", ctx->TableType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("inner_index", ctx->IndexType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("inner_key", ctx->KeyType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("inner_end_iterator_key", ctx->KeyType->getPointerTo()));

    // Generate the function and collect the value of each declared argument
    llvm::Value* argValues[MOT_JIT_FUNC_ARG_COUNT];
    ctx->m_jittedQuery = prototype.generatePrototype(ctx->_builder, &argValues[0]);

    // Publish the argument values in the context, in declaration order
    int nextArg = 0;
    ctx->table_value = argValues[nextArg++];
    ctx->index_value = argValues[nextArg++];
    ctx->key_value = argValues[nextArg++];
    ctx->bitmap_value = argValues[nextArg++];
    ctx->params_value = argValues[nextArg++];
    ctx->slot_value = argValues[nextArg++];
    ctx->tp_processed_value = argValues[nextArg++];
    ctx->scan_ended_value = argValues[nextArg++];
    ctx->isNewScanValue = argValues[nextArg++];
    ctx->end_iterator_key_value = argValues[nextArg++];
    ctx->inner_table_value = argValues[nextArg++];
    ctx->inner_index_value = argValues[nextArg++];
    ctx->inner_key_value = argValues[nextArg++];
    ctx->inner_end_iterator_key_value = argValues[nextArg++];

    // Emit the accessors for each sub-query and keep the resulting values for later use
    for (uint32_t subQueryIndex = 0; subQueryIndex < ctx->m_subQueryCount; ++subQueryIndex) {
        auto& subQuery = ctx->m_subQueryData[subQueryIndex];
        subQuery.m_slot = AddGetSubQuerySlot(ctx, subQueryIndex);
        subQuery.m_table = AddGetSubQueryTable(ctx, subQueryIndex);
        subQuery.m_index = AddGetSubQueryIndex(ctx, subQueryIndex);
        subQuery.m_searchKey = AddGetSubQuerySearchKey(ctx, subQueryIndex);
        subQuery.m_endIteratorKey = AddGetSubQueryEndIteratorKey(ctx, subQueryIndex);
    }

    IssueDebugLog("Starting execution of jitted function");
}
/** @brief Builds a code segment for checking if soft memory limit has been reached. */
void buildIsSoftMemoryLimitReached(JitLlvmCodeGenContext* ctx)
{
@ -212,58 +264,6 @@ static bool ProcessJoinBoolExpr(
return result;
}
/**
 * @brief Creates a jitted function for code generation. Builds prototype and entry block.
 * @param ctx The LLVM code-generation context. On return it holds the generated function
 * (m_jittedQuery), the value of every function argument, and the per-sub-query values.
 * @param function_name The name given to the generated function.
 */
void CreateJittedFunction(JitLlvmCodeGenContext* ctx, const char* function_name)
{
    // Declare the prototype of the jitted function (returns INT32). The order of the addArgument()
    // calls must match the order in which the argument values are fetched further below.
    GsCodeGen::FnPrototype prototype(ctx->_code_gen, function_name, ctx->INT32_T);
    prototype.addArgument(GsCodeGen::NamedVariable("table", ctx->TableType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("index", ctx->IndexType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("key", ctx->KeyType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("bitmap", ctx->BitmapSetType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("params", ctx->ParamListInfoDataType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("slot", ctx->TupleTableSlotType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("tp_processed", ctx->INT64_T->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("scan_ended", ctx->INT32_T->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("isNewScan", ctx->INT32_T));
    prototype.addArgument(GsCodeGen::NamedVariable("end_iterator_key", ctx->KeyType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("inner_table", ctx->TableType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("inner_index", ctx->IndexType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("inner_key", ctx->KeyType->getPointerTo()));
    prototype.addArgument(GsCodeGen::NamedVariable("inner_end_iterator_key", ctx->KeyType->getPointerTo()));

    // Generate the function and collect the value of each declared argument
    llvm::Value* argValues[MOT_JIT_FUNC_ARG_COUNT];
    ctx->m_jittedQuery = prototype.generatePrototype(ctx->_builder, &argValues[0]);

    // Publish the argument values in the context, in declaration order
    int nextArg = 0;
    ctx->table_value = argValues[nextArg++];
    ctx->index_value = argValues[nextArg++];
    ctx->key_value = argValues[nextArg++];
    ctx->bitmap_value = argValues[nextArg++];
    ctx->params_value = argValues[nextArg++];
    ctx->slot_value = argValues[nextArg++];
    ctx->tp_processed_value = argValues[nextArg++];
    ctx->scan_ended_value = argValues[nextArg++];
    ctx->isNewScanValue = argValues[nextArg++];
    ctx->end_iterator_key_value = argValues[nextArg++];
    ctx->inner_table_value = argValues[nextArg++];
    ctx->inner_index_value = argValues[nextArg++];
    ctx->inner_key_value = argValues[nextArg++];
    ctx->inner_end_iterator_key_value = argValues[nextArg++];

    // Emit the accessors for each sub-query and keep the resulting values for later use
    for (uint32_t subQueryIndex = 0; subQueryIndex < ctx->m_subQueryCount; ++subQueryIndex) {
        auto& subQuery = ctx->m_subQueryData[subQueryIndex];
        subQuery.m_slot = AddGetSubQuerySlot(ctx, subQueryIndex);
        subQuery.m_table = AddGetSubQueryTable(ctx, subQueryIndex);
        subQuery.m_index = AddGetSubQueryIndex(ctx, subQueryIndex);
        subQuery.m_searchKey = AddGetSubQuerySearchKey(ctx, subQueryIndex);
        subQuery.m_endIteratorKey = AddGetSubQueryEndIteratorKey(ctx, subQueryIndex);
    }

    IssueDebugLog("Starting execution of jitted function");
}
/** @brief Adds code to reset the number of rows processed. */
void buildResetRowsProcessed(JitLlvmCodeGenContext* ctx)
{
@ -1139,7 +1139,53 @@ static bool buildClosedRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index
return result;
}
bool buildSemiOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
/**
 * @brief Emits the last-dimension key filling of an ascending semi-open range scan and reports the
 * resulting range bound modes.
 * @param ctx The code-generation context.
 * @param indexScan The index scan; only its last dimension operator (_last_dim_op1) is consulted.
 * @param[in,out] maxArg Forwarded to buildScanExpression().
 * @param rangeScanType The range scan type, forwarded to the key-building helpers.
 * @param[out] beginRangeBound Receives the bound mode of the begin iterator key.
 * @param[out] endRangeBound Receives the bound mode of the end iterator key.
 * @param outerRow Forwarded to buildScanExpression().
 * @param subQueryIndex Forwarded to the key-building helpers.
 * @param offset Offset passed to AddFillKeyPattern() for the pattern-filled key part.
 * @param size Size passed to AddFillKeyPattern() for the pattern-filled key part.
 * @param lastExpr The search expression of the last dimension.
 */
static void BuildAscendingSemiOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
    JitRangeScanType rangeScanType, JitRangeBoundMode* beginRangeBound, JitRangeBoundMode* endRangeBound,
    llvm::Value* outerRow, int subQueryIndex, int offset, int size, JitColumnExpr* lastExpr)
{
    const auto lastDimOp = indexScan->_last_dim_op1;
    const bool isUpperBoundOp = (lastDimOp == JIT_WOC_LESS_THAN) || (lastDimOp == JIT_WOC_LESS_EQUALS);

    // NOTE(review): buildScanExpression() returns bool, and its result is not checked here.
    if (!isUpperBoundOp) {
        // lower bound operator on an ascending semi-open scan: the begin key gets the value and the end key
        // is padded with 0xFF
        buildScanExpression(ctx, lastExpr, maxArg, JIT_RANGE_ITERATOR_START, rangeScanType, outerRow, subQueryIndex);
        AddFillKeyPattern(ctx, 0xFF, offset, size, JIT_RANGE_ITERATOR_END, rangeScanType, subQueryIndex);
        if (lastDimOp == JIT_WOC_GREATER_EQUALS) {
            *beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
        } else {
            *beginRangeBound = JIT_RANGE_BOUND_EXCLUDE;
        }
        *endRangeBound = JIT_RANGE_BOUND_INCLUDE;
        return;
    }

    // upper bound operator on an ascending semi-open scan: the begin key is padded with zeros and the end
    // key gets the value
    AddFillKeyPattern(ctx, 0x00, offset, size, JIT_RANGE_ITERATOR_START, rangeScanType, subQueryIndex);
    buildScanExpression(ctx, lastExpr, maxArg, JIT_RANGE_ITERATOR_END, rangeScanType, outerRow, subQueryIndex);
    *beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
    if (lastDimOp == JIT_WOC_LESS_EQUALS) {
        *endRangeBound = JIT_RANGE_BOUND_INCLUDE;
    } else {
        *endRangeBound = JIT_RANGE_BOUND_EXCLUDE;
    }
}
/**
 * @brief Emits the last-dimension key filling of a descending semi-open range scan and reports the
 * resulting range bound modes.
 * @param ctx The code-generation context.
 * @param indexScan The index scan; only its last dimension operator (_last_dim_op1) is consulted.
 * @param[in,out] maxArg Forwarded to buildScanExpression().
 * @param rangeScanType The range scan type, forwarded to the key-building helpers.
 * @param[out] beginRangeBound Receives the bound mode of the begin iterator key.
 * @param[out] endRangeBound Receives the bound mode of the end iterator key.
 * @param outerRow Forwarded to buildScanExpression().
 * @param subQueryIndex Forwarded to the key-building helpers.
 * @param offset Offset passed to AddFillKeyPattern() for the pattern-filled key part.
 * @param size Size passed to AddFillKeyPattern() for the pattern-filled key part.
 * @param lastExpr The search expression of the last dimension.
 */
static void BuildDescendingSemiOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
    JitRangeScanType rangeScanType, JitRangeBoundMode* beginRangeBound, JitRangeBoundMode* endRangeBound,
    llvm::Value* outerRow, int subQueryIndex, int offset, int size, JitColumnExpr* lastExpr)
{
    const auto lastDimOp = indexScan->_last_dim_op1;
    const bool isUpperBoundOp = (lastDimOp == JIT_WOC_LESS_THAN) || (lastDimOp == JIT_WOC_LESS_EQUALS);

    // NOTE(review): buildScanExpression() returns bool, and its result is not checked here.
    if (!isUpperBoundOp) {
        // lower bound operator on a descending semi-open scan: the begin key is padded with 0xFF and the
        // end key gets the value
        AddFillKeyPattern(ctx, 0xFF, offset, size, JIT_RANGE_ITERATOR_START, rangeScanType, subQueryIndex);
        buildScanExpression(ctx, lastExpr, maxArg, JIT_RANGE_ITERATOR_END, rangeScanType, outerRow, subQueryIndex);
        *beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
        if (lastDimOp == JIT_WOC_GREATER_EQUALS) {
            *endRangeBound = JIT_RANGE_BOUND_INCLUDE;
        } else {
            *endRangeBound = JIT_RANGE_BOUND_EXCLUDE;
        }
        return;
    }

    // upper bound operator on a descending semi-open scan: the begin key gets the value and the end key is
    // padded with zeros
    buildScanExpression(ctx, lastExpr, maxArg, JIT_RANGE_ITERATOR_START, rangeScanType, outerRow, subQueryIndex);
    AddFillKeyPattern(ctx, 0x00, offset, size, JIT_RANGE_ITERATOR_END, rangeScanType, subQueryIndex);
    if (lastDimOp == JIT_WOC_LESS_EQUALS) {
        *beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
    } else {
        *beginRangeBound = JIT_RANGE_BOUND_EXCLUDE;
    }
    *endRangeBound = JIT_RANGE_BOUND_INCLUDE;
}
static bool buildSemiOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
JitRangeScanType rangeScanType, JitRangeBoundMode* beginRangeBound, JitRangeBoundMode* endRangeBound,
llvm::Value* outerRow, int subQueryIndex)
{
@ -1184,45 +1230,29 @@ bool buildSemiOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* indexScan,
int last_expr_index = indexScan->_search_exprs._count - 1;
JitColumnExpr* last_expr = &indexScan->_search_exprs._exprs[last_expr_index];
if (ascending) {
if ((indexScan->_last_dim_op1 == JIT_WOC_LESS_THAN) || (indexScan->_last_dim_op1 == JIT_WOC_LESS_EQUALS)) {
// this is an upper bound operator on an ascending semi-open scan so we fill the begin key with zeros,
// and the end key with the value
AddFillKeyPattern(ctx, 0x00, offset, size, JIT_RANGE_ITERATOR_START, rangeScanType, subQueryIndex);
buildScanExpression(
ctx, last_expr, maxArg, JIT_RANGE_ITERATOR_END, rangeScanType, outerRow, subQueryIndex);
*beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
*endRangeBound = (indexScan->_last_dim_op1 == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE
: JIT_RANGE_BOUND_EXCLUDE;
} else {
// this is a lower bound operator on an ascending semi-open scan so we fill the begin key with the
// value, and the end key with 0xFF
buildScanExpression(
ctx, last_expr, maxArg, JIT_RANGE_ITERATOR_START, rangeScanType, outerRow, subQueryIndex);
AddFillKeyPattern(ctx, 0xFF, offset, size, JIT_RANGE_ITERATOR_END, rangeScanType, subQueryIndex);
*beginRangeBound = (indexScan->_last_dim_op1 == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE
: JIT_RANGE_BOUND_EXCLUDE;
*endRangeBound = JIT_RANGE_BOUND_INCLUDE;
}
BuildAscendingSemiOpenRangeScan(ctx,
indexScan,
maxArg,
rangeScanType,
beginRangeBound,
endRangeBound,
outerRow,
subQueryIndex,
offset,
size,
last_expr);
} else {
if ((indexScan->_last_dim_op1 == JIT_WOC_LESS_THAN) || (indexScan->_last_dim_op1 == JIT_WOC_LESS_EQUALS)) {
// this is an upper bound operator on a descending semi-open scan so we fill the begin key with value,
// and the end key with zeroes
buildScanExpression(
ctx, last_expr, maxArg, JIT_RANGE_ITERATOR_START, rangeScanType, outerRow, subQueryIndex);
AddFillKeyPattern(ctx, 0x00, offset, size, JIT_RANGE_ITERATOR_END, rangeScanType, subQueryIndex);
*beginRangeBound = (indexScan->_last_dim_op1 == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE
: JIT_RANGE_BOUND_EXCLUDE;
*endRangeBound = JIT_RANGE_BOUND_INCLUDE;
} else {
// this is a lower bound operator on a descending semi-open scan so we fill the begin key with 0xFF, and
// the end key with the value
AddFillKeyPattern(ctx, 0xFF, offset, size, JIT_RANGE_ITERATOR_START, rangeScanType, subQueryIndex);
buildScanExpression(
ctx, last_expr, maxArg, JIT_RANGE_ITERATOR_END, rangeScanType, outerRow, subQueryIndex);
*beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
*endRangeBound = (indexScan->_last_dim_op1 == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE
: JIT_RANGE_BOUND_EXCLUDE;
}
BuildDescendingSemiOpenRangeScan(ctx,
indexScan,
maxArg,
rangeScanType,
beginRangeBound,
endRangeBound,
outerRow,
subQueryIndex,
offset,
size,
last_expr);
}
// now fill the rest as usual
@ -1252,7 +1282,108 @@ bool buildSemiOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* indexScan,
return result;
}
bool buildOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
/**
 * @brief Emits the key filling for the two range operators of an ascending open range scan and reports
 * the resulting range bound modes.
 *
 * On an ascending scan the lower bound value goes into the begin iterator key and the upper bound value
 * into the end iterator key, regardless of which of the two operators appears first.
 *
 * @param ctx The code-generation context.
 * @param indexScan The index scan (not consulted here; the operators are passed explicitly).
 * @param[in,out] maxArg Forwarded to buildScanExpression().
 * @param rangeScanType The range scan type, forwarded to buildScanExpression().
 * @param[out] beginRangeBound Receives the bound mode of the begin iterator key.
 * @param[out] endRangeBound Receives the bound mode of the end iterator key.
 * @param outerRow Forwarded to buildScanExpression().
 * @param subQueryIndex Forwarded to buildScanExpression().
 * @param beforeLastDimOp The operator of the before-last search expression.
 * @param lastDimOp The operator of the last search expression.
 * @param beforeLastExpr The before-last search expression.
 * @param lastExpr The last search expression.
 */
static void BuildAscendingOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
    JitRangeScanType rangeScanType, JitRangeBoundMode* beginRangeBound, JitRangeBoundMode* endRangeBound,
    llvm::Value* outerRow, int subQueryIndex, JitWhereOperatorClass beforeLastDimOp, JitWhereOperatorClass lastDimOp,
    JitColumnExpr* beforeLastExpr, JitColumnExpr* lastExpr)
{
    // work out which of the two expressions is the lower bound and which is the upper bound
    JitColumnExpr* lowerBoundExpr = nullptr;
    JitColumnExpr* upperBoundExpr = nullptr;
    JitWhereOperatorClass lowerBoundOp;
    JitWhereOperatorClass upperBoundOp;
    if ((beforeLastDimOp == JIT_WOC_LESS_THAN) || (beforeLastDimOp == JIT_WOC_LESS_EQUALS)) {
        // the before-last operator is the upper bound, so the last operator must be the lower bound
        MOT_ASSERT((lastDimOp == JIT_WOC_GREATER_THAN) || (lastDimOp == JIT_WOC_GREATER_EQUALS));
        lowerBoundExpr = lastExpr;
        lowerBoundOp = lastDimOp;
        upperBoundExpr = beforeLastExpr;
        upperBoundOp = beforeLastDimOp;
    } else {
        // the before-last operator is the lower bound, so the last operator must be the upper bound
        MOT_ASSERT((lastDimOp == JIT_WOC_LESS_THAN) || (lastDimOp == JIT_WOC_LESS_EQUALS));
        lowerBoundExpr = beforeLastExpr;
        lowerBoundOp = beforeLastDimOp;
        upperBoundExpr = lastExpr;
        upperBoundOp = lastDimOp;
    }

    // ascending scan: lower bound on begin iterator key, upper bound on end iterator key
    // NOTE(review): buildScanExpression() returns bool, and its result is not checked here.
    buildScanExpression(
        ctx, lowerBoundExpr, maxArg, JIT_RANGE_ITERATOR_START, rangeScanType, outerRow, subQueryIndex);
    buildScanExpression(ctx, upperBoundExpr, maxArg, JIT_RANGE_ITERATOR_END, rangeScanType, outerRow, subQueryIndex);

    if (lowerBoundOp == JIT_WOC_GREATER_EQUALS) {
        *beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
    } else {
        *beginRangeBound = JIT_RANGE_BOUND_EXCLUDE;
    }
    if (upperBoundOp == JIT_WOC_LESS_EQUALS) {
        *endRangeBound = JIT_RANGE_BOUND_INCLUDE;
    } else {
        *endRangeBound = JIT_RANGE_BOUND_EXCLUDE;
    }
}
/**
 * @brief Emits the key filling for the two range operators of a descending open range scan and reports
 * the resulting range bound modes.
 *
 * On a descending scan the upper bound value goes into the begin iterator key and the lower bound value
 * into the end iterator key, regardless of which of the two operators appears first.
 *
 * @param ctx The code-generation context.
 * @param indexScan The index scan (not consulted here; the operators are passed explicitly).
 * @param[in,out] maxArg Forwarded to buildScanExpression().
 * @param rangeScanType The range scan type, forwarded to buildScanExpression().
 * @param[out] beginRangeBound Receives the bound mode of the begin iterator key.
 * @param[out] endRangeBound Receives the bound mode of the end iterator key.
 * @param outerRow Forwarded to buildScanExpression().
 * @param subQueryIndex Forwarded to buildScanExpression().
 * @param beforeLastDimOp The operator of the before-last search expression.
 * @param lastDimOp The operator of the last search expression.
 * @param beforeLastExpr The before-last search expression.
 * @param lastExpr The last search expression.
 */
static void BuildDescendingOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
    JitRangeScanType rangeScanType, JitRangeBoundMode* beginRangeBound, JitRangeBoundMode* endRangeBound,
    llvm::Value* outerRow, int subQueryIndex, JitWhereOperatorClass beforeLastDimOp, JitWhereOperatorClass lastDimOp,
    JitColumnExpr* beforeLastExpr, JitColumnExpr* lastExpr)
{
    // work out which of the two expressions is the upper bound and which is the lower bound
    JitColumnExpr* upperBoundExpr = nullptr;
    JitColumnExpr* lowerBoundExpr = nullptr;
    JitWhereOperatorClass upperBoundOp;
    JitWhereOperatorClass lowerBoundOp;
    if ((beforeLastDimOp == JIT_WOC_LESS_THAN) || (beforeLastDimOp == JIT_WOC_LESS_EQUALS)) {
        // the before-last operator is the upper bound, so the last operator must be the lower bound; the
        // begin key is filled with the before-last value and the end key with the last value
        MOT_ASSERT((lastDimOp == JIT_WOC_GREATER_THAN) || (lastDimOp == JIT_WOC_GREATER_EQUALS));
        upperBoundExpr = beforeLastExpr;
        upperBoundOp = beforeLastDimOp;
        lowerBoundExpr = lastExpr;
        lowerBoundOp = lastDimOp;
    } else {
        // the before-last operator is the lower bound, so the last operator must be the upper bound; the
        // begin key is filled with the last value and the end key with the before-last value
        MOT_ASSERT((lastDimOp == JIT_WOC_LESS_THAN) || (lastDimOp == JIT_WOC_LESS_EQUALS));
        upperBoundExpr = lastExpr;
        upperBoundOp = lastDimOp;
        lowerBoundExpr = beforeLastExpr;
        lowerBoundOp = beforeLastDimOp;
    }

    // descending scan: upper bound on begin iterator key, lower bound on end iterator key
    // NOTE(review): buildScanExpression() returns bool, and its result is not checked here.
    buildScanExpression(
        ctx, upperBoundExpr, maxArg, JIT_RANGE_ITERATOR_START, rangeScanType, outerRow, subQueryIndex);
    buildScanExpression(ctx, lowerBoundExpr, maxArg, JIT_RANGE_ITERATOR_END, rangeScanType, outerRow, subQueryIndex);

    if (upperBoundOp == JIT_WOC_LESS_EQUALS) {
        *beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
    } else {
        *beginRangeBound = JIT_RANGE_BOUND_EXCLUDE;
    }
    if (lowerBoundOp == JIT_WOC_GREATER_EQUALS) {
        *endRangeBound = JIT_RANGE_BOUND_INCLUDE;
    } else {
        *endRangeBound = JIT_RANGE_BOUND_EXCLUDE;
    }
}
static bool buildOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
JitRangeScanType range_scan_type, JitRangeBoundMode* begin_range_bound, JitRangeBoundMode* end_range_bound,
llvm::Value* outer_row, int subQueryIndex)
{
@ -1295,97 +1426,31 @@ bool buildOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, in
JitColumnExpr* last_expr = &index_scan->_search_exprs._exprs[last_expr_index];
JitColumnExpr* before_last_expr = &index_scan->_search_exprs._exprs[last_expr_index - 1];
if (ascending) {
if ((before_last_dim_op == JIT_WOC_LESS_THAN) || (before_last_dim_op == JIT_WOC_LESS_EQUALS)) {
MOT_ASSERT((last_dim_op == JIT_WOC_GREATER_THAN) || (last_dim_op == JIT_WOC_GREATER_EQUALS));
// the before-last operator is an upper bound operator on an ascending open scan so we fill the begin
// key with the last value, and the end key with the before-last value
buildScanExpression(ctx,
last_expr,
max_arg,
JIT_RANGE_ITERATOR_START,
range_scan_type,
outer_row, // lower bound on begin iterator key
subQueryIndex);
buildScanExpression(ctx,
before_last_expr,
max_arg,
JIT_RANGE_ITERATOR_END,
range_scan_type,
outer_row, // upper bound on end iterator key
subQueryIndex);
*begin_range_bound =
(last_dim_op == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
*end_range_bound =
(before_last_dim_op == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
} else {
MOT_ASSERT((last_dim_op == JIT_WOC_LESS_THAN) || (last_dim_op == JIT_WOC_LESS_EQUALS));
// the before-last operator is a lower bound operator on an ascending open scan so we fill the begin key
// with the before-last value, and the end key with the last value
buildScanExpression(ctx,
before_last_expr,
max_arg,
JIT_RANGE_ITERATOR_START,
range_scan_type,
outer_row, // lower bound on begin iterator key
subQueryIndex);
buildScanExpression(ctx,
last_expr,
max_arg,
JIT_RANGE_ITERATOR_END,
range_scan_type,
outer_row, // upper bound on end iterator key
subQueryIndex);
*begin_range_bound =
(before_last_dim_op == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
*end_range_bound =
(last_dim_op == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
}
BuildAscendingOpenRangeScan(ctx,
index_scan,
max_arg,
range_scan_type,
begin_range_bound,
end_range_bound,
outer_row,
subQueryIndex,
before_last_dim_op,
last_dim_op,
before_last_expr,
last_expr);
} else {
if ((before_last_dim_op == JIT_WOC_LESS_THAN) || (before_last_dim_op == JIT_WOC_LESS_EQUALS)) {
MOT_ASSERT((last_dim_op == JIT_WOC_GREATER_THAN) || (last_dim_op == JIT_WOC_GREATER_EQUALS));
// the before-last operator is an upper bound operator on a descending open scan so we fill the begin
// key with the before-last value, and the end key with the last value
buildScanExpression(ctx,
before_last_expr,
max_arg,
JIT_RANGE_ITERATOR_START,
range_scan_type,
outer_row, // upper bound on begin iterator key
subQueryIndex);
buildScanExpression(ctx,
last_expr,
max_arg,
JIT_RANGE_ITERATOR_END,
range_scan_type,
outer_row, // lower bound on end iterator key
subQueryIndex);
*begin_range_bound =
(before_last_dim_op == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
*end_range_bound =
(last_dim_op == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
} else {
MOT_ASSERT((last_dim_op == JIT_WOC_LESS_THAN) || (last_dim_op == JIT_WOC_LESS_EQUALS));
// the before-last operator is a lower bound operator on an descending open scan so we fill the begin
// key with the last value, and the end key with the before-last value
buildScanExpression(ctx,
last_expr,
max_arg,
JIT_RANGE_ITERATOR_START,
range_scan_type,
outer_row, // upper bound on begin iterator key
subQueryIndex);
buildScanExpression(ctx,
before_last_expr,
max_arg,
JIT_RANGE_ITERATOR_END,
range_scan_type,
outer_row, // lower bound on end iterator key
subQueryIndex);
*begin_range_bound =
(last_dim_op == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
*end_range_bound =
(before_last_dim_op == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
}
BuildDescendingOpenRangeScan(ctx,
index_scan,
max_arg,
range_scan_type,
begin_range_bound,
end_range_bound,
outer_row,
subQueryIndex,
before_last_dim_op,
last_dim_op,
before_last_expr,
last_expr);
}
// now fill the rest as usual
@ -1415,9 +1480,9 @@ bool buildOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, in
return result;
}
bool buildRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
static bool buildRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
JitRangeScanType range_scan_type, JitRangeBoundMode* begin_range_bound, JitRangeBoundMode* end_range_bound,
llvm::Value* outer_row, int subQueryIndex /* = -1 */)
llvm::Value* outer_row, int subQueryIndex = -1)
{
bool result = false;
@ -1450,7 +1515,7 @@ bool buildRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, int* m
return result;
}
bool buildPrepareStateScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
static bool buildPrepareStateScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
JitRangeScanType range_scan_type, llvm::Value* outer_row)
{
JitRangeBoundMode begin_range_bound = JIT_RANGE_BOUND_NONE;
@ -1493,7 +1558,7 @@ bool buildPrepareStateScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan,
return true;
}
bool buildPrepareStateRow(JitLlvmCodeGenContext* ctx, MOT::AccessType access_mode, JitIndexScan* index_scan,
static bool buildPrepareStateRow(JitLlvmCodeGenContext* ctx, MOT::AccessType access_mode, JitIndexScan* index_scan,
int* max_arg, JitRangeScanType range_scan_type, llvm::BasicBlock* next_block)
{
llvm::LLVMContext& context = ctx->_code_gen->context();

View File

@ -34,15 +34,15 @@
#include "jit_llvm_util.h"
namespace JitExec {
/** @brief Creates a jitted function for code generation. Builds prototype and entry block. */
void CreateJittedFunction(JitLlvmCodeGenContext* ctx, const char* function_name);
/** @brief Builds a code segment for checking if soft memory limit has been reached. */
void buildIsSoftMemoryLimitReached(JitLlvmCodeGenContext* ctx);
/** @brief Builds a code segment for writing a row. */
void buildWriteRow(JitLlvmCodeGenContext* ctx, llvm::Value* row, bool isPKey, JitLlvmRuntimeCursor* cursor);
/** @brief Creates a jitted function for code generation. Builds prototype and entry block. */
void CreateJittedFunction(JitLlvmCodeGenContext* ctx, const char* function_name);
/** @brief Adds code to reset the number of rows processed. */
void buildResetRowsProcessed(JitLlvmCodeGenContext* ctx);
@ -70,9 +70,6 @@ llvm::Value* buildGetRowFromIterator(JitLlvmCodeGenContext* ctx, llvm::BasicBloc
MOT::AccessType access_mode, JitIndexScanDirection index_scan_direction, JitLlvmRuntimeCursor* cursor,
JitRangeScanType range_scan_type, int subQueryIndex = -1);
bool buildScanExpression(JitLlvmCodeGenContext* ctx, JitColumnExpr* expr, int* max_arg,
JitRangeIteratorType range_itr_type, JitRangeScanType range_scan_type, llvm::Value* outer_row, int subQueryIndex);
bool buildPointScan(JitLlvmCodeGenContext* ctx, JitColumnExprArray* exprArray, int* maxArg,
JitRangeScanType rangeScanType, llvm::Value* outerRow, int exprCount = -1, int subQueryIndex = -1);
@ -82,24 +79,6 @@ bool writeRowColumns(
bool selectRowColumns(JitLlvmCodeGenContext* ctx, llvm::Value* row, JitSelectExprArray* expr_array, int* max_arg,
JitRangeScanType range_scan_type, int subQueryIndex = -1);
bool buildSemiOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
JitRangeScanType rangeScanType, JitRangeBoundMode* beginRangeBound, JitRangeBoundMode* endRangeBound,
llvm::Value* outerRow, int subQueryIndex);
bool buildOpenRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
JitRangeScanType range_scan_type, JitRangeBoundMode* begin_range_bound, JitRangeBoundMode* end_range_bound,
llvm::Value* outer_row, int subQueryIndex);
bool buildRangeScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
JitRangeScanType range_scan_type, JitRangeBoundMode* begin_range_bound, JitRangeBoundMode* end_range_bound,
llvm::Value* outer_row, int subQueryIndex = -1);
bool buildPrepareStateScan(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
JitRangeScanType range_scan_type, llvm::Value* outer_row);
bool buildPrepareStateRow(JitLlvmCodeGenContext* ctx, MOT::AccessType access_mode, JitIndexScan* index_scan,
int* max_arg, JitRangeScanType range_scan_type, llvm::BasicBlock* next_block);
llvm::Value* buildPrepareStateScanRow(JitLlvmCodeGenContext* ctx, JitIndexScan* index_scan,
JitRangeScanType range_scan_type, MOT::AccessType access_mode, int* max_arg, llvm::Value* outer_row,
llvm::BasicBlock* next_block, llvm::BasicBlock** loop_block);
@ -108,14 +87,6 @@ JitLlvmRuntimeCursor buildRangeCursor(JitLlvmCodeGenContext* ctx, JitIndexScan*
JitRangeScanType rangeScanType, JitIndexScanDirection indexScanDirection, llvm::Value* outerRow,
int subQueryIndex = -1);
bool prepareAggregateAvg(JitLlvmCodeGenContext* ctx, const JitAggregate* aggregate);
bool prepareAggregateSum(JitLlvmCodeGenContext* ctx, const JitAggregate* aggregate);
bool prepareAggregateMaxMin(JitLlvmCodeGenContext* ctx, JitAggregate* aggregate);
bool prepareAggregateCount(JitLlvmCodeGenContext* ctx, JitAggregate* aggregate);
bool prepareAggregate(JitLlvmCodeGenContext* ctx, JitAggregate* aggregate);
bool buildAggregateRow(

View File

@ -1092,6 +1092,27 @@ static JitPlan* JitPrepareRangeSelectPlan(Query* query, MOT::Table* table, JoinC
return (JitPlan*)plan;
}
/**
 * @brief Prepares a JIT plan for a simple point query, according to the query command type.
 * @param query The query to plan.
 * @param table The table passed on to the command-specific plan builder.
 * @return The plan produced by the UPDATE/DELETE/SELECT plan builder, or nullptr if the query attributes
 * were rejected or the command type is none of these three.
 */
static JitPlan* JitPrepareSimplePointQueryPlan(Query* query, MOT::Table* table)
{
    // point query does not expect sort clause or aggregate clause
    if (!CheckQueryAttributes(query, false, false, false)) {
        MOT_LOG_TRACE("JitPrepareSimplePlan(): Disqualifying point query - Invalid query attributes");
        return nullptr;
    }

    // dispatch to the plan builder matching the command type
    switch (query->commandType) {
        case CMD_UPDATE:
            return JitPrepareUpdatePlan(query, table);

        case CMD_DELETE:
            return JitPrepareDeletePlan(query, table);

        case CMD_SELECT:
            return JitPrepareSelectPlan(query, table);

        default:
            MOT_REPORT_ERROR(MOT_ERROR_INTERNAL,
                "Prepare Simple Point-query JIT Plan",
                "Unexpected command type: %d",
                (int)query->commandType);
            return nullptr;
    }
}
static JitPlan* JitPrepareSimplePlan(Query* query)
{
JitPlan* plan = nullptr;
@ -1119,21 +1140,7 @@ static JitPlan* JitPrepareSimplePlan(Query* query)
&count)) { // count all equals operators that relate to the index (disregard other filters)
MOT_LOG_TRACE("JitPrepareSimplePlan(): Failed to determine if this is a point query");
} else if (count == index->GetNumFields()) { // a point query
// point query does not expect sort clause or aggregate clause
if (!CheckQueryAttributes(query, false, false, false)) {
MOT_LOG_TRACE("JitPrepareSimplePlan(): Disqualifying point query - Invalid query attributes");
} else if (query->commandType == CMD_UPDATE) {
plan = JitPrepareUpdatePlan(query, table);
} else if (query->commandType == CMD_DELETE) {
plan = JitPrepareDeletePlan(query, table);
} else if (query->commandType == CMD_SELECT) {
plan = JitPrepareSelectPlan(query, table);
} else {
MOT_REPORT_ERROR(MOT_ERROR_INTERNAL,
"Prepare Simple Point-query JIT Plan",
"Unexpected command type: %d",
(int)query->commandType);
}
plan = JitPrepareSimplePointQueryPlan(query, table);
} else {
if (query->commandType == CMD_UPDATE) {
if (!CheckQueryAttributes(

View File

@ -33,6 +33,408 @@ namespace JitExec {
IMPLEMENT_CLASS_LOGGER(ExpressionVisitor, JitExec)
DECLARE_LOGGER(JitPlanExpr, JitExec)
// Forward declarations
JitExpr* parseExpr(Query* query, Expr* expr, int arg_pos, int depth);
// Visitor callback that counts visited expressions whose operator class is EQUALS.
// Expressions with any other operator class are skipped. Always returns true, since skipping is not an error.
bool ExpressionCounter::OnExpression(
    Expr* expr, int columnType, int tableColumnId, MOT::Table* table, JitWhereOperatorClass opClass, bool joinExpr)
{
    if (opClass == JIT_WOC_EQUALS) {
        ++(*_count);
    } else {
        MOT_LOG_TRACE("ExpressionCounter::onExpression(): Skipping non-equals operator");
    }
    return true;
}
// Visitor callback that parses each EQUALS expression and stores it in the next free slot of the
// expression array. Non-equals expressions are skipped (returns true).
// Returns false when the array capacity is exceeded or when the expression cannot be parsed (in the latter
// case Cleanup() is invoked first).
bool ExpressionCollector::OnExpression(
    Expr* expr, int columnType, int tableColumnId, MOT::Table* table, JitWhereOperatorClass opClass, bool joinExpr)
{
    if (opClass != JIT_WOC_EQUALS) {
        MOT_LOG_TRACE("ExpressionCollector::onExpression(): Skipping non-equals operator");
        return true;  // this is not an error condition
    }

    // refuse to write past the capacity of the expression array
    if (*_expr_count >= _expr_array->_count) {
        MOT_REPORT_ERROR(MOT_ERROR_INTERNAL, "Prepare JIT Plan", "Exceeded expression count %d", _expr_array->_count);
        return false;
    }

    JitExpr* parsedExpr = parseExpr(_query, expr, 0, 0);
    if (parsedExpr == nullptr) {
        MOT_LOG_TRACE("ExpressionCollector::onExpression(): Failed to parse expression %d", *_expr_count);
        Cleanup();
        return false;
    }

    // fill the next free slot and advance the running count
    auto& slot = _expr_array->_exprs[*_expr_count];
    slot._table_column_id = tableColumnId;
    slot._table = table;
    slot._expr = parsedExpr;
    slot._column_type = columnType;
    slot._join_expr = joinExpr;
    ++(*_expr_count);
    return true;
}
// Releases every expression collected so far (slots [0, *_expr_count)).
// The previous implementation indexed the array with *_expr_count instead of the loop variable, so it
// repeatedly freed the not-yet-filled slot and never released the collected expressions.
// After freeing, each slot is reset to nullptr and the running count is reset to zero (as done by
// RangeScanExpressionCollector::Cleanup()), so no stale pointer is left behind for a later release.
void ExpressionCollector::Cleanup()
{
    for (int i = 0; i < *_expr_count; ++i) {
        freeExpr(_expr_array->_exprs[i]._expr);
        _expr_array->_exprs[i]._expr = nullptr;
    }
    *_expr_count = 0;
}
// Allocates the index operator array used while collecting range scan expressions.
// The array holds one entry more than the number of fields in the index.
// Returns false (after reporting an out-of-memory error) if the session allocation fails.
bool RangeScanExpressionCollector::Init()
{
    _max_index_ops = _index->GetNumFields() + 1;
    size_t bytesRequired = sizeof(IndexOpClass) * _max_index_ops;
    _index_ops = (IndexOpClass*)MOT::MemSessionAlloc(bytesRequired);

    const bool allocated = (_index_ops != nullptr);
    if (!allocated) {
        MOT_REPORT_ERROR(MOT_ERROR_OOM,
            "Prepare JIT Range Scan Plan",
            "Failed to allocate %u bytes for %d index operations",
            (unsigned)bytesRequired,
            _max_index_ops);
    }
    return allocated;
}
// Visitor callback that records one search expression for a range scan.
// The parsed expression goes into the next slot of the index scan search expression array, and the
// matching index column id and operator class go into the same position of the index operator array.
// Returns false when either array is full or when the expression cannot be parsed (Cleanup() is invoked
// in the latter case).
bool RangeScanExpressionCollector::OnExpression(
    Expr* expr, int columnType, int tableColumnId, MOT::Table* table, JitWhereOperatorClass opClass, bool joinExpr)
{
    // capacity of the search expression array
    if (_index_op_count >= _index_scan->_search_exprs._count) {
        MOT_REPORT_ERROR(MOT_ERROR_INTERNAL,
            "Prepare JIT Plan",
            "Exceeded expression count %d, while collecting range scan expressions",
            _index_scan->_search_exprs._count);
        return false;
    }

    // capacity of the index operator array
    if (_index_op_count == _max_index_ops) {
        MOT_REPORT_ERROR(MOT_ERROR_INTERNAL,
            "Prepare JIT Plan",
            "Exceeded index column count %d, while collecting range scan expressions",
            _max_index_ops);
        return false;
    }

    JitExpr* parsedExpr = parseExpr(_query, expr, 0, 0);
    if (parsedExpr == nullptr) {
        MOT_LOG_TRACE(
            "RangeScanExpressionCollector::onExpression(): Failed to parse expression %d", _index_op_count);
        Cleanup();
        return false;
    }

    // record the expression itself
    auto& exprSlot = _index_scan->_search_exprs._exprs[_index_op_count];
    exprSlot._table_column_id = tableColumnId;
    exprSlot._table = table;
    exprSlot._expr = parsedExpr;
    exprSlot._column_type = columnType;
    exprSlot._join_expr = joinExpr;

    // record the operator at the same position in the parallel array
    auto& opSlot = _index_ops[_index_op_count];
    opSlot._index_column_id = MapTableColumnToIndex(_table, _index, tableColumnId);
    opSlot._op_class = opClass;

    ++_index_op_count;
    return true;
}
// Determines the index scan type from the collected operators and publishes the result in _index_scan
// (_scan_type, _column_count and _search_exprs._count).
// The scan type is left as JIT_INDEX_SCAN_TYPE_INVALID whenever the query is disqualified: duplicate
// removal failed, a non-equals operator precedes the last two operators, the last two operators form an
// invalid combination, or the index column sequence has holes.
void RangeScanExpressionCollector::EvaluateScanType()
{
    _index_scan->_scan_type = JIT_INDEX_SCAN_TYPE_INVALID;
    JitIndexScanType scanType = JIT_INDEX_SCAN_TYPE_INVALID;

    // if two expressions refer to the same column, we regard one of them as filter
    // if an expression is removed from index scan, it will automatically be collected as filter
    // (see pkey_exprs argument in @ref visitSearchOpExpression)
    if (!RemoveDuplicates()) {
        MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - failed to remove duplicates");
        return;
    }

    // if no expression was collected, we treat this as a full scan
    int columnCount = _index_op_count;
    if (_index_op_count == 0) {
        MOT_LOG_TRACE("RangeScanExpressionCollector(): no expression was collected, assuming full scan");
        _index_scan->_scan_type = JIT_INDEX_SCAN_FULL;
        _index_scan->_column_count = 0;
        _index_scan->_search_exprs._count = 0;
        return;
    }

    // first step: sort in-place all collected operators
    // std::sort works on the half-open range [first, last), so the end pointer must be one past the last
    // collected operator; passing the address of the last operator would leave it out of the sort
    // NOTE(review): only _index_ops is reordered here, _search_exprs keeps its collection order - confirm
    // that consumers do not assume both arrays are position-aligned after this point
    if (_index_op_count > 1) {
        std::sort(_index_ops, _index_ops + _index_op_count, IndexOpCmp);
    }

    // now verify all but last two are equals operator
    for (int i = 0; i < _index_op_count - 2; ++i) {
        if (_index_ops[i]._op_class != JIT_WOC_EQUALS) {
            MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - encountered non-equals operator "
                          "at premature index column %d",
                _index_ops[i]._index_column_id);
            return;
        }
    }

    // now carefully inspect last two operators to determine expected scan type
    if (!DetermineScanType(scanType, columnCount)) {
        MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - invalid open scan specifying last "
                      "operator as equals, while previous one is not");
        return;
    }

    // final step: verify we have no holes in the columns according to the expected scan type
    if (!ScanHasHoles(scanType)) {
        _index_scan->_scan_type = scanType;
        _index_scan->_column_count = columnCount;
        _index_scan->_search_exprs._count = _index_op_count;  // update real number of participating expressions
    }
}
// Derives the scan type from the last one or two collected operators.
//   - previous operator is equals (or there is only one operator) and last is equals: point scan when the
//     index is unique and all of its fields are covered, otherwise closed scan
//   - previous operator is equals (or there is only one operator) and last is not: semi-open scan
//   - neither of the last two is equals: open scan, columnCount is set to the operator count minus one
//   - previous operator is not equals while the last one is: invalid, returns false
// scanType and columnCount are written only in the cases listed above; with no collected operators the
// function returns true without touching them.
bool RangeScanExpressionCollector::DetermineScanType(JitIndexScanType& scanType, int& columnCount)
{
    if (_index_op_count < 1) {
        return true;
    }

    const auto lastOp = _index_ops[_index_op_count - 1]._op_class;
    const bool lastIsEquals = (lastOp == JIT_WOC_EQUALS);
    // a single operator has no predecessor, which is handled like an equals predecessor
    const bool prevIsEquals =
        (_index_op_count == 1) || (_index_ops[_index_op_count - 2]._op_class == JIT_WOC_EQUALS);

    if (prevIsEquals) {
        if (!lastIsEquals) {
            scanType = JIT_INDEX_SCAN_SEMI_OPEN;
            _index_scan->_last_dim_op1 = lastOp;
        } else if (_index->GetUnique() && (_index_op_count == _index->GetNumFields())) {
            scanType = JIT_INDEX_SCAN_POINT;
        } else {
            scanType = JIT_INDEX_SCAN_CLOSED;
        }
        return true;
    }

    // from here on there are at least two operators and the one before last is not equals
    if (lastIsEquals) {
        MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - invalid open scan specifying last "
                      "operator as equals, while previous one is not");
        return false;
    }

    scanType = JIT_INDEX_SCAN_OPEN;
    columnCount = _index_op_count - 1;
    _index_scan->_last_dim_op1 = _index_ops[_index_op_count - 2]._op_class;
    _index_scan->_last_dim_op2 = lastOp;
    return true;
}
// Frees all search expressions collected so far and resets both the search expression count of the index
// scan and the collected operator count to zero.
void RangeScanExpressionCollector::Cleanup()
{
    auto& searchExprs = _index_scan->_search_exprs;
    int pos = 0;
    while (pos < _index_op_count) {
        freeExpr(searchExprs._exprs[pos]._expr);
        ++pos;
    }
    searchExprs._count = 0;
    _index_op_count = 0;
}
// Repeatedly removes one duplicate index column reference at a time until RemoveSingleDuplicate() reports
// that nothing changed (zero) or that an error occurred (negative).
// Returns true only when the last pass ended with zero.
bool RangeScanExpressionCollector::RemoveDuplicates()
{
    int status = 0;
    do {
        status = RemoveSingleDuplicate();
    } while (status > 0);
    return (status == 0);
}
// Searches for a pair of collected operators that refer to the same index column and removes one of them.
// Pairs in which both operators are non-equals are left in place (they may form an open scan).
// Returns 1 when an entry was removed, 0 when nothing changed, and -1 on error.
// The removed ("victim") entry is overwritten by the last collected entry. This is done in both the search
// expression array and the index operator array, since both are filled position by position in
// OnExpression() and must keep describing the same expressions.
int RangeScanExpressionCollector::RemoveSingleDuplicate()
{
    // scan and stop after first removal
    for (int i = 1; i < _index_op_count; ++i) {
        for (int j = 0; j < i; ++j) {
            if (_index_ops[i]._index_column_id == _index_ops[j]._index_column_id) {
                MOT_LOG_TRACE("RangeScanExpressionCollector(): Found duplicate column ref at %d and %d", i, j);
                if ((_index_ops[i]._op_class != JIT_WOC_EQUALS) && (_index_ops[j]._op_class != JIT_WOC_EQUALS)) {
                    MOT_LOG_DEBUG("RangeScanExpressionCollector(): Skipping probable open scan operators while "
                                  "removing duplicates");
                    continue;
                }
                // now we need to decide which one to remove,
                // our consideration is to keep equals operators and then join expressions
                // NOTE(review): the both-non-equals case was skipped above, so at least one operator is
                // equals here and the first condition below always holds; the join-expression branches
                // that follow are therefore never reached - confirm whether "exactly one equals" was meant
                int victim = -1;
                if ((_index_ops[i]._op_class == JIT_WOC_EQUALS) || (_index_ops[j]._op_class == JIT_WOC_EQUALS)) {
                    // we keep the equals operator for index scan, and leave the other as a filter
                    MOT_LOG_TRACE("RangeScanExpressionCollector(): Rejecting query due to duplicate index column "
                                  "reference, one with EQUALS, one without");
                    if (_index_ops[i]._op_class != JIT_WOC_EQUALS) {
                        victim = i;
                    } else {
                        victim = j;
                    }
                } else if (_index_scan->_search_exprs._exprs[i]._join_expr &&
                           !_index_scan->_search_exprs._exprs[j]._join_expr) {
                    victim = j;
                } else if (!_index_scan->_search_exprs._exprs[i]._join_expr &&
                           _index_scan->_search_exprs._exprs[j]._join_expr) {
                    victim = i;
                } else if (_index_scan->_search_exprs._exprs[i]._join_expr &&
                           _index_scan->_search_exprs._exprs[j]._join_expr) {
                    // both are join expressions, this is unacceptable, so we abort
                    MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - duplicate JOIN expression "
                                  "on index %s in index column %d",
                        _index->GetName().c_str(),
                        _index_ops[i]._index_column_id);
                    return -1;  // signal error
                } else {
                    // both items are not join expressions, both refer to index columns, so we arbitrarily drop one
                    // of them
                    victim = j;
                }

                // switch victim with last item
                if (_index_scan->_search_exprs._exprs[victim]._expr != nullptr) {
                    freeExpr(_index_scan->_search_exprs._exprs[victim]._expr);
                }
                _index_scan->_search_exprs._exprs[victim] = _index_scan->_search_exprs._exprs[_index_op_count - 1];
                // move the matching operator as well; otherwise the victim operator would survive and the
                // last operator would be dropped, leaving the two arrays out of sync
                _index_ops[victim] = _index_ops[_index_op_count - 1];
                --_index_op_count;
                --_index_scan->_search_exprs._count;
                return 1;
            }
        }
    }

    // nothing changed
    return 0;
}
// Three-way integer comparison: returns -1 when lhs is smaller than rhs, 1 when it is greater, and 0 when
// both are equal.
int RangeScanExpressionCollector::IntCmp(int lhs, int rhs)
{
    if (lhs == rhs) {
        return 0;
    }
    return (lhs < rhs) ? -1 : 1;
}
// Strict "less than" ordering of index operators: first by index column id, then by operator class.
// The operator class tie-break is meant to put equals before other operators on the same column
// (presumably JIT_WOC_EQUALS has the lowest operator class value - TODO confirm).
bool RangeScanExpressionCollector::IndexOpCmp(const IndexOpClass& lhs, const IndexOpClass& rhs)
{
    const int columnOrder = IntCmp(lhs._index_column_id, rhs._index_column_id);
    if (columnOrder != 0) {
        return columnOrder < 0;
    }
    // same column: fall back to comparing operator classes
    return IntCmp(lhs._op_class, rhs._op_class) < 0;
}
// Checks whether the collected index operators fail to form a gap-free prefix of the index columns.
// Returns true (the scan has holes) when the first operator does not refer to index column 0, when two
// consecutive operators in the checked prefix do not refer to consecutive index columns, or - for an open
// scan - when the last two operators do not refer to the same index column. Returns false otherwise.
bool RangeScanExpressionCollector::ScanHasHoles(JitIndexScanType scan_type) const
{
    MOT_ASSERT(_index_op_count >= 1);

    // closed and semi-open scans expect to see all columns in increasing order beginning from zero,
    // while in an open scan the last operator is excluded from that check
    const bool openScan = (scan_type == JIT_INDEX_SCAN_OPEN);
    const int prefixLength = openScan ? (_index_op_count - 1) : _index_op_count;

    // full prefix must begin with index column zero
    if (_index_ops[0]._index_column_id != 0) {
        MOT_LOG_TRACE(
            "RangeScanExpressionCollector(): Disqualifying query - Index scan does not begin with index column 0");
        return true;
    }

    // check each operation relates to the next index column
    for (int pos = 0; (pos + 1) < prefixLength; ++pos) {
        int prev_column = _index_ops[pos]._index_column_id;
        int next_column = _index_ops[pos + 1]._index_column_id;
        if (next_column != (prev_column + 1)) {
            MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - found hole in closed or semi-open "
                          "range scan from index column %d to %d",
                prev_column,
                next_column);
            return true;
        }
    }

    if (!openScan) {
        return false;
    }

    // in open scan we expect two last columns to be equal
    MOT_ASSERT(_index_op_count >= 2);
    int prev_column = _index_ops[_index_op_count - 2]._index_column_id;
    int next_column = _index_ops[_index_op_count - 1]._index_column_id;
    if (next_column != prev_column) {
        MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - last two columns in open index "
                      "scan are not equals: %d, %d",
            prev_column,
            next_column);
        return true;
    }
    return false;
}
// Visitor callback that parses both operands of a filter expression and stores the filter in the next free
// slot of the filter array.
// Returns false when either operand cannot be parsed (Cleanup() is invoked) or when the filter array is
// already full. On every failure path the operands parsed by this call are released, so nothing parsed here
// is leaked.
bool FilterCollector::OnFilterExpr(int filterOp, int filterOpFuncId, Expr* lhs, Expr* rhs)
{
    JitExpr* jit_lhs = parseExpr(_query, lhs, 0, 0);
    if (jit_lhs == nullptr) {
        MOT_LOG_TRACE(
            "FilterCollector::onFilterExpr(): Failed to parse LHS expression in filter expression %d", *_filter_count);
        Cleanup();
        return false;
    }
    JitExpr* jit_rhs = parseExpr(_query, rhs, 1, 0);
    if (jit_rhs == nullptr) {
        MOT_LOG_TRACE(
            "FilterCollector::onFilterExpr(): Failed to parse RHS expression in filter expression %d", *_filter_count);
        freeExpr(jit_lhs);
        Cleanup();
        return false;
    }
    if (*_filter_count < _filter_array->_filter_count) {
        _filter_array->_scan_filters[*_filter_count]._filter_op = filterOp;
        _filter_array->_scan_filters[*_filter_count]._filter_op_funcid = filterOpFuncId;
        _filter_array->_scan_filters[*_filter_count]._lhs_operand = jit_lhs;
        _filter_array->_scan_filters[*_filter_count]._rhs_operand = jit_rhs;
        ++(*_filter_count);
        return true;
    } else {
        MOT_REPORT_ERROR(
            MOT_ERROR_INTERNAL, "Prepare JIT Plan", "Exceeded filter count %d", _filter_array->_filter_count);
        // the operands were never stored in the filter array, so they must be released here
        freeExpr(jit_lhs);
        freeExpr(jit_rhs);
        return false;
    }
}
// Releases both operands of every filter collected so far (slots [0, *_filter_count)).
// The previous implementation indexed the array with *_filter_count instead of the loop variable, so it
// repeatedly freed the not-yet-filled slot and never released the collected operands.
// After freeing, the operand pointers are reset to nullptr and the running count is reset to zero, so no
// stale pointer is left behind for a later release.
void FilterCollector::Cleanup()
{
    for (int i = 0; i < *_filter_count; ++i) {
        freeExpr(_filter_array->_scan_filters[i]._lhs_operand);
        _filter_array->_scan_filters[i]._lhs_operand = nullptr;
        freeExpr(_filter_array->_scan_filters[i]._rhs_operand);
        _filter_array->_scan_filters[i]._rhs_operand = nullptr;
    }
    *_filter_count = 0;
}
// Visitor callback that remembers a single sub-link found among the EQUALS expressions.
// Non-equals expressions and expressions that are not sub-links are ignored (returns true).
// Returns false, disqualifying the query, when more than one sub-link is seen, when the sub-link type is
// not EXPR_SUBLINK, or when the sub-link carries an outer test expression.
bool SubLinkFetcher::OnExpression(
    Expr* expr, int columnType, int tableColumnId, MOT::Table* table, JitWhereOperatorClass opClass, bool joinExpr)
{
    if (opClass != JIT_WOC_EQUALS) {
        MOT_LOG_TRACE("SubLinkFetcher::onExpression(): Skipping non-equals operator");
        return true;  // this is not an error condition
    }

    if (expr->type != T_SubLink) {
        return true;  // nothing to fetch from this expression
    }

    // every sub-link is counted, including the ones rejected below
    ++_count;
    if (_count > 1) {
        MOT_LOG_TRACE("SubLinkFetcher::onExpression(): encountered more than one sub-link");
        return false;  // already have a sub-link, we disqualify query
    }

    SubLink* candidate = (SubLink*)expr;
    if (candidate->subLinkType != EXPR_SUBLINK) {
        MOT_LOG_TRACE("SubLinkFetcher::onExpression(): unsupported sub-link type");
        return false;  // unsupported sub-link type, we disqualify query
    }
    if (candidate->testexpr != nullptr) {
        MOT_LOG_TRACE("SubLinkFetcher::onExpression(): unsupported sub-link outer test expression");
        return false;  // unsupported sub-link type, we disqualify query
    }

    MOT_ASSERT(_subLink == nullptr);
    _subLink = candidate;
    return true;
}
MOT::Table* getRealTable(const Query* query, int table_ref_id, int column_id)
{
MOT::Table* table = nullptr;
@ -700,27 +1102,9 @@ static TableExprClass classifyTableExpr(Query* query, MOT::Table* table, MOT::In
}
}
static bool visitSearchOpExpression(Query* query, MOT::Table* table, MOT::Index* index, OpExpr* op_expr,
bool include_pkey, ExpressionVisitor* visitor, bool include_join_exprs, JitColumnExprArray* pkey_exprs)
static bool VisitSearchOpExpressionFilters(Query* query, MOT::Table* table, MOT::Index* index, OpExpr* op_expr,
ExpressionVisitor* visitor, JitColumnExprArray* pkey_exprs)
{
int arg_count = list_length(op_expr->args);
if (arg_count != 2) {
MOT_LOG_TRACE("visitSearchOpExpression(): Invalid OpExpr in WHERE clause having %d arguments", arg_count);
return false;
}
if (!IsWhereOperatorSupported(op_expr->opno)) {
MOT_LOG_TRACE("visitSearchOpExpression(): Unsupported operator %d", op_expr->opno);
return false;
}
int nargs = (int)list_length(op_expr->args);
if (nargs != 2) {
MOT_LOG_TRACE(
"visitSearchOpExpression(): Unexpected number of arguments %d in operator %d", nargs, op_expr->opno);
return false;
}
// when collecting filters we need to visit only those expressions not visited during pkey collection, but still
// refer only to this table in addition, the where operator class is not EQUALS, then this is definitely a filter
// (and nothing else than that), but we still need to verify that it belongs to this table/index
@ -769,12 +1153,52 @@ static bool visitSearchOpExpression(Query* query, MOT::Table* table, MOT::Index*
}
}
return true;
}
// Tells whether a column must be skipped according to the requested key inclusion mode.
// Returns true when only index key columns were requested and the column is not part of the index
// (index_colid is negative), or when only non-key columns were requested and the column is part of the
// index. Returns false when the column should be processed.
static bool SkipKeyColumn(MOT::Table* table, MOT::Index* index, bool include_pkey, int colid, int index_colid)
{
    const bool isIndexColumn = (index_colid >= 0);

    if (include_pkey) {
        if (!isIndexColumn) {  // ordered to include only pkey and column is non-pkey so skip
            MOT_LOG_TRACE("visitSearchOpExpression(): Skipping non-index key column %d %s (ordered to include "
                          "only index key columns)",
                colid,
                table->GetFieldName(colid));
            return true;  // not an error, but we need to stop (this is a filter expression)
        }
        return false;
    }

    if (isIndexColumn) {  // ordered to include only non-pkey and column is pkey so skip
        MOT_LOG_TRACE("visitSearchOpExpression(): Skipping index key column %d, table column %d %s "
                      "(ordered to include only non-index key columns)",
            index_colid,
            colid,
            table->GetFieldName(colid));
        return true;  // not an error, but we need to stop (this is a primary key expression)
    }
    return false;
}
static bool visitSearchOpExpression(Query* query, MOT::Table* table, MOT::Index* index, OpExpr* op_expr,
bool include_pkey, ExpressionVisitor* visitor, bool include_join_exprs, JitColumnExprArray* pkey_exprs)
{
int arg_count = list_length(op_expr->args);
if (arg_count != 2) {
MOT_LOG_TRACE("visitSearchOpExpression(): Invalid OpExpr in WHERE clause having %d arguments", arg_count);
return false;
}
if (!IsWhereOperatorSupported(op_expr->opno)) {
MOT_LOG_TRACE("visitSearchOpExpression(): Unsupported operator %d", op_expr->opno);
return false;
}
if (!VisitSearchOpExpressionFilters(query, table, index, op_expr, visitor, pkey_exprs)) {
MOT_LOG_TRACE("visitSearchOpExpression(): Encountered error while classifying table expressions for filters");
return false;
}
ListCell* lc1 = nullptr;
int colid = -1;
int vartype = -1;
int index_colid = -1;
Expr* expr = nullptr;
bool join_expr = false;
foreach (lc1, op_expr->args) {
@ -820,20 +1244,8 @@ static bool visitSearchOpExpression(Query* query, MOT::Table* table, MOT::Index*
index_colid = MapTableColumnToIndex(table, index, colid);
MOT_LOG_TRACE(
"visitSearchOpExpression(): Found table column id %d and index column id %d", colid, index_colid);
if (include_pkey && (index_colid < 0)) { // ordered to include only pkey and column is non-pkey so skip
MOT_LOG_TRACE("visitSearchOpExpression(): Skipping non-index key column %d %s (ordered to include "
"only index key columns)",
colid,
table->GetFieldName(colid));
return true; // not an error, but we need to stop (this is a filter expression)
} else if (!include_pkey &&
(index_colid >= 0)) { // ordered to include only non-pkey and column is pkey so skip
MOT_LOG_TRACE("visitSearchOpExpression(): Skipping index key column %d, table column %d %s "
"(ordered to include only non-index key columns)",
index_colid,
colid,
table->GetFieldName(colid));
return true; // not an error, but we need to stop (this is a primary key expression)
if (SkipKeyColumn(table, index, include_pkey, colid, index_colid)) {
return true; // not an error, but we need to stop (this is a filter or primary key expression)
}
vartype = var->vartype;
}

View File

@ -464,22 +464,6 @@ struct JitJoinExpr {
int _inner_column_id;
};
// Forward declarations
class ExpressionVisitor;
MOT::Table* getRealTable(const Query* query, int table_ref_id, int column_id);
int getRealColumnId(const Query* query, int table_ref_id, int column_id, const MOT::Table* table);
JitExpr* parseExpr(Query* query, Expr* expr, int arg_pos, int depth);
void freeExpr(JitExpr* expr);
bool visitSearchExpressions(Query* query, MOT::Table* table, MOT::Index* index, Expr* expr, bool include_pkey,
ExpressionVisitor* visitor, bool include_join_exprs, JitColumnExprArray* pkey_exprs = nullptr);
bool getSearchExpressions(Query* query, MOT::Table* table, MOT::Index* index, bool include_pkey,
JitColumnExprArray* search_exprs, int* count, bool use_join_clause);
bool getRangeSearchExpressions(
Query* query, MOT::Table* table, MOT::Index* index, JitIndexScan* index_scan, JoinClauseType join_clause_type);
bool getTargetExpressions(Query* query, JitColumnExprArray* target_exprs);
bool getSelectExpressions(Query* query, JitSelectExprArray* select_exprs);
// Parent class for all expression visitors
class ExpressionVisitor {
public:
@ -516,15 +500,7 @@ public:
}
bool OnExpression(Expr* expr, int columnType, int tableColumnId, MOT::Table* table, JitWhereOperatorClass opClass,
bool joinExpr) final
{
if (opClass != JIT_WOC_EQUALS) {
MOT_LOG_TRACE("ExpressionCounter::onExpression(): Skipping non-equals operator");
return true; // this is not an error condition
}
++(*_count);
return true;
}
bool joinExpr) final;
private:
int* _count;
@ -545,43 +521,14 @@ public:
}
bool OnExpression(Expr* expr, int columnType, int tableColumnId, MOT::Table* table, JitWhereOperatorClass opClass,
bool joinExpr) final
{
if (opClass != JIT_WOC_EQUALS) {
MOT_LOG_TRACE("ExpressionCollector::onExpression(): Skipping non-equals operator");
return true; // this is not an error condition
} else if (*_expr_count < _expr_array->_count) {
JitExpr* jit_expr = parseExpr(_query, expr, 0, 0);
if (jit_expr == nullptr) {
MOT_LOG_TRACE("ExpressionCollector::onExpression(): Failed to parse expression %d", *_expr_count);
Cleanup();
return false;
}
_expr_array->_exprs[*_expr_count]._table_column_id = tableColumnId;
_expr_array->_exprs[*_expr_count]._table = table;
_expr_array->_exprs[*_expr_count]._expr = jit_expr;
_expr_array->_exprs[*_expr_count]._column_type = columnType;
_expr_array->_exprs[*_expr_count]._join_expr = joinExpr;
++(*_expr_count);
return true;
} else {
MOT_REPORT_ERROR(
MOT_ERROR_INTERNAL, "Prepare JIT Plan", "Exceeded expression count %d", _expr_array->_count);
return false;
}
}
bool joinExpr) final;
private:
Query* _query;
JitColumnExprArray* _expr_array;
int* _expr_count;
void Cleanup()
{
for (int i = 0; i < *_expr_count; ++i) {
freeExpr(_expr_array->_exprs[*_expr_count]._expr);
}
}
void Cleanup();
};
// Expression visitor that collects expressions for possibly open range scans
@ -610,140 +557,12 @@ public:
_index_scan = nullptr;
}
inline bool Init()
{
bool result = false;
_max_index_ops = _index->GetNumFields() + 1;
size_t alloc_size = sizeof(IndexOpClass) * _max_index_ops;
_index_ops = (IndexOpClass*)MOT::MemSessionAlloc(alloc_size);
if (_index_ops == nullptr) {
MOT_REPORT_ERROR(MOT_ERROR_OOM,
"Prepare JIT Range Scan Plan",
"Failed to allocate %u bytes for %d index operations",
(unsigned)alloc_size,
_max_index_ops);
} else {
result = true;
}
return result;
}
bool Init();
bool OnExpression(Expr* expr, int columnType, int tableColumnId, MOT::Table* table, JitWhereOperatorClass opClass,
bool joinExpr) final
{
if (_index_op_count >= _index_scan->_search_exprs._count) {
MOT_REPORT_ERROR(MOT_ERROR_INTERNAL,
"Prepare JIT Plan",
"Exceeded expression count %d, while collecting range scan expressions",
_index_scan->_search_exprs._count);
return false;
} else if (_index_op_count == _max_index_ops) {
MOT_REPORT_ERROR(MOT_ERROR_INTERNAL,
"Prepare JIT Plan",
"Exceeded index column count %d, while collecting range scan expressions",
_max_index_ops);
return false;
} else {
JitExpr* jit_expr = parseExpr(_query, expr, 0, 0);
if (jit_expr == nullptr) {
MOT_LOG_TRACE(
"RangeScanExpressionCollector::onExpression(): Failed to parse expression %d", _index_op_count);
Cleanup();
return false;
}
_index_scan->_search_exprs._exprs[_index_op_count]._table_column_id = tableColumnId;
_index_scan->_search_exprs._exprs[_index_op_count]._table = table;
_index_scan->_search_exprs._exprs[_index_op_count]._expr = jit_expr;
_index_scan->_search_exprs._exprs[_index_op_count]._column_type = columnType;
_index_scan->_search_exprs._exprs[_index_op_count]._join_expr = joinExpr;
_index_ops[_index_op_count]._index_column_id = MapTableColumnToIndex(_table, _index, tableColumnId);
_index_ops[_index_op_count]._op_class = opClass;
++_index_op_count;
return true;
}
}
bool joinExpr) final;
void EvaluateScanType()
{
_index_scan->_scan_type = JIT_INDEX_SCAN_TYPE_INVALID;
JitIndexScanType scan_type = JIT_INDEX_SCAN_TYPE_INVALID;
// if two expressions refer to the same column, we regard one of them as filter
// if an expression is removed from index scan, it will automatically be collected as filter
// (see pkey_exprs argument in @ref visitSearchOpExpression)
if (!RemoveDuplicates()) {
MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - failed to remove duplicates");
return;
}
// if no expression was collected, this is an invalid scan (we do not support full scans yet)
int column_count = _index_op_count;
if (_index_op_count == 0) {
MOT_LOG_TRACE("RangeScanExpressionCollector(): no expression was collected, assuming full scan");
_index_scan->_scan_type = JIT_INDEX_SCAN_FULL;
_index_scan->_column_count = 0;
_index_scan->_search_exprs._count = 0;
return;
}
// first step: sort in-place all collected operators
if (_index_op_count > 1) {
std::sort(&_index_ops[0], &_index_ops[_index_op_count - 1], IndexOpCmp);
}
// now verify all but last two are equals operator
for (int i = 0; i < _index_op_count - 2; ++i) {
if (_index_ops[i]._op_class != JIT_WOC_EQUALS) {
MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - encountered non-equals operator "
"at premature index column %d",
_index_ops[i]._index_column_id);
return;
}
}
// now carefully inspect last two operators to determine expected scan type
if (_index_op_count >= 2) {
if (_index_ops[_index_op_count - 2]._op_class == JIT_WOC_EQUALS) {
if (_index_ops[_index_op_count - 1]._op_class == JIT_WOC_EQUALS) {
if (_index->GetUnique() && (_index_op_count == _index->GetNumFields())) {
scan_type = JIT_INDEX_SCAN_POINT;
} else {
scan_type = JIT_INDEX_SCAN_CLOSED;
}
} else {
scan_type = JIT_INDEX_SCAN_SEMI_OPEN;
_index_scan->_last_dim_op1 = _index_ops[_index_op_count - 1]._op_class;
}
} else if (_index_ops[_index_op_count - 1]._op_class == JIT_WOC_EQUALS) {
MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - invalid open scan specifying last "
"operator as equals, while previous one is not");
return;
} else {
scan_type = JIT_INDEX_SCAN_OPEN;
column_count = _index_op_count - 1;
_index_scan->_last_dim_op1 = _index_ops[_index_op_count - 2]._op_class;
_index_scan->_last_dim_op2 = _index_ops[_index_op_count - 1]._op_class;
}
} else if (_index_op_count == 1) {
if (_index_ops[0]._op_class == JIT_WOC_EQUALS) {
if (_index->GetUnique() && (_index_op_count == _index->GetNumFields())) {
scan_type = JIT_INDEX_SCAN_POINT;
} else {
scan_type = JIT_INDEX_SCAN_CLOSED;
}
} else {
scan_type = JIT_INDEX_SCAN_SEMI_OPEN;
_index_scan->_last_dim_op1 = _index_ops[0]._op_class;
}
}
// final step: verify we have no holes in the columns according to the expected scan type
if (!ScanHasHoles(scan_type)) {
_index_scan->_scan_type = scan_type;
_index_scan->_column_count = column_count;
_index_scan->_search_exprs._count = _index_op_count; // update real number of participating expressions
}
}
void EvaluateScanType();
private:
Query* _query;
@ -759,150 +578,19 @@ private:
int _index_op_count;
JitIndexScan* _index_scan;
void Cleanup()
{
for (int i = 0; i < _index_op_count; ++i) {
freeExpr(_index_scan->_search_exprs._exprs[i]._expr);
}
_index_scan->_search_exprs._count = 0;
_index_op_count = 0;
}
bool DetermineScanType(JitIndexScanType& scanType, int& columnCount);
bool RemoveDuplicates()
{
int result = RemoveSingleDuplicate();
while (result > 0) {
result = RemoveSingleDuplicate();
}
return (result == 0);
}
void Cleanup();
int RemoveSingleDuplicate()
{
// scan and stop after first removal
for (int i = 1; i < _index_op_count; ++i) {
for (int j = 0; j < i; ++j) {
if (_index_ops[i]._index_column_id == _index_ops[j]._index_column_id) {
MOT_LOG_TRACE("RangeScanExpressionCollector(): Found duplicate column ref at %d and %d", i, j);
if ((_index_ops[i]._op_class != JIT_WOC_EQUALS) && (_index_ops[j]._op_class != JIT_WOC_EQUALS)) {
MOT_LOG_DEBUG("RangeScanExpressionCollector(): Skipping probable open scan operators while "
"removing duplicates");
continue;
}
// now we need to decide which one to remove,
// our consideration is to keep equals operators and then join expressions
int victim = -1;
if ((_index_ops[i]._op_class == JIT_WOC_EQUALS) || (_index_ops[j]._op_class == JIT_WOC_EQUALS)) {
// we keep the equals operator for index scan, and leave the other as a filter
MOT_LOG_TRACE("RangeScanExpressionCollector(): Rejecting query due to duplicate index column "
"reference, one with EQUALS, one without");
if (_index_ops[i]._op_class != JIT_WOC_EQUALS) {
victim = i;
} else {
victim = j;
}
} else if (_index_scan->_search_exprs._exprs[i]._join_expr &&
!_index_scan->_search_exprs._exprs[j]._join_expr) {
victim = j;
} else if (!_index_scan->_search_exprs._exprs[i]._join_expr &&
_index_scan->_search_exprs._exprs[j]._join_expr) {
victim = i;
} else if (_index_scan->_search_exprs._exprs[i]._join_expr &&
_index_scan->_search_exprs._exprs[j]._join_expr) {
// both are join expressions, this is unacceptable, so we abort
MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - duplicate JOIN expression "
"on index %s in index column %d",
_index->GetName().c_str(),
_index_ops[i]._index_column_id);
return -1; // signal error
} else {
// both items are not join expressions, both refer to index columns, so we arbitrarily drop one
// of them
victim = j;
}
// switch victim with last item
if (_index_scan->_search_exprs._exprs[victim]._expr != nullptr) {
freeExpr(_index_scan->_search_exprs._exprs[victim]._expr);
}
_index_scan->_search_exprs._exprs[victim] = _index_scan->_search_exprs._exprs[_index_op_count - 1];
--_index_op_count;
--_index_scan->_search_exprs._count;
return 1;
}
}
}
bool RemoveDuplicates();
// nothing changed
return 0;
}
int RemoveSingleDuplicate();
static int IntCmp(int lhs, int rhs)
{
int result = 0;
if (lhs < rhs) {
result = -1;
} else if (lhs > rhs) {
result = 1;
}
return result;
}
static int IntCmp(int lhs, int rhs);
static bool IndexOpCmp(const IndexOpClass& lhs, const IndexOpClass& rhs)
{
int result = IntCmp(lhs._index_column_id, rhs._index_column_id);
if (result == 0) {
// make sure equals appears before other operators in case column id is equal
result = IntCmp(lhs._op_class, rhs._op_class);
}
return result < 0;
}
static bool IndexOpCmp(const IndexOpClass& lhs, const IndexOpClass& rhs);
bool ScanHasHoles(JitIndexScanType scan_type) const
{
MOT_ASSERT(_index_op_count >= 1);
// closed and semi-open scans expect to see all columns in increasing order beginning from zero
int column_count = _index_op_count - 1;
if (scan_type != JIT_INDEX_SCAN_OPEN) {
column_count = _index_op_count;
}
// full prefix must begin with index column zero
if (_index_ops[0]._index_column_id != 0) {
MOT_LOG_TRACE(
"RangeScanExpressionCollector(): Disqualifying query - Index scan does not begin with index column 0");
return true;
}
// check each operation relates to the next index column
for (int i = 1; i < column_count; ++i) {
int prev_column = _index_ops[i - 1]._index_column_id;
int next_column = _index_ops[i]._index_column_id;
if (next_column != (prev_column + 1)) {
MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - found hole in closed or semi-open "
"range scan from index column %d to %d",
prev_column,
next_column);
return true;
}
}
// in open scan we expect two last columns to be equal
if (scan_type == JIT_INDEX_SCAN_OPEN) {
MOT_ASSERT(_index_op_count >= 2);
int prev_column = _index_ops[_index_op_count - 2]._index_column_id;
int next_column = _index_ops[_index_op_count - 1]._index_column_id;
if (next_column != prev_column) {
MOT_LOG_TRACE("RangeScanExpressionCollector(): Disqualifying query - last two columns in open index "
"scan are not equals: %d, %d",
prev_column,
next_column);
return true;
}
}
return false;
}
bool ScanHasHoles(JitIndexScanType scan_type) const;
};
// Expression visitor that counts number of filters
@ -940,49 +628,14 @@ public:
_filter_count = nullptr;
}
bool OnFilterExpr(int filterOp, int filterOpFuncId, Expr* lhs, Expr* rhs) final
{
JitExpr* jit_lhs = parseExpr(_query, lhs, 0, 0);
if (jit_lhs == nullptr) {
MOT_LOG_TRACE("FilterCollector::onFilterExpr(): Failed to parse LHS expression in filter expression %d",
*_filter_count);
Cleanup();
return false;
}
JitExpr* jit_rhs = parseExpr(_query, rhs, 1, 0);
if (jit_rhs == nullptr) {
MOT_LOG_TRACE("FilterCollector::onFilterExpr(): Failed to parse RHS expression in filter expression %d",
*_filter_count);
freeExpr(jit_lhs);
Cleanup();
return false;
}
if (*_filter_count < _filter_array->_filter_count) {
_filter_array->_scan_filters[*_filter_count]._filter_op = filterOp;
_filter_array->_scan_filters[*_filter_count]._filter_op_funcid = filterOpFuncId;
_filter_array->_scan_filters[*_filter_count]._lhs_operand = jit_lhs;
_filter_array->_scan_filters[*_filter_count]._rhs_operand = jit_rhs;
++(*_filter_count);
return true;
} else {
MOT_REPORT_ERROR(
MOT_ERROR_INTERNAL, "Prepare JIT Plan", "Exceeded filter count %d", _filter_array->_filter_count);
return false;
}
}
bool OnFilterExpr(int filterOp, int filterOpFuncId, Expr* lhs, Expr* rhs) final;
private:
Query* _query;
JitFilterArray* _filter_array;
int* _filter_count;
// Releases the operand expressions of every filter collected so far, i.e. slots [0, *_filter_count) of the
// filter array. Each slot is cleared after it is released so that the same expression cannot be released a
// second time through the filter array.
// NOTE(review): assumes whoever releases the filter array later tolerates null operands - confirm.
void Cleanup()
{
    for (int i = 0; i < *_filter_count; ++i) {
        // index by the loop variable: slot *_filter_count is the first slot that was NOT filled yet
        freeExpr(_filter_array->_scan_filters[i]._lhs_operand);
        _filter_array->_scan_filters[i]._lhs_operand = nullptr;
        freeExpr(_filter_array->_scan_filters[i]._rhs_operand);
        _filter_array->_scan_filters[i]._rhs_operand = nullptr;
    }
}
void Cleanup();
};
// Expression visitor that fetches a single sub-link
@ -1000,36 +653,24 @@ public:
}
// Inspects one visited expression and remembers the sub-link it holds, if any.
// Expressions with a non-equals operator class, and expressions that are not sub-links, are accepted
// without being recorded. The query is disqualified (false is returned) when a second sub-link is seen,
// when the sub-link type is not EXPR_SUBLINK, or when the sub-link carries an outer test expression.
bool OnExpression(Expr* expr, int columnType, int tableColumnId, MOT::Table* table, JitWhereOperatorClass opClass,
    bool joinExpr) final
{
    if (opClass != JIT_WOC_EQUALS) {
        // not an error condition, the expression is simply not interesting here
        MOT_LOG_TRACE("SubLinkFetcher::onExpression(): Skipping non-equals operator");
        return true;
    }

    if (expr->type != T_SubLink) {
        return true;
    }

    // every sub-link is counted, even one that gets rejected below
    ++_count;
    if (_count > 1) {
        MOT_LOG_TRACE("SubLinkFetcher::onExpression(): encountered more than one sub-link");
        return false;
    }

    SubLink* candidate = (SubLink*)expr;
    if (candidate->subLinkType != EXPR_SUBLINK) {
        MOT_LOG_TRACE("SubLinkFetcher::onExpression(): unsupported sub-link type");
        return false;
    }
    if (candidate->testexpr != nullptr) {
        MOT_LOG_TRACE("SubLinkFetcher::onExpression(): unsupported sub-link outer test expression");
        return false;
    }

    // first and only accepted sub-link
    MOT_ASSERT(_subLink == nullptr);
    _subLink = candidate;
    return true;
}
bool joinExpr) final;
private:
SubLink* _subLink;
int _count;
};
// Free-function declarations of the JIT plan expression helpers; the definitions are not part of this header.
// NOTE(review): the one-line notes below are derived from the signatures and from the call sites visible in
// this header only - confirm against the definitions before relying on them.

// Resolves the table / column id for a (table_ref_id, column_id) pair of the given query.
MOT::Table* getRealTable(const Query* query, int table_ref_id, int column_id);
int getRealColumnId(const Query* query, int table_ref_id, int column_id, const MOT::Table* table);
// Releases a JIT expression (used by the filter collector on expressions returned from parseExpr()).
void freeExpr(JitExpr* expr);
// Drives the given visitor over the expression; pkey_exprs is optional and defaults to nullptr.
bool visitSearchExpressions(Query* query, MOT::Table* table, MOT::Index* index, Expr* expr, bool include_pkey,
ExpressionVisitor* visitor, bool include_join_exprs, JitColumnExprArray* pkey_exprs = nullptr);
// Collectors that fill the caller-provided output structure and return a bool
// (presumably success/failure - confirm).
bool getSearchExpressions(Query* query, MOT::Table* table, MOT::Index* index, bool include_pkey,
JitColumnExprArray* search_exprs, int* count, bool use_join_clause);
bool getRangeSearchExpressions(
Query* query, MOT::Table* table, MOT::Index* index, JitIndexScan* index_scan, JoinClauseType join_clause_type);
bool getTargetExpressions(Query* query, JitColumnExprArray* target_exprs);
bool getSelectExpressions(Query* query, JitSelectExprArray* select_exprs);
} // namespace JitExec
#endif /* JIT_PLAN_EXPR_H */

View File

@ -1100,6 +1100,52 @@ static bool buildClosedRangeScan(JitTvmCodeGenContext* ctx, JitIndexScan* indexS
return result;
}
// Builds the begin and end iterator keys for the last dimension of an ascending semi-open range scan.
// The single operator on the last dimension (indexScan->_last_dim_op1) decides which key receives the
// searched value and which key is padded with a fill pattern over [offset, offset + size).
// The resulting bound modes are written to beginRangeBound and endRangeBound.
static void BuildAscendingSemiOpenRangeScan(JitTvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
    JitRangeScanType rangeScanType, JitRangeBoundMode* beginRangeBound, JitRangeBoundMode* endRangeBound,
    Instruction* outerRow, int subQueryIndex, int offset, int size, JitColumnExpr* lastExpr)
{
    const auto lastDimOp = indexScan->_last_dim_op1;
    const bool isUpperBoundOp = (lastDimOp == JIT_WOC_LESS_THAN) || (lastDimOp == JIT_WOC_LESS_EQUALS);

    if (!isUpperBoundOp) {
        // lower bound operator: the scan starts at the value and runs up to a key padded with 0xFF
        JitRangeBoundMode valueBound = JIT_RANGE_BOUND_EXCLUDE;
        if (lastDimOp == JIT_WOC_GREATER_EQUALS) {
            valueBound = JIT_RANGE_BOUND_INCLUDE;
        }
        buildScanExpression(ctx, lastExpr, maxArg, JIT_RANGE_ITERATOR_START, rangeScanType, outerRow, subQueryIndex);
        AddFillKeyPattern(ctx, 0xFF, offset, size, JIT_RANGE_ITERATOR_END, rangeScanType, subQueryIndex);
        *beginRangeBound = valueBound;
        *endRangeBound = JIT_RANGE_BOUND_INCLUDE;
        return;
    }

    // upper bound operator: the scan starts at a zero-filled key and runs up to the value
    JitRangeBoundMode valueBound = JIT_RANGE_BOUND_EXCLUDE;
    if (lastDimOp == JIT_WOC_LESS_EQUALS) {
        valueBound = JIT_RANGE_BOUND_INCLUDE;
    }
    AddFillKeyPattern(ctx, 0x00, offset, size, JIT_RANGE_ITERATOR_START, rangeScanType, subQueryIndex);
    buildScanExpression(ctx, lastExpr, maxArg, JIT_RANGE_ITERATOR_END, rangeScanType, outerRow, subQueryIndex);
    *beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
    *endRangeBound = valueBound;
}
// Builds the begin and end iterator keys for the last dimension of a descending semi-open range scan.
// Mirror image of the ascending case: an upper bound operator puts the searched value in the begin key and
// a zero fill pattern in the end key, while a lower bound operator puts a 0xFF fill pattern in the begin key
// and the searched value in the end key. The resulting bound modes are written to beginRangeBound and
// endRangeBound.
static void BuildDescendingSemiOpenRangeScan(JitTvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
    JitRangeScanType rangeScanType, JitRangeBoundMode* beginRangeBound, JitRangeBoundMode* endRangeBound,
    Instruction* outerRow, int subQueryIndex, int offset, int size, JitColumnExpr* lastExpr)
{
    const auto lastDimOp = indexScan->_last_dim_op1;
    const bool isUpperBoundOp = (lastDimOp == JIT_WOC_LESS_THAN) || (lastDimOp == JIT_WOC_LESS_EQUALS);

    if (!isUpperBoundOp) {
        // lower bound operator: the scan starts at a key padded with 0xFF and runs down to the value
        JitRangeBoundMode valueBound = JIT_RANGE_BOUND_EXCLUDE;
        if (lastDimOp == JIT_WOC_GREATER_EQUALS) {
            valueBound = JIT_RANGE_BOUND_INCLUDE;
        }
        AddFillKeyPattern(ctx, 0xFF, offset, size, JIT_RANGE_ITERATOR_START, rangeScanType, subQueryIndex);
        buildScanExpression(ctx, lastExpr, maxArg, JIT_RANGE_ITERATOR_END, rangeScanType, outerRow, subQueryIndex);
        *beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
        *endRangeBound = valueBound;
        return;
    }

    // upper bound operator: the scan starts at the value and runs down to a zero-filled key
    JitRangeBoundMode valueBound = JIT_RANGE_BOUND_EXCLUDE;
    if (lastDimOp == JIT_WOC_LESS_EQUALS) {
        valueBound = JIT_RANGE_BOUND_INCLUDE;
    }
    buildScanExpression(ctx, lastExpr, maxArg, JIT_RANGE_ITERATOR_START, rangeScanType, outerRow, subQueryIndex);
    AddFillKeyPattern(ctx, 0x00, offset, size, JIT_RANGE_ITERATOR_END, rangeScanType, subQueryIndex);
    *beginRangeBound = valueBound;
    *endRangeBound = JIT_RANGE_BOUND_INCLUDE;
}
static bool buildSemiOpenRangeScan(JitTvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
JitRangeScanType range_scan_type, JitRangeBoundMode* begin_range_bound, JitRangeBoundMode* end_range_bound,
Instruction* outer_row, int subQueryIndex)
@ -1145,47 +1191,29 @@ static bool buildSemiOpenRangeScan(JitTvmCodeGenContext* ctx, JitIndexScan* inde
int last_expr_index = index_scan->_search_exprs._count - 1;
JitColumnExpr* last_expr = &index_scan->_search_exprs._exprs[last_expr_index];
if (ascending) {
if ((index_scan->_last_dim_op1 == JIT_WOC_LESS_THAN) ||
(index_scan->_last_dim_op1 == JIT_WOC_LESS_EQUALS)) {
// this is an upper bound operator on an ascending semi-open scan so we fill the begin key with zeros,
// and the end key with the value
AddFillKeyPattern(ctx, 0x00, offset, size, JIT_RANGE_ITERATOR_START, range_scan_type, subQueryIndex);
buildScanExpression(
ctx, last_expr, max_arg, JIT_RANGE_ITERATOR_END, range_scan_type, outer_row, subQueryIndex);
*begin_range_bound = JIT_RANGE_BOUND_INCLUDE;
*end_range_bound = (index_scan->_last_dim_op1 == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE
: JIT_RANGE_BOUND_EXCLUDE;
} else {
// this is a lower bound operator on an ascending semi-open scan so we fill the begin key with the
// value, and the end key with 0xFF
buildScanExpression(
ctx, last_expr, max_arg, JIT_RANGE_ITERATOR_START, range_scan_type, outer_row, subQueryIndex);
AddFillKeyPattern(ctx, 0xFF, offset, size, JIT_RANGE_ITERATOR_END, range_scan_type, subQueryIndex);
*begin_range_bound = (index_scan->_last_dim_op1 == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE
: JIT_RANGE_BOUND_EXCLUDE;
*end_range_bound = JIT_RANGE_BOUND_INCLUDE;
}
BuildAscendingSemiOpenRangeScan(ctx,
index_scan,
max_arg,
range_scan_type,
begin_range_bound,
end_range_bound,
outer_row,
subQueryIndex,
offset,
size,
last_expr);
} else {
if ((index_scan->_last_dim_op1 == JIT_WOC_LESS_THAN) ||
(index_scan->_last_dim_op1 == JIT_WOC_LESS_EQUALS)) {
// this is an upper bound operator on a descending semi-open scan so we fill the begin key with value,
// and the end key with zeroes
buildScanExpression(
ctx, last_expr, max_arg, JIT_RANGE_ITERATOR_START, range_scan_type, outer_row, subQueryIndex);
AddFillKeyPattern(ctx, 0x00, offset, size, JIT_RANGE_ITERATOR_END, range_scan_type, subQueryIndex);
*begin_range_bound = (index_scan->_last_dim_op1 == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE
: JIT_RANGE_BOUND_EXCLUDE;
*end_range_bound = JIT_RANGE_BOUND_INCLUDE;
} else {
// this is a lower bound operator on a descending semi-open scan so we fill the begin key with 0xFF, and
// the end key with the value
AddFillKeyPattern(ctx, 0xFF, offset, size, JIT_RANGE_ITERATOR_START, range_scan_type, subQueryIndex);
buildScanExpression(
ctx, last_expr, max_arg, JIT_RANGE_ITERATOR_END, range_scan_type, outer_row, subQueryIndex);
*begin_range_bound = JIT_RANGE_BOUND_INCLUDE;
*end_range_bound = (index_scan->_last_dim_op1 == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE
: JIT_RANGE_BOUND_EXCLUDE;
}
BuildDescendingSemiOpenRangeScan(ctx,
index_scan,
max_arg,
range_scan_type,
begin_range_bound,
end_range_bound,
outer_row,
subQueryIndex,
offset,
size,
last_expr);
}
// now fill the rest as usual
@ -1226,6 +1254,109 @@ static bool buildSemiOpenRangeScan(JitTvmCodeGenContext* ctx, JitIndexScan* inde
return result;
}
// Builds the begin and end iterator keys for the last dimension of an ascending open range scan, where the
// last dimension carries two operators (one lower bound, one upper bound) in either order.
// The lower bound expression always goes into the begin key and the upper bound expression into the end
// key; each bound is inclusive only when its operator is the "or equals" variant.
// indexScan is not used by this helper.
static void BuildAscendingOpenRangeScan(JitTvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
    JitRangeScanType rangeScanType, JitRangeBoundMode* beginRangeBound, JitRangeBoundMode* endRangeBound,
    Instruction* outerRow, int subQueryIndex, JitWhereOperatorClass beforeLastDimOp, JitWhereOperatorClass lastDimOp,
    JitColumnExpr* beforeLastExpr, JitColumnExpr* lastExpr)
{
    // sort the two (operator, expression) pairs into a lower bound pair and an upper bound pair
    JitWhereOperatorClass lowerOp;
    JitWhereOperatorClass upperOp;
    JitColumnExpr* lowerExpr = nullptr;
    JitColumnExpr* upperExpr = nullptr;
    const bool beforeLastIsUpper =
        (beforeLastDimOp == JIT_WOC_LESS_THAN) || (beforeLastDimOp == JIT_WOC_LESS_EQUALS);
    if (beforeLastIsUpper) {
        // before-last operator is the upper bound, so the last operator must be the lower bound
        MOT_ASSERT((lastDimOp == JIT_WOC_GREATER_THAN) || (lastDimOp == JIT_WOC_GREATER_EQUALS));
        lowerOp = lastDimOp;
        lowerExpr = lastExpr;
        upperOp = beforeLastDimOp;
        upperExpr = beforeLastExpr;
    } else {
        // before-last operator is the lower bound, so the last operator must be the upper bound
        MOT_ASSERT((lastDimOp == JIT_WOC_LESS_THAN) || (lastDimOp == JIT_WOC_LESS_EQUALS));
        lowerOp = beforeLastDimOp;
        lowerExpr = beforeLastExpr;
        upperOp = lastDimOp;
        upperExpr = lastExpr;
    }

    // ascending scan: lower bound on begin iterator key, upper bound on end iterator key
    buildScanExpression(ctx, lowerExpr, maxArg, JIT_RANGE_ITERATOR_START, rangeScanType, outerRow, subQueryIndex);
    buildScanExpression(ctx, upperExpr, maxArg, JIT_RANGE_ITERATOR_END, rangeScanType, outerRow, subQueryIndex);

    if (lowerOp == JIT_WOC_GREATER_EQUALS) {
        *beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
    } else {
        *beginRangeBound = JIT_RANGE_BOUND_EXCLUDE;
    }
    if (upperOp == JIT_WOC_LESS_EQUALS) {
        *endRangeBound = JIT_RANGE_BOUND_INCLUDE;
    } else {
        *endRangeBound = JIT_RANGE_BOUND_EXCLUDE;
    }
}
// Builds the begin and end iterator keys for the last dimension of a descending open range scan, where the
// last dimension carries two operators (one lower bound, one upper bound) in either order.
// The upper bound expression always goes into the begin key and the lower bound expression into the end
// key; each bound is inclusive only when its operator is the "or equals" variant.
// indexScan is not used by this helper.
static void BuildDescendingOpenRangeScan(JitTvmCodeGenContext* ctx, JitIndexScan* indexScan, int* maxArg,
    JitRangeScanType rangeScanType, JitRangeBoundMode* beginRangeBound, JitRangeBoundMode* endRangeBound,
    Instruction* outerRow, int subQueryIndex, JitWhereOperatorClass beforeLastDimOp, JitWhereOperatorClass lastDimOp,
    JitColumnExpr* beforeLastExpr, JitColumnExpr* lastExpr)
{
    // sort the two (operator, expression) pairs into a lower bound pair and an upper bound pair
    JitWhereOperatorClass lowerOp;
    JitWhereOperatorClass upperOp;
    JitColumnExpr* lowerExpr = nullptr;
    JitColumnExpr* upperExpr = nullptr;
    const bool beforeLastIsUpper =
        (beforeLastDimOp == JIT_WOC_LESS_THAN) || (beforeLastDimOp == JIT_WOC_LESS_EQUALS);
    if (beforeLastIsUpper) {
        // before-last operator is the upper bound, so the last operator must be the lower bound
        MOT_ASSERT((lastDimOp == JIT_WOC_GREATER_THAN) || (lastDimOp == JIT_WOC_GREATER_EQUALS));
        upperOp = beforeLastDimOp;
        upperExpr = beforeLastExpr;
        lowerOp = lastDimOp;
        lowerExpr = lastExpr;
    } else {
        // before-last operator is the lower bound, so the last operator must be the upper bound
        MOT_ASSERT((lastDimOp == JIT_WOC_LESS_THAN) || (lastDimOp == JIT_WOC_LESS_EQUALS));
        upperOp = lastDimOp;
        upperExpr = lastExpr;
        lowerOp = beforeLastDimOp;
        lowerExpr = beforeLastExpr;
    }

    // descending scan: upper bound on begin iterator key, lower bound on end iterator key
    buildScanExpression(ctx, upperExpr, maxArg, JIT_RANGE_ITERATOR_START, rangeScanType, outerRow, subQueryIndex);
    buildScanExpression(ctx, lowerExpr, maxArg, JIT_RANGE_ITERATOR_END, rangeScanType, outerRow, subQueryIndex);

    if (upperOp == JIT_WOC_LESS_EQUALS) {
        *beginRangeBound = JIT_RANGE_BOUND_INCLUDE;
    } else {
        *beginRangeBound = JIT_RANGE_BOUND_EXCLUDE;
    }
    if (lowerOp == JIT_WOC_GREATER_EQUALS) {
        *endRangeBound = JIT_RANGE_BOUND_INCLUDE;
    } else {
        *endRangeBound = JIT_RANGE_BOUND_EXCLUDE;
    }
}
static bool buildOpenRangeScan(JitTvmCodeGenContext* ctx, JitIndexScan* index_scan, int* max_arg,
JitRangeScanType range_scan_type, JitRangeBoundMode* begin_range_bound, JitRangeBoundMode* end_range_bound,
Instruction* outer_row, int subQueryIndex)
@ -1269,97 +1400,31 @@ static bool buildOpenRangeScan(JitTvmCodeGenContext* ctx, JitIndexScan* index_sc
JitColumnExpr* last_expr = &index_scan->_search_exprs._exprs[last_expr_index];
JitColumnExpr* before_last_expr = &index_scan->_search_exprs._exprs[last_expr_index - 1];
if (ascending) {
if ((before_last_dim_op == JIT_WOC_LESS_THAN) || (before_last_dim_op == JIT_WOC_LESS_EQUALS)) {
MOT_ASSERT((last_dim_op == JIT_WOC_GREATER_THAN) || (last_dim_op == JIT_WOC_GREATER_EQUALS));
// the before-last operator is an upper bound operator on an ascending open scan so we fill the begin
// key with the last value, and the end key with the before-last value
buildScanExpression(ctx,
last_expr,
max_arg,
JIT_RANGE_ITERATOR_START,
range_scan_type,
outer_row, // lower bound on begin iterator key
subQueryIndex);
buildScanExpression(ctx,
before_last_expr,
max_arg,
JIT_RANGE_ITERATOR_END,
range_scan_type,
outer_row, // upper bound on end iterator key
subQueryIndex);
*begin_range_bound =
(last_dim_op == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
*end_range_bound =
(before_last_dim_op == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
} else {
MOT_ASSERT((last_dim_op == JIT_WOC_LESS_THAN) || (last_dim_op == JIT_WOC_LESS_EQUALS));
// the before-last operator is a lower bound operator on an ascending open scan so we fill the begin key
// with the before-last value, and the end key with the last value
buildScanExpression(ctx,
before_last_expr,
max_arg,
JIT_RANGE_ITERATOR_START,
range_scan_type,
outer_row, // lower bound on begin iterator key
subQueryIndex);
buildScanExpression(ctx,
last_expr,
max_arg,
JIT_RANGE_ITERATOR_END,
range_scan_type,
outer_row, // upper bound on end iterator key
subQueryIndex);
*begin_range_bound =
(before_last_dim_op == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
*end_range_bound =
(last_dim_op == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
}
BuildAscendingOpenRangeScan(ctx,
index_scan,
max_arg,
range_scan_type,
begin_range_bound,
end_range_bound,
outer_row,
subQueryIndex,
before_last_dim_op,
last_dim_op,
before_last_expr,
last_expr);
} else {
if ((before_last_dim_op == JIT_WOC_LESS_THAN) || (before_last_dim_op == JIT_WOC_LESS_EQUALS)) {
MOT_ASSERT((last_dim_op == JIT_WOC_GREATER_THAN) || (last_dim_op == JIT_WOC_GREATER_EQUALS));
// the before-last operator is an upper bound operator on an descending open scan so we fill the begin
// key with the last value, and the end key with the before-last value
buildScanExpression(ctx,
before_last_expr,
max_arg,
JIT_RANGE_ITERATOR_START,
range_scan_type,
outer_row, // upper bound on begin iterator key
subQueryIndex);
buildScanExpression(ctx,
last_expr,
max_arg,
JIT_RANGE_ITERATOR_END,
range_scan_type,
outer_row, // lower bound on end iterator key
subQueryIndex);
*begin_range_bound =
(before_last_dim_op == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
*end_range_bound =
(last_dim_op == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
} else {
MOT_ASSERT((last_dim_op == JIT_WOC_LESS_THAN) || (last_dim_op == JIT_WOC_LESS_EQUALS));
// the before-last operator is a lower bound operator on an descending open scan so we fill the begin
// key with the last value, and the end key with the before-last value
buildScanExpression(ctx,
last_expr,
max_arg,
JIT_RANGE_ITERATOR_START,
range_scan_type,
outer_row, // upper bound on begin iterator key
subQueryIndex);
buildScanExpression(ctx,
before_last_expr,
max_arg,
JIT_RANGE_ITERATOR_END,
range_scan_type,
outer_row, // lower bound on end iterator key
subQueryIndex);
*begin_range_bound =
(last_dim_op == JIT_WOC_LESS_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
*end_range_bound =
(before_last_dim_op == JIT_WOC_GREATER_EQUALS) ? JIT_RANGE_BOUND_INCLUDE : JIT_RANGE_BOUND_EXCLUDE;
}
BuildDescendingOpenRangeScan(ctx,
index_scan,
max_arg,
range_scan_type,
begin_range_bound,
end_range_bound,
outer_row,
subQueryIndex,
before_last_dim_op,
last_dim_op,
before_last_expr,
last_expr);
}
// now fill the rest as usual