forked from huawei/openGauss-server
763 lines
26 KiB
C++
763 lines
26 KiB
C++
/*
|
|
* contrib/pg_standby/pg_standby.c
|
|
*
|
|
*
|
|
* pg_standby.c
|
|
*
|
|
* Production-ready example of how to create a Warm Standby
|
|
* database server using continuous archiving as a
|
|
* replication mechanism
|
|
*
|
|
* We separate the parameters for archive and nextWALfile
|
|
* so that we can check the archive exists, even if the
|
|
* WAL file doesn't (yet).
|
|
*
|
|
* This program will be executed once in full for each file
|
|
* requested by the warm standby server.
|
|
*
|
|
* It is designed to cater to a variety of needs, as well
|
|
* providing a customizable section.
|
|
*
|
|
* Original author: Simon Riggs simon@2ndquadrant.com
|
|
* Current maintainer: Simon Riggs
|
|
*/
|
|
#include "postgres_fe.h"
|
|
|
|
#include <ctype.h>
|
|
#include <dirent.h>
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
#include <signal.h>
|
|
|
|
#ifdef WIN32
|
|
int getopt(int argc, char* const argv[], const char* optstring);
|
|
#else
|
|
#include <sys/time.h>
|
|
#include <unistd.h>
|
|
|
|
#ifdef HAVE_GETOPT_H
|
|
#include <getopt.h>
|
|
#endif
|
|
#endif /* ! WIN32 */
|
|
|
|
extern char* optarg;
|
|
extern int optind;
|
|
|
|
const char* progname;
|
|
|
|
/* Options and defaults */
|
|
int sleeptime = 5; /* amount of time to sleep between file checks */
|
|
int waittime = -1; /* how long we have been waiting, -1 no wait
|
|
* yet */
|
|
int maxwaittime = 0; /* how long are we prepared to wait for? */
|
|
int keepfiles = 0; /* number of WAL files to keep, 0 keep all */
|
|
int maxretries = 3; /* number of retries on restore command */
|
|
bool debug = false; /* are we debugging? */
|
|
bool need_cleanup = false; /* do we need to remove files from
|
|
* archive? */
|
|
|
|
#ifndef WIN32
|
|
static volatile sig_atomic_t signaled = false;
|
|
#endif
|
|
|
|
char* archiveLocation; /* where to find the archive? */
|
|
char* triggerPath; /* where to find the trigger file? */
|
|
char* xlogFilePath; /* where we are going to restore to */
|
|
char* nextWALFileName; /* the file we need to get from archive */
|
|
char* restartWALFileName; /* the file from which we can restart restore */
|
|
char* priorWALFileName; /* the file we need to get from archive */
|
|
char WALFilePath[MAXPGPATH]; /* the file path including archive */
|
|
char restoreCommand[MAXPGPATH]; /* run this to restore */
|
|
char exclusiveCleanupFileName[MAXPGPATH]; /* the file we need to
|
|
* get from archive */
|
|
|
|
/*
|
|
* Two types of failover are supported (smart and fast failover).
|
|
*
|
|
* The content of the trigger file determines the type of failover. If the
|
|
* trigger file contains the word "smart" (or the file is empty), smart
|
|
* failover is chosen: pg_standby acts as cp or ln command itself, on
|
|
* successful completion all the available WAL records will be applied
|
|
* resulting in zero data loss. But, it might take a long time to finish
|
|
* recovery if there's a lot of unapplied WAL.
|
|
*
|
|
* On the other hand, if the trigger file contains the word "fast", the
|
|
* recovery is finished immediately even if unapplied WAL files remain. Any
|
|
* transactions in the unapplied WAL files are lost.
|
|
*
|
|
* An empty trigger file performs smart failover. SIGUSR or SIGINT triggers
|
|
* fast failover. A timeout causes fast failover (smart failover would have
|
|
* the same effect, since if the timeout is reached there is no unapplied WAL).
|
|
*/
|
|
#define NoFailover 0
|
|
#define SmartFailover 1
|
|
#define FastFailover 2
|
|
|
|
static int Failover = NoFailover;
|
|
|
|
#define RESTORE_COMMAND_COPY 0
|
|
#define RESTORE_COMMAND_LINK 1
|
|
int restoreCommandType;
|
|
|
|
#define XLOG_DATA 0
|
|
#define XLOG_HISTORY 1
|
|
#define XLOG_BACKUP_LABEL 2
|
|
int nextWALFileType;
|
|
|
|
#define SET_RESTORE_COMMAND(cmd, arg1, arg2) snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", arg1, arg2)
|
|
|
|
struct stat stat_buf;
|
|
|
|
/* =====================================================================
|
|
*
|
|
* Customizable section
|
|
*
|
|
* =====================================================================
|
|
*
|
|
* Currently, this section assumes that the Archive is a locally
|
|
* accessible directory. If you want to make other assumptions,
|
|
* such as using a vendor-specific archive and access API, these
|
|
* routines are the ones you'll need to change. You're
|
|
* encouraged to submit any changes to pgsql-hackers@postgresql.org
|
|
* or personally to the current maintainer. Those changes may be
|
|
* folded in to later versions of this program.
|
|
*/
|
|
|
|
#define XLOG_DATA_FNAME_LEN 24
|
|
/* Reworked from access/xlog_internal.h */
|
|
#define XLogFileName(fname, len, tli, logSegNo) \
|
|
snprintf(fname, \
|
|
len, \
|
|
"%08X%08X%08X", \
|
|
tli, \
|
|
(uint32)((logSegNo) / XLogSegmentsPerXLogId), \
|
|
(uint32)((logSegNo) % XLogSegmentsPerXLogId))
|
|
|
|
/*
|
|
* Initialize allows customized commands into the warm standby program.
|
|
*
|
|
* As an example, and probably the common case, we use either
|
|
* cp/ln commands on *nix, or copy/move command on Windows.
|
|
*/
|
|
static void CustomizableInitialize(void)
|
|
{
|
|
#ifdef WIN32
|
|
snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
|
|
switch (restoreCommandType) {
|
|
case RESTORE_COMMAND_LINK:
|
|
SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
|
|
break;
|
|
case RESTORE_COMMAND_COPY:
|
|
default:
|
|
SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
|
|
break;
|
|
}
|
|
#else
|
|
snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
|
|
switch (restoreCommandType) {
|
|
case RESTORE_COMMAND_LINK:
|
|
#if HAVE_WORKING_LINK
|
|
SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
|
|
break;
|
|
#endif
|
|
case RESTORE_COMMAND_COPY:
|
|
default:
|
|
SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* This code assumes that archiveLocation is a directory You may wish to
|
|
* add code to check for tape libraries, etc.. So, since it is a
|
|
* directory, we use stat to test if it's accessible
|
|
*/
|
|
if (stat(archiveLocation, &stat_buf) != 0) {
|
|
fprintf(stderr, "%s: archive location \"%s\" does not exist\n", progname, archiveLocation);
|
|
fflush(stderr);
|
|
exit(2);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* CustomizableNextWALFileReady()
|
|
*
|
|
* Is the requested file ready yet?
|
|
*/
|
|
static bool CustomizableNextWALFileReady()
|
|
{
|
|
if (stat(WALFilePath, &stat_buf) == 0) {
|
|
/*
|
|
* If it's a backup file, return immediately. If it's a regular file
|
|
* return only if it's the right size already.
|
|
*/
|
|
if (strlen(nextWALFileName) > 24 && strspn(nextWALFileName, "0123456789ABCDEF") == 24 &&
|
|
strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".backup"), ".backup") == 0) {
|
|
nextWALFileType = XLOG_BACKUP_LABEL;
|
|
return true;
|
|
} else if (stat_buf.st_size == XLOG_SEG_SIZE) {
|
|
#ifdef WIN32
|
|
|
|
/*
|
|
* Windows 'cp' sets the final file size before the copy is
|
|
* complete, and not yet ready to be opened by pg_standby. So we
|
|
* wait for sleeptime secs before attempting to restore. If that
|
|
* is not enough, we will rely on the retry/holdoff mechanism.
|
|
* GNUWin32's cp does not have this problem.
|
|
*/
|
|
pg_usleep(sleeptime * 1000000L);
|
|
#endif
|
|
nextWALFileType = XLOG_DATA;
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* If still too small, wait until it is the correct size
|
|
*/
|
|
if (stat_buf.st_size > XLOG_SEG_SIZE) {
|
|
if (debug) {
|
|
fprintf(stderr, "file size greater than expected\n");
|
|
fflush(stderr);
|
|
}
|
|
exit(3);
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
#define MaxSegmentsPerLogFile (0xFFFFFFFF / XLOG_SEG_SIZE)
|
|
|
|
static void CustomizableCleanupPriorWALFiles(void)
|
|
{
|
|
/*
|
|
* Work out name of prior file from current filename
|
|
*/
|
|
if (nextWALFileType == XLOG_DATA) {
|
|
int rc;
|
|
DIR* xldir = NULL;
|
|
struct dirent* xlde;
|
|
|
|
/*
|
|
* Assume it's OK to keep failing. The failure situation may change
|
|
* over time, so we'd rather keep going on the main processing than
|
|
* fail because we couldn't clean up yet.
|
|
*/
|
|
if ((xldir = opendir(archiveLocation)) != NULL) {
|
|
while ((xlde = readdir(xldir)) != NULL) {
|
|
/*
|
|
* We ignore the timeline part of the XLOG segment identifiers
|
|
* in deciding whether a segment is still needed. This
|
|
* ensures that we won't prematurely remove a segment from a
|
|
* parent timeline. We could probably be a little more
|
|
* proactive about removing segments of non-parent timelines,
|
|
* but that would be a whole lot more complicated.
|
|
*
|
|
* We use the alphanumeric sorting property of the filenames
|
|
* to decide which ones are earlier than the
|
|
* exclusiveCleanupFileName file. Note that this means files
|
|
* are not removed in the order they were originally written,
|
|
* in case this worries you.
|
|
*/
|
|
if (strlen(xlde->d_name) == XLOG_DATA_FNAME_LEN &&
|
|
strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_DATA_FNAME_LEN &&
|
|
strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0) {
|
|
#ifdef WIN32
|
|
snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, xlde->d_name);
|
|
#else
|
|
snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, xlde->d_name);
|
|
#endif
|
|
|
|
if (debug)
|
|
fprintf(stderr, "\nremoving file \"%s\"", WALFilePath);
|
|
|
|
rc = unlink(WALFilePath);
|
|
if (rc != 0) {
|
|
fprintf(stderr,
|
|
"\n%s: ERROR: could not remove file \"%s\": %s\n",
|
|
progname,
|
|
WALFilePath,
|
|
strerror(errno));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (debug)
|
|
fprintf(stderr, "\n");
|
|
} else
|
|
fprintf(
|
|
stderr, "%s: could not open archive location \"%s\": %s\n", progname, archiveLocation, strerror(errno));
|
|
|
|
closedir(xldir);
|
|
fflush(stderr);
|
|
}
|
|
}
|
|
|
|
/* =====================================================================
|
|
* End of Customizable section
|
|
* =====================================================================
|
|
*/
|
|
|
|
/*
|
|
* SetWALFileNameForCleanup()
|
|
*
|
|
* Set the earliest WAL filename that we want to keep on the archive
|
|
* and decide whether we need_cleanup
|
|
*/
|
|
static bool SetWALFileNameForCleanup(void)
|
|
{
|
|
uint32 tli = 1, log = 0, seg = 0;
|
|
uint32 log_diff = 0, seg_diff = 0;
|
|
bool cleanup = false;
|
|
|
|
if (restartWALFileName) {
|
|
/*
|
|
* Don't do cleanup if the restartWALFileName provided is later than
|
|
* the xlog file requested. This is an error and we must not remove
|
|
* these files from archive. This shouldn't happen, but better safe
|
|
* than sorry.
|
|
*/
|
|
if (strcmp(restartWALFileName, nextWALFileName) > 0)
|
|
return false;
|
|
|
|
strcpy(exclusiveCleanupFileName, restartWALFileName);
|
|
return true;
|
|
}
|
|
|
|
if (keepfiles > 0) {
|
|
sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
|
|
if (tli > 0 && log >= 0 && seg > 0) {
|
|
log_diff = keepfiles / MaxSegmentsPerLogFile;
|
|
seg_diff = keepfiles % MaxSegmentsPerLogFile;
|
|
if (seg_diff > seg) {
|
|
log_diff++;
|
|
seg = MaxSegmentsPerLogFile - (seg_diff - seg);
|
|
} else
|
|
seg -= seg_diff;
|
|
|
|
if (log >= log_diff) {
|
|
log -= log_diff;
|
|
cleanup = true;
|
|
} else {
|
|
log = 0;
|
|
seg = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
XLogFileName(exclusiveCleanupFileName, MAXFNAMELEN, tli, (((uint32)log) << 32) | seg);
|
|
|
|
return cleanup;
|
|
}
|
|
|
|
/*
|
|
* CheckForExternalTrigger()
|
|
*
|
|
* Is there a trigger file? Sets global 'Failover' variable to indicate
|
|
* what kind of a trigger file it was. A "fast" trigger file is turned
|
|
* into a "smart" file as a side-effect.
|
|
*/
|
|
static void CheckForExternalTrigger(void)
|
|
{
|
|
char buf[32];
|
|
int fd;
|
|
int len;
|
|
|
|
/*
|
|
* Look for a trigger file, if that option has been selected
|
|
*
|
|
* We use stat() here because triggerPath is always a file rather than
|
|
* potentially being in an archive
|
|
*/
|
|
if (!triggerPath || stat(triggerPath, &stat_buf) != 0)
|
|
return;
|
|
|
|
/*
|
|
* An empty trigger file performs smart failover. There's a little race
|
|
* condition here: if the writer of the trigger file has just created the
|
|
* file, but not yet written anything to it, we'll treat that as smart
|
|
* shutdown even if the other process was just about to write "fast" to
|
|
* it. But that's fine: we'll restore one more WAL file, and when we're
|
|
* invoked next time, we'll see the word "fast" and fail over immediately.
|
|
*/
|
|
if (stat_buf.st_size == 0) {
|
|
Failover = SmartFailover;
|
|
fprintf(stderr, "trigger file found: smart failover\n");
|
|
fflush(stderr);
|
|
return;
|
|
}
|
|
|
|
if ((fd = open(triggerPath, O_RDWR, 0)) < 0) {
|
|
fprintf(stderr, "WARNING: could not open \"%s\": %s\n", triggerPath, strerror(errno));
|
|
fflush(stderr);
|
|
return;
|
|
}
|
|
|
|
if ((len = read(fd, buf, sizeof(buf) - 1)) < 0) {
|
|
fprintf(stderr, "WARNING: could not read \"%s\": %s\n", triggerPath, strerror(errno));
|
|
fflush(stderr);
|
|
close(fd);
|
|
return;
|
|
}
|
|
buf[len] = '\0';
|
|
|
|
if (strncmp(buf, "smart", 5) == 0) {
|
|
Failover = SmartFailover;
|
|
fprintf(stderr, "trigger file found: smart failover\n");
|
|
fflush(stderr);
|
|
close(fd);
|
|
return;
|
|
}
|
|
|
|
if (strncmp(buf, "fast", 4) == 0) {
|
|
Failover = FastFailover;
|
|
|
|
fprintf(stderr, "trigger file found: fast failover\n");
|
|
fflush(stderr);
|
|
|
|
/*
|
|
* Turn it into a "smart" trigger by truncating the file. Otherwise if
|
|
* the server asks us again to restore a segment that was restored
|
|
* already, we would return "not found" and upset the server.
|
|
*/
|
|
if (ftruncate(fd, 0) < 0) {
|
|
fprintf(stderr, "WARNING: could not read \"%s\": %s\n", triggerPath, strerror(errno));
|
|
fflush(stderr);
|
|
}
|
|
close(fd);
|
|
|
|
return;
|
|
}
|
|
close(fd);
|
|
|
|
fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath);
|
|
fflush(stderr);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* RestoreWALFileForRecovery()
|
|
*
|
|
* Perform the action required to restore the file from archive
|
|
*/
|
|
static bool RestoreWALFileForRecovery(void)
|
|
{
|
|
int rc = 0;
|
|
int numretries = 0;
|
|
|
|
if (debug) {
|
|
fprintf(stderr, "running restore: ");
|
|
fflush(stderr);
|
|
}
|
|
|
|
while (numretries <= maxretries) {
|
|
rc = system(restoreCommand);
|
|
if (rc == 0) {
|
|
if (debug) {
|
|
fprintf(stderr, "OK\n");
|
|
fflush(stderr);
|
|
}
|
|
return true;
|
|
}
|
|
pg_usleep(numretries++ * sleeptime * 1000000L);
|
|
}
|
|
|
|
/*
|
|
* Allow caller to add additional info
|
|
*/
|
|
if (debug)
|
|
fprintf(stderr, "not restored\n");
|
|
return false;
|
|
}
|
|
|
|
static void usage(void)
|
|
{
|
|
printf("%s allows openGauss warm standby servers to be configured.\n\n", progname);
|
|
printf("Usage:\n");
|
|
printf(" %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname);
|
|
printf("\nOptions:\n");
|
|
printf(" -c copy file from archive (default)\n");
|
|
printf(" -d generate lots of debugging output (testing only)\n");
|
|
printf(" -k NUMFILESTOKEEP if RESTARTWALFILE is not used, remove files prior to limit\n"
|
|
" (0 keeps all)\n");
|
|
printf(" -l does nothing; use of link is now deprecated\n");
|
|
printf(" -r MAXRETRIES max number of times to retry, with progressive wait\n"
|
|
" (default=3)\n");
|
|
printf(" -s SLEEPTIME seconds to wait between file checks (min=1, max=60,\n"
|
|
" default=5)\n");
|
|
printf(" -t TRIGGERFILE trigger file to initiate failover (no default)\n");
|
|
printf(" -V, --version output version information, then exit\n");
|
|
printf(" -w MAXWAITTIME max seconds to wait for a file (0=no limit) (default=0)\n");
|
|
printf(" -?, --help show this help, then exit\n");
|
|
printf("\n"
|
|
"Main intended use as restore_command in recovery.conf:\n"
|
|
" restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n"
|
|
"e.g.\n"
|
|
" restore_command = 'pg_standby /mnt/server/archiverdir %%f %%p %%r'\n");
|
|
#if ((defined(ENABLE_MULTIPLE_NODES)) || (defined(ENABLE_PRIVATEGAUSS)))
|
|
printf("\nReport bugs to GaussDB support.\n");
|
|
#else
|
|
printf("\nReport bugs to community@opengauss.org> or join opengauss community <https://opengauss.org>.\n");
|
|
#endif
|
|
}
|
|
|
|
#ifndef WIN32
|
|
static void sighandler(int sig)
|
|
{
|
|
signaled = true;
|
|
}
|
|
|
|
/* We don't want SIGQUIT to core dump */
|
|
static void sigquit_handler(int sig)
|
|
{
|
|
signal(SIGINT, SIG_DFL);
|
|
kill(getpid(), SIGINT);
|
|
}
|
|
#endif
|
|
|
|
/*------------ MAIN ----------------------------------------*/
|
|
int main(int argc, char** argv)
|
|
{
|
|
int c;
|
|
|
|
progname = get_progname(argv[0]);
|
|
|
|
if (argc > 1) {
|
|
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) {
|
|
usage();
|
|
exit(0);
|
|
}
|
|
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) {
|
|
puts("pg_standby (PostgreSQL) " PG_VERSION);
|
|
exit(0);
|
|
}
|
|
}
|
|
|
|
#ifndef WIN32
|
|
|
|
/*
|
|
* You can send SIGUSR1 to trigger failover.
|
|
*
|
|
* Postmaster uses SIGQUIT to request immediate shutdown. The default
|
|
* action is to core dump, but we don't want that, so trap it and commit
|
|
* suicide without core dump.
|
|
*
|
|
* We used to use SIGINT and SIGQUIT to trigger failover, but that turned
|
|
* out to be a bad idea because postmaster uses SIGQUIT to request
|
|
* immediate shutdown. We still trap SIGINT, but that may change in a
|
|
* future release.
|
|
*
|
|
* There's no way to trigger failover via signal on Windows.
|
|
*/
|
|
(void)signal(SIGUSR1, sighandler);
|
|
(void)signal(SIGINT, sighandler); /* deprecated, use SIGUSR1 */
|
|
(void)signal(SIGQUIT, sigquit_handler);
|
|
#endif
|
|
|
|
while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1) {
|
|
switch (c) {
|
|
case 'c': /* Use copy */
|
|
restoreCommandType = RESTORE_COMMAND_COPY;
|
|
break;
|
|
case 'd': /* Debug mode */
|
|
debug = true;
|
|
break;
|
|
case 'k': /* keepfiles */
|
|
keepfiles = atoi(optarg);
|
|
if (keepfiles < 0) {
|
|
fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname);
|
|
exit(2);
|
|
}
|
|
break;
|
|
case 'l': /* Use link */
|
|
|
|
/*
|
|
* Link feature disabled, possibly permanently. Linking causes
|
|
* a problem after recovery ends that is not currently
|
|
* resolved by PostgreSQL. 25 Jun 2009
|
|
*/
|
|
#ifdef NOT_USED
|
|
restoreCommandType = RESTORE_COMMAND_LINK;
|
|
#endif
|
|
break;
|
|
case 'r': /* Retries */
|
|
maxretries = atoi(optarg);
|
|
if (maxretries < 0) {
|
|
fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname);
|
|
exit(2);
|
|
}
|
|
break;
|
|
case 's': /* Sleep time */
|
|
sleeptime = atoi(optarg);
|
|
if (sleeptime <= 0 || sleeptime > 60) {
|
|
fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname);
|
|
exit(2);
|
|
}
|
|
break;
|
|
case 't': /* Trigger file */
|
|
triggerPath = optarg;
|
|
break;
|
|
case 'w': /* Max wait time */
|
|
maxwaittime = atoi(optarg);
|
|
if (maxwaittime < 0) {
|
|
fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname);
|
|
exit(2);
|
|
}
|
|
break;
|
|
default:
|
|
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
|
|
exit(2);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Parameter checking - after checking to see if trigger file present
|
|
*/
|
|
if (argc == 1) {
|
|
fprintf(stderr, "%s: not enough command-line arguments\n", progname);
|
|
exit(2);
|
|
}
|
|
|
|
/*
|
|
* We will go to the archiveLocation to get nextWALFileName.
|
|
* nextWALFileName may not exist yet, which would not be an error, so we
|
|
* separate the archiveLocation and nextWALFileName so we can check
|
|
* separately whether archiveLocation exists, if not that is an error
|
|
*/
|
|
if (optind < argc) {
|
|
archiveLocation = argv[optind];
|
|
optind++;
|
|
} else {
|
|
fprintf(stderr, "%s: must specify archive location\n", progname);
|
|
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
|
|
exit(2);
|
|
}
|
|
|
|
if (optind < argc) {
|
|
nextWALFileName = argv[optind];
|
|
optind++;
|
|
} else {
|
|
fprintf(stderr, "%s: must specify WAL file name as second non-option argument (use \"%%f\")\n", progname);
|
|
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
|
|
exit(2);
|
|
}
|
|
|
|
if (optind < argc) {
|
|
xlogFilePath = argv[optind];
|
|
optind++;
|
|
} else {
|
|
fprintf(stderr, "%s: must specify xlog destination as third non-option argument (use \"%%p\")\n", progname);
|
|
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
|
|
exit(2);
|
|
}
|
|
|
|
if (optind < argc) {
|
|
restartWALFileName = argv[optind];
|
|
optind++;
|
|
}
|
|
|
|
CustomizableInitialize();
|
|
|
|
need_cleanup = SetWALFileNameForCleanup();
|
|
|
|
if (debug) {
|
|
fprintf(stderr, "Trigger file: %s\n", triggerPath ? triggerPath : "<not set>");
|
|
fprintf(stderr, "Waiting for WAL file: %s\n", nextWALFileName);
|
|
fprintf(stderr, "WAL file path: %s\n", WALFilePath);
|
|
fprintf(stderr, "Restoring to: %s\n", xlogFilePath);
|
|
fprintf(stderr, "Sleep interval: %d second%s\n", sleeptime, (sleeptime > 1 ? "s" : " "));
|
|
fprintf(stderr, "Max wait interval: %d %s\n", maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
|
|
fprintf(stderr, "Command for restore: %s\n", restoreCommand);
|
|
fprintf(stderr, "Keep archive history: ");
|
|
if (need_cleanup)
|
|
fprintf(stderr, "%s and later\n", exclusiveCleanupFileName);
|
|
else
|
|
fprintf(stderr, "no cleanup required\n");
|
|
fflush(stderr);
|
|
}
|
|
|
|
/*
|
|
* Check for initial history file: always the first file to be requested
|
|
* It's OK if the file isn't there - all other files need to wait
|
|
*/
|
|
if (strlen(nextWALFileName) > 8 && strspn(nextWALFileName, "0123456789ABCDEF") == 8 &&
|
|
strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".history"), ".history") == 0) {
|
|
nextWALFileType = XLOG_HISTORY;
|
|
if (RestoreWALFileForRecovery())
|
|
exit(0);
|
|
else {
|
|
if (debug) {
|
|
fprintf(stderr, "history file not found\n");
|
|
fflush(stderr);
|
|
}
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Main wait loop
|
|
*/
|
|
for (;;) {
|
|
/* Check for trigger file or signal first */
|
|
CheckForExternalTrigger();
|
|
#ifndef WIN32
|
|
if (signaled) {
|
|
Failover = FastFailover;
|
|
if (debug) {
|
|
fprintf(stderr, "signaled to exit: fast failover\n");
|
|
fflush(stderr);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Check for fast failover immediately, before checking if the
|
|
* requested WAL file is available
|
|
*/
|
|
if (Failover == FastFailover)
|
|
exit(1);
|
|
|
|
if (CustomizableNextWALFileReady()) {
|
|
/*
|
|
* Once we have restored this file successfully we can remove some
|
|
* prior WAL files. If this restore fails we musn't remove any
|
|
* file because some of them will be requested again immediately
|
|
* after the failed restore, or when we restart recovery.
|
|
*/
|
|
if (RestoreWALFileForRecovery()) {
|
|
if (need_cleanup)
|
|
CustomizableCleanupPriorWALFiles();
|
|
|
|
exit(0);
|
|
} else {
|
|
/* Something went wrong in copying the file */
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
/* Check for smart failover if the next WAL file was not available */
|
|
if (Failover == SmartFailover)
|
|
exit(1);
|
|
|
|
if (sleeptime <= 60)
|
|
pg_usleep(sleeptime * 1000000L);
|
|
|
|
waittime += sleeptime;
|
|
if (waittime >= maxwaittime && maxwaittime > 0) {
|
|
Failover = FastFailover;
|
|
if (debug) {
|
|
fprintf(stderr, "Timed out after %d seconds: fast failover\n", waittime);
|
|
fflush(stderr);
|
|
}
|
|
}
|
|
if (debug) {
|
|
fprintf(stderr, "WAL file not present yet.");
|
|
if (triggerPath)
|
|
fprintf(stderr, " Checking for trigger file...");
|
|
fprintf(stderr, "\n");
|
|
fflush(stderr);
|
|
}
|
|
}
|
|
}
|