/* Source: mirror of https://gitlab.com/QEF/q-e.git (C, 461 lines, 13 KiB) */

/*
 * Copyright (c) 1997 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to use, copy, modify, and distribute the Software without
 * restriction, provided the Software, including any modified copies made
 * under this license, is not distributed for a fee, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE MASSACHUSETTS INSTITUTE OF TECHNOLOGY BE LIABLE
 * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Except as contained in this notice, the name of the Massachusetts
 * Institute of Technology shall not be used in advertising or otherwise
 * to promote the sale, use or other dealings in this Software without
 * prior written authorization from the Massachusetts Institute of
 * Technology.
 *
 */

/* fftw.h -- system-wide definitions */
|
|
/* $Id: fftw.h,v 1.2 2006-01-15 20:18:53 giannozz Exp $ */
|
|
|
|
#ifndef FFTW_H
|
|
#define FFTW_H
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif /* __cplusplus */
|
|
|
|
/* our real numbers: the floating-point type used for all real quantities */
typedef double FFTW_REAL;

/*********************************************
 * Complex numbers and operations
 *********************************************/
/* A complex number stored as two consecutive FFTW_REAL members. */
typedef struct {
    FFTW_REAL re;		/* real part */
    FFTW_REAL im;		/* imaginary part */
} FFTW_COMPLEX;

/*
 * Member accessors for an FFTW_COMPLEX object (not a pointer).
 * They expand to plain member accesses, so they may be used as lvalues.
 */
#define c_re(c)  ((c).re)
#define c_im(c)  ((c).im)

/*
 * Direction of a transform, as taken by the plan-creation functions
 * declared in this header.  The numeric values are fixed at -1 and +1.
 */
typedef enum {
    FFTW_FORWARD = -1,
    FFTW_BACKWARD = 1
} fftw_direction;

/*
 * FFTW-1.0 source compatibility is on by default.  Define
 * FFTW_1_0_COMPATIBILITY to 0 before including this header to keep the
 * short names REAL and COMPLEX out of the macro namespace.
 */
#ifndef FFTW_1_0_COMPATIBILITY
#define FFTW_1_0_COMPATIBILITY 1
#endif

#if FFTW_1_0_COMPATIBILITY
/* backward compatibility with FFTW-1.0 */
#define REAL FFTW_REAL
#define COMPLEX FFTW_COMPLEX
#endif

/*********************************************
 * Success or failure status
 *********************************************/

/* Result code returned by the wisdom-import functions declared below. */
typedef enum {
    FFTW_SUCCESS = 0,
    FFTW_FAILURE = -1
} fftw_status;

/*********************************************
|
|
* Codelets
|
|
*********************************************/
|
|
/*
|
|
* There are two kinds of codelets:
|
|
*
|
|
* NO_TWIDDLE computes the FFT of a certain size, operating
|
|
* out-of-place (i.e., take an input and produce a
|
|
* separate output)
|
|
*
|
|
* TWIDDLE like no_twiddle, but operating in place. Moreover,
|
|
* multiplies the input by twiddle factors.
|
|
*/
|
|
|
|
typedef void (notw_codelet) (const FFTW_COMPLEX *, FFTW_COMPLEX *, int, int);
|
|
typedef void (twiddle_codelet) (FFTW_COMPLEX *, const FFTW_COMPLEX *, int,
|
|
int, int);
|
|
typedef void (generic_codelet) (FFTW_COMPLEX *, const FFTW_COMPLEX *, int,
|
|
int, int, int);
|
|
|
|
/*********************************************
|
|
* Configurations
|
|
*********************************************/
|
|
/*
|
|
* A configuration is a database of all known codelets
|
|
*/
|
|
|
|
typedef struct {
|
|
int size; /* size of the problem */
|
|
int signature; /* unique codelet id */
|
|
notw_codelet *codelet; /*
|
|
* pointer to the codelet that solves
|
|
* the problem
|
|
*/
|
|
} config_notw;
|
|
|
|
extern config_notw fftw_config_notw[];
|
|
extern config_notw fftwi_config_notw[];
|
|
|
|
typedef struct {
|
|
int size; /* size of the problem */
|
|
int signature; /* unique codelet id */
|
|
twiddle_codelet *codelet;
|
|
} config_twiddle;
|
|
|
|
extern config_twiddle fftw_config_twiddle[];
|
|
extern config_twiddle fftwi_config_twiddle[];
|
|
|
|
/*
 * generic_codelet is a function type, so these two lines declare
 * functions (defined elsewhere), not variables.
 */
extern generic_codelet fftw_twiddle_generic;
extern generic_codelet fftwi_twiddle_generic;

/* Library version string, defined elsewhere. */
extern char *fftw_version;

/*****************************
 *        Plans
 *****************************/
/*
 * A plan is a sequence of reductions to compute a FFT of
 * a given size.  At each step, the FFT algorithm can:
 *
 * 1) apply a notw codelet, or
 * 2) recurse and apply a twiddle codelet, or
 * 3) apply the generic codelet.
 */

/* Kind of step a plan node performs; one enumerator per case above. */
enum fftw_node_type {
    FFTW_NOTW,
    FFTW_TWIDDLE,
    FFTW_GENERIC
};

/* structure that contains twiddle factors */
|
|
typedef struct fftw_twiddle_struct {
|
|
int n;
|
|
int r;
|
|
int m;
|
|
FFTW_COMPLEX *twarray;
|
|
struct fftw_twiddle_struct *next;
|
|
int refcnt;
|
|
} fftw_twiddle;
|
|
|
|
/* structure that holds all the data needed for a given step */
|
|
typedef struct fftw_plan_node_struct {
|
|
enum fftw_node_type type;
|
|
|
|
union {
|
|
/* nodes of type FFTW_NOTW */
|
|
struct {
|
|
int size;
|
|
notw_codelet *codelet;
|
|
} notw;
|
|
|
|
/* nodes of type FFTW_TWIDDLE */
|
|
struct {
|
|
int size;
|
|
twiddle_codelet *codelet;
|
|
fftw_twiddle *tw;
|
|
struct fftw_plan_node_struct *recurse;
|
|
} twiddle;
|
|
|
|
/* nodes of type FFTW_GENERIC */
|
|
struct {
|
|
int size;
|
|
generic_codelet *codelet;
|
|
fftw_twiddle *tw;
|
|
struct fftw_plan_node_struct *recurse;
|
|
} generic;
|
|
|
|
} nodeu;
|
|
|
|
int refcnt;
|
|
} fftw_plan_node;
|
|
|
|
struct fftw_plan_struct {
|
|
int n;
|
|
fftw_direction dir;
|
|
fftw_plan_node *root;
|
|
|
|
double cost;
|
|
int flags;
|
|
|
|
enum fftw_node_type wisdom_type;
|
|
int wisdom_signature;
|
|
|
|
struct fftw_plan_struct *next;
|
|
int refcnt;
|
|
};
|
|
|
|
/* a plan is just an array of instructions */
|
|
typedef struct fftw_plan_struct *fftw_plan;
|
|
|
|
/*
 * flags for the planner
 *
 * The non-zero values are distinct single bits, so they can be OR-ed
 * together in the `flags' argument of the plan-creation functions.
 */
#define FFTW_ESTIMATE (0)
#define FFTW_MEASURE  (1)

#define FFTW_IN_PLACE (8)
#define FFTW_USE_WISDOM (16)

extern fftw_plan fftw_create_plan(int n, fftw_direction dir, int flags);
|
|
extern fftw_twiddle *fftw_create_twiddle(int n, int r, int m);
|
|
extern void fftw_destroy_twiddle(fftw_twiddle * tw);
|
|
extern void fftw_print_plan(fftw_plan plan);
|
|
extern void fftw_destroy_plan(fftw_plan plan);
|
|
extern void fftw_naive(int n, FFTW_COMPLEX *in, FFTW_COMPLEX *out);
|
|
extern void fftwi_naive(int n, FFTW_COMPLEX *in, FFTW_COMPLEX *out);
|
|
void fftw(fftw_plan plan, int howmany, FFTW_COMPLEX *in, int istride,
|
|
int idist, FFTW_COMPLEX *out, int ostride, int odist);
|
|
extern double fftw_measure_runtime(fftw_plan plan);
|
|
extern void fftw_die(char *s);
|
|
extern void *fftw_malloc(size_t n);
|
|
extern void fftw_free(void *p);
|
|
extern void fftw_check_memory_leaks(void);
|
|
extern void fftw_strided_copy(int, FFTW_COMPLEX *, int, FFTW_COMPLEX *);
|
|
extern void fftw_executor_simple(int, const FFTW_COMPLEX *, FFTW_COMPLEX *,
|
|
fftw_plan_node *, int, int);
|
|
extern void *(*fftw_malloc_hook) (size_t n);
|
|
extern void (*fftw_free_hook) (void *p);
|
|
|
|
/* Wisdom: */
|
|
#define FFTW_HAS_WISDOM /* define this symbol so that we know we are using
|
|
a version of FFTW with wisdom */
|
|
extern void fftw_forget_wisdom(void);
|
|
extern void fftw_export_wisdom(void (*emitter)(char c, void *), void *data);
|
|
extern fftw_status fftw_import_wisdom(int (*g)(void *), void *data);
|
|
extern void fftw_export_wisdom_to_file(FILE *output_file);
|
|
extern fftw_status fftw_import_wisdom_from_file(FILE *input_file);
|
|
extern char *fftw_export_wisdom_to_string(void);
|
|
extern fftw_status fftw_import_wisdom_from_string(const char *input_string);
|
|
|
|
/*
|
|
* define symbol so we know this function is available (it is not in
|
|
* older FFTWs)
|
|
*/
|
|
#define FFTW_HAS_FPRINT_PLAN
|
|
extern void fftw_fprint_plan(FILE * f, fftw_plan plan);
|
|
|
|
/*
 * Returns 1 if FFTW is working.  Otherwise, its value is undefined:
 * (the macro is a compile-time constant and evaluates nothing at run time)
 */
#define is_fftw_working() 1

/*****************************
|
|
* N-dimensional code
|
|
*****************************/
|
|
typedef struct {
|
|
int is_in_place; /* 1 if for in-place FFT's, 0 otherwise */
|
|
int rank; /*
|
|
* the rank (number of dimensions) of the
|
|
* array to be FFT'ed
|
|
*/
|
|
int *n; /*
|
|
* the dimensions of the array to the
|
|
* FFT'ed
|
|
*/
|
|
int *n_before; /*
|
|
* n_before[i] = product of n[j] for j < i
|
|
*/
|
|
int *n_after; /* n_after[i] = product of n[j] for j > i */
|
|
fftw_plan *plans; /* fftw plans for each dimension */
|
|
FFTW_COMPLEX *work; /*
|
|
* work array for FFT when doing
|
|
* "in-place" FFT
|
|
*/
|
|
} fftwnd_aux_data;
|
|
|
|
typedef fftwnd_aux_data *fftwnd_plan;
|
|
|
|
/* Initializing the FFTWND Auxiliary Data */
|
|
fftwnd_plan fftw2d_create_plan(int nx, int ny, fftw_direction dir, int flags);
|
|
fftwnd_plan fftw3d_create_plan(int nx, int ny, int nz,
|
|
fftw_direction dir, int flags);
|
|
fftwnd_plan fftwnd_create_plan(int rank, const int *n, fftw_direction dir,
|
|
int flags);
|
|
|
|
/* Freeing the FFTWND Auxiliary Data */
|
|
void fftwnd_destroy_plan(fftwnd_plan plan);
|
|
|
|
/* Computing the N-Dimensional FFT */
|
|
void fftwnd(fftwnd_plan plan, int howmany,
|
|
FFTW_COMPLEX *in, int istride, int idist,
|
|
FFTW_COMPLEX *out, int ostride, int odist);
|
|
|
|
/****************************************************************************/
/********************************** Timers **********************************/
/****************************************************************************/

/*
 * Here, you can use all the nice timers available in your machine.
 */

/*
 *
 Things you should define to include your own clock:

 fftw_time -- the data type used to store a time

 extern fftw_time fftw_get_time(void);
 -- a function returning the current time.  (We have
 implemented this as a macro in most cases.)

 extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);
 -- returns the time difference (t1 - t2).
 If t1 < t2, it may simply return zero (although this
 is not required).  (We have implemented this as a macro
 in most cases.)

 extern double fftw_time_to_sec(fftw_time t);
 -- returns the time t expressed in seconds, as a double.
 (Implemented as a macro in most cases.)

 FFTW_TIME_MIN -- a double-precision macro holding the minimum
 time interval (in seconds) for accurate time measurements.
 This should probably be at least 100 times the precision of
 your clock (we use even longer intervals, to be conservative).
 This will determine how long the planner takes to measure
 the speeds of different possible plans.

 Bracket all of your definitions with an appropriate #ifdef so that
 they will be enabled on your machine.  If you do add your own
 high-precision timer code, let us know (at fftw@theory.lcs.mit.edu).

 Only declarations should go in this file.  Any function definitions
 that you need should go into timer.c.
 */

/*
 * define a symbol so that we know that we have the fftw_time_diff
 * function/macro (it did not exist prior to FFTW 1.2)
 */
#define FFTW_HAS_TIME_DIFF

#ifdef SOLARIS

/* we use the nanosecond virtual timer */
#include <sys/time.h>

/* Times are stored as returned by gethrtime(). */
typedef hrtime_t fftw_time;

#define fftw_get_time() gethrtime()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
/*
 * Convert a nanosecond count to seconds.  The argument is parenthesized
 * so that an expression such as `t1 - t2' is converted as a whole rather
 * than having the cast bind to its first operand only.
 */
#define fftw_time_to_sec(t) ((double) (t) / 1.0e9)

/*
 * a measurement is valid if it runs for at least
 * FFTW_TIME_MIN seconds.
 */
#define FFTW_TIME_MIN (1.0e-4)	/* for Solaris nanosecond timer */

#endif				/* SOLARIS */

#if defined(MAC) || defined(macintosh)

/*
 * Use Macintosh Time Manager routines (maximum resolution is about 20
 * microseconds).
 */

/* A time value split into a high and a low word. */
typedef struct fftw_time_struct {
    unsigned long hi;
    unsigned long lo;
} fftw_time;

extern fftw_time get_Mac_microseconds(void);

#define fftw_get_time() get_Mac_microseconds()

/* define as a function instead of a macro: */
extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);

/*
 * Convert a {hi, lo} microsecond count to seconds.  One unit of `hi' is
 * worth 2^32 = 4294967296 microseconds (not 2^32 - 1).
 * NOTE(review): this assumes `lo' is a 32-bit quantity, as in the Mac
 * Toolbox UnsignedWide type -- confirm for the target compiler.
 */
#define fftw_time_to_sec(t) ((t).lo * 1.0e-6 + 4294967296.0e-6 * (t).hi)

/*
 * very conservative, since timer should be accurate to 20e-6:
 * (although this seems not to be the case in practice)
 */
#define FFTW_TIME_MIN (5.0e-2)	/* for MacOS Time Manager timer */

#endif				/* Macintosh */

#ifdef __WIN32__

#include <time.h>

/* Times are raw tick counts as returned by GetPerfTime(). */
typedef unsigned long fftw_time;

/* Both helpers are defined outside this header. */
extern unsigned long GetPerfTime(void);
extern double GetPerfSec(double ticks);

#define fftw_get_time() GetPerfTime()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) GetPerfSec(t)

#define FFTW_TIME_MIN (5.0e-2)	/* for Win32 timer */

#endif				/* __WIN32__ */

#if defined(_CRAYMPP)		/* Cray MPP system */

/* I think you have to link with -lsci to get this */
double SECONDR(void);

/* Times are already expressed in seconds, as a double. */
typedef double fftw_time;

#define fftw_get_time() SECONDR()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) (t)

#define FFTW_TIME_MIN (1.0e-1)	/* for Cray MPP SECONDR timer */

#endif				/* _CRAYMPP */

/***********************************************
 * last resort: good old Unix clock()
 ***********************************************/
/*
 * FFTW_TIME_MIN doubles as the "a timer has already been chosen" marker:
 * this fallback is used only when none of the sections above defined it.
 */
#ifndef FFTW_TIME_MIN
#include <time.h>

typedef clock_t fftw_time;

#ifndef CLOCKS_PER_SEC
#ifdef sun
/* stupid sunos4 prototypes */
#define CLOCKS_PER_SEC 1000000
extern long clock(void);
#else				/* not sun, we don't know CLOCKS_PER_SEC */
#error Please define CLOCKS_PER_SEC
#endif
#endif

#if defined(__QK_USER__)
/* dclock() is not declared in this header; it must come from elsewhere. */
#define fftw_get_time() ((long) (dclock() * 1000000.0L))
#else
#define fftw_get_time() clock()
#endif
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) (((double) (t)) / CLOCKS_PER_SEC)

/*
 * ***VERY*** conservative constant: this says that a
 * measurement must run for 200ms in order to be valid.
 * You had better check the manual of your machine
 * to discover if it can do better than this
 */
#define FFTW_TIME_MIN (2.0e-1)	/* for default clock() timer */

#endif				/* UNIX clock() */

/****************************************************************************/

/* Close the extern "C" block opened at the top of the header. */
#ifdef __cplusplus
}				/* extern "C" */
#endif				/* __cplusplus */

#endif				/* FFTW_H */