Signal-Desktop/ts/util/benchmark/stats.ts

154 lines
3.7 KiB
TypeScript

// Copyright 2023 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
export type StatsType = {
mean: number;
stddev: number;
[key: string]: number;
};
export function stats(
list: ReadonlyArray<number>,
percentiles: ReadonlyArray<number> = []
): StatsType {
if (list.length === 0) {
throw new Error('Empty list given to stats');
}
let mean = 0;
let stddev = 0;
for (const value of list) {
mean += value;
stddev += value ** 2;
}
mean /= list.length;
stddev /= list.length;
stddev -= mean ** 2;
stddev = Math.sqrt(stddev);
const sorted = list.slice().sort((a, b) => a - b);
const result: StatsType = { mean, stddev };
for (const p of percentiles) {
result[`p${p}`] = sorted[Math.floor((sorted.length * p) / 100)];
}
return result;
}
// t-distribution value for various sample count and p=0.001
// https://www.itl.nist.gov/div898/handbook/eda/section3/eda3672.htm
const STUDENT_T = [
318.309, 22.327, 10.215, 7.173, 5.893, 5.208, 4.785, 4.501, 4.297, 4.144,
4.025, 3.93, 3.852, 3.787, 3.733, 3.686, 3.646, 3.61, 3.579, 3.552, 3.527,
3.505, 3.485, 3.467, 3.45, 3.435, 3.421, 3.408, 3.396, 3.385, 3.375, 3.365,
3.356, 3.348, 3.34, 3.333, 3.326, 3.319, 3.313, 3.307, 3.301, 3.296, 3.291,
3.286, 3.281, 3.277, 3.273, 3.269, 3.265, 3.261, 3.258, 3.255, 3.251, 3.248,
3.245, 3.242, 3.239, 3.237, 3.234, 3.232, 3.229, 3.227, 3.225, 3.223, 3.22,
3.218, 3.216, 3.214, 3.213, 3.211, 3.209, 3.207, 3.206, 3.204, 3.202, 3.201,
3.199, 3.198, 3.197, 3.195, 3.194, 3.193, 3.191, 3.19, 3.189, 3.188, 3.187,
3.185, 3.184, 3.183, 3.182, 3.181, 3.18, 3.179, 3.178, 3.177, 3.176, 3.175,
3.175, 3.174,
// Infinity
3.09,
];
export type Sample = Readonly<{
y: number;
x: number;
}>;
export type Regression = Readonly<{
yIntercept: number;
slope: number;
confidence: number;
outliers: number;
severeOutliers: number;
}>;
export function regress(samples: ReadonlyArray<Sample>): Regression {
// Bin the data by iteration count
const bins = new Map<number, Array<number>>();
for (const { x, y } of samples) {
let bin = bins.get(x);
if (bin === undefined) {
bin = [];
bins.set(x, bin);
}
bin.push(y);
}
let outliers = 0;
let severeOutliers = 0;
// Within each iteration bin identify the outliers for reporting purposes.
for (const [, ys] of bins) {
ys.sort();
const p25 = ys[Math.floor(ys.length * 0.25)] ?? -Infinity;
const p75 = ys[Math.ceil(ys.length * 0.75)] ?? +Infinity;
const iqr = p75 - p25;
const outlierLow = p25 - iqr * 1.5;
const outlierHigh = p75 + iqr * 1.5;
const badOutlierLow = p25 - iqr * 3;
const badOutlierHigh = p75 + iqr * 3;
// Tukey's method
for (const d of ys) {
if (d < badOutlierLow || d > badOutlierHigh) {
severeOutliers += 1;
} else if (d < outlierLow || d > outlierHigh) {
outliers += 1;
}
}
}
if (samples.length < 2) {
throw new Error('Low sample count');
}
let meanY = 0;
let meanX = 0;
for (const { y, x } of samples) {
meanY += y;
meanX += x;
}
meanY /= samples.length;
meanX /= samples.length;
let slopeNum = 0;
let slopeDenom = 0;
for (const { y, x } of samples) {
slopeNum += (y - meanY) * (x - meanX);
slopeDenom += (x - meanX) ** 2;
}
// Slope
const slope = slopeNum / slopeDenom;
// Y-Intercept
const yIntercept = meanY - slope * meanX;
let stdError = 0;
for (const { y, x } of samples) {
stdError += (y - yIntercept - slope * x) ** 2;
}
stdError /= samples.length - 2;
stdError /= slopeDenom;
stdError = Math.sqrt(stdError);
return {
yIntercept,
slope,
confidence:
STUDENT_T[Math.min(samples.length, STUDENT_T.length - 1)] * stdError,
outliers,
severeOutliers,
};
}