#!/usr/bin/env pypy3

# TASK: Look for a gross error in the given data using Dixon's Q test and
# Grubbs' test, then compare the results.

import math

# [95% confidence] Critical Q values for Dixon's Q test
Q95 = {
    3: 0.970, 4: 0.829, 5: 0.710, 6: 0.625, 7: 0.568,
    8: 0.526, 9: 0.493, 10: 0.466, 11: 0.444, 12: 0.426,
    13: 0.410, 14: 0.396, 15: 0.384, 16: 0.374, 17: 0.365,
    18: 0.356, 19: 0.349, 20: 0.342, 21: 0.337, 22: 0.331,
    23: 0.326, 24: 0.321, 25: 0.317, 26: 0.312, 27: 0.308,
    28: 0.305, 29: 0.301, 30: 0.290
}
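
# Dixon's Q statistic for a suspect value is the gap between it and its
# nearest neighbour, divided by the full range of the sample:
#
#     Q = |x_suspect - x_nearest| / (x_max - x_min)
#
# The suspect value is rejected at 95% confidence when Q exceeds Q95[n].
# (This is the r10 form of the statistic, which is what dixon_test() below
# computes.)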

# [95% confidence] Critical G values for Grubbs' test (two-sided), derived
# from the t-distribution
GRUBBS_CRITICAL = {
    3: 1.155, 4: 1.481, 5: 1.715, 6: 1.887, 7: 2.020, 8: 2.126,
    9: 2.215, 10: 2.290, 11: 2.355, 12: 2.412, 13: 2.462, 14: 2.507,
    15: 2.549, 16: 2.585, 17: 2.620, 18: 2.651, 19: 2.681, 20: 2.709,
    21: 2.733, 22: 2.758, 23: 2.781, 24: 2.802, 25: 2.822, 26: 2.841,
    27: 2.859, 28: 2.876, 29: 2.893, 30: 2.908, 31: 2.924, 32: 2.938,
    33: 2.952, 34: 2.965, 35: 2.979, 36: 2.991, 37: 3.003, 38: 3.014,
    39: 3.025, 40: 3.036
}
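

# The tabulated values above can be reproduced (to within rounding) from the
# t-distribution via the standard two-sided Grubbs formula
#
#     G_crit = ((n - 1) / sqrt(n)) * sqrt(t^2 / (n - 2 + t^2)),
#
# where t is the upper alpha/(2n) critical point of Student's t with n - 2
# degrees of freedom. The helper below is an optional sketch of that formula;
# it is not used by the rest of the script and needs SciPy, an extra
# dependency this file does not otherwise require.
def grubbs_critical_value(n: int, alpha: float = 0.05) -> float:
    """Two-sided Grubbs critical value for sample size n (requires SciPy)."""
    from scipy.stats import t as t_dist  # local import: optional dependency
    t_crit = t_dist.ppf(1 - alpha / (2 * n), n - 2)
    return (n - 1) / math.sqrt(n) * math.sqrt(t_crit ** 2 / (n - 2 + t_crit ** 2))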


def dixon_test(data, left=True, right=True, q_dict=Q95):
    """
    Keyword arguments:
        data = An ordered or unordered list of data points (int or float).
        left = Q-test of the minimum value in the ordered list if True.
        right = Q-test of the maximum value in the ordered list if True.
        q_dict = A dictionary of critical Q values for a given confidence
            level, where the dict keys are sample sizes N and the associated
            values are the corresponding critical Q values, e.g.,
            {3: 0.97, 4: 0.829, 5: 0.71, 6: 0.625, ...}

    Returns a list of two values [min outlier, max outlier], with None in
    place of any value that is not an outlier. E.g.,
        for [1, 1, 1] -> [None, None]
        for [5, 1, 1] -> [None, 5]
        for [5, 1, 5] -> [1, None]
    """
    assert left or right, 'At least one of the variables, `left` or `right`, must be True.'
    assert len(data) >= 3, 'At least 3 data points are required'
    assert len(data) <= max(q_dict.keys()), 'Sample size too large'

    sdata = sorted(data)
    Q_mindiff, Q_maxdiff = (0, 0), (0, 0)

    if left:
        # Gap between the suspect minimum and its nearest neighbour,
        # divided by the full range of the sample.
        Q_min = sdata[1] - sdata[0]
        try:
            Q_min /= (sdata[-1] - sdata[0])
        except ZeroDivisionError:
            pass
        Q_mindiff = (Q_min - q_dict[len(data)], sdata[0])

    if right:
        # Same ratio for the suspect maximum.
        Q_max = abs(sdata[-2] - sdata[-1])
        try:
            Q_max /= abs(sdata[0] - sdata[-1])
        except ZeroDivisionError:
            pass
        Q_maxdiff = (Q_max - q_dict[len(data)], sdata[-1])

    # Report whichever suspect value exceeds its critical Q by the larger
    # margin; a tie reports both ends, and nothing is reported when neither
    # exceeds the critical value.
    if not Q_mindiff[0] > 0 and not Q_maxdiff[0] > 0:
        outliers = [None, None]
    elif Q_mindiff[0] == Q_maxdiff[0]:
        outliers = [Q_mindiff[1], Q_maxdiff[1]]
    elif Q_mindiff[0] > Q_maxdiff[0]:
        outliers = [Q_mindiff[1], None]
    else:
        outliers = [None, Q_maxdiff[1]]

    return outliers
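
# Minimal usage sketch for dixon_test (hand-checked against the Q95 table; the
# dataset here is made up for illustration):
#
#     >>> dixon_test([0.20, 0.45, 0.44, 0.45, 0.50])
#     [0.2, None]
#
# Sorted, the suspect minimum gives Q = (0.44 - 0.20) / (0.50 - 0.20) = 0.80,
# which exceeds Q95[5] = 0.710, so 0.20 is flagged; the suspect maximum gives
# Q = (0.50 - 0.45) / (0.50 - 0.20) ≈ 0.17 and is kept.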


def grubbs_test(data: list[float], alpha=0.05):
    """
    Grubbs' test (extreme studentized deviate) for detecting a single outlier.

    Keyword arguments:
        data = A list of data points (int or float).
        alpha = Significance level. Only 0.05 (95% confidence) is supported,
            since the GRUBBS_CRITICAL table is tabulated for that level.

    Returns a tuple: (is_outlier, outlier_value, G_statistic, G_critical)
    """
    assert len(data) >= 3, 'Grubbs test requires at least 3 data points'
    assert len(data) <= max(GRUBBS_CRITICAL.keys()), 'Sample size too large'

    n = len(data)
    mean = sum(data) / n
    # Sample standard deviation (n - 1 in the denominator)
    std_dev = math.sqrt(sum((x - mean) ** 2 for x in data) / (n - 1))

    # All values identical: no spread, no outlier
    if std_dev == 0:
        return (False, None, 0, 0)

    # G = max |x - mean| / s; the most extreme value is the candidate outlier
    g_values = [(abs(x - mean) / std_dev, x) for x in data]
    g_max, extreme_value = max(g_values)

    g_critical = GRUBBS_CRITICAL[n]
    is_outlier = g_max > g_critical

    return (is_outlier, extreme_value if is_outlier else None, g_max, g_critical)
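
# Minimal usage sketch for grubbs_test (hand-worked, values rounded):
#
#     >>> grubbs_test([1, 1, 1, 1, 10])
#     (True, 10, 1.788..., 1.715)
#
# mean = 2.8, s = sqrt(64.8 / 4) ≈ 4.025, G = |10 - 2.8| / 4.025 ≈ 1.789,
# which exceeds the n = 5 critical value 1.715, so 10 is flagged.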


def solve(data: list[float]):
    print(f"Data: {data}")

    mean = sum(data) / len(data)
    std_dev = math.sqrt(sum((x - mean) ** 2 for x in data) / (len(data) - 1))
    print(f"Mean: {mean:.4f}")
    print(f"Standard deviation: {std_dev:.4f}\n")

    # Grubbs
    print("=" * 60)
    print("GRUBBS' TEST")
    print("=" * 60)
    is_outlier, outlier_value, g_stat, g_crit = grubbs_test(data)
    print(f"G statistic: {g_stat:.4f}")
    print(f"G critical (α=0.05): {g_crit:.4f}")
    print(f"Most extreme value: {outlier_value if outlier_value is not None else 'N/A'}")
    print(f"Is outlier? {is_outlier}")
    if is_outlier:
        print(f"Detected outlier: {outlier_value}")
    else:
        print("No outlier detected")

    # Dixon
    print("\n" + "=" * 60)
    print("DIXON'S Q TEST")
    print("=" * 60)
    dixon_outliers = dixon_test(data)
    print(f"Q critical (α=0.05, n={len(data)}): {Q95[len(data)]:.4f}")
    print(f"Left outlier (minimum): {dixon_outliers[0] if dixon_outliers[0] is not None else 'None'}")
    print(f"Right outlier (maximum): {dixon_outliers[1] if dixon_outliers[1] is not None else 'None'}")

    print("\n" + "=" * 60)
    print("Comparison")
    print("=" * 60)

    # Collect each test's detections as sets so they are easy to compare
    grubbs_outliers = {outlier_value} if is_outlier else set()
    q_outliers = {x for x in dixon_outliers if x is not None}

    print(f"Grubbs outlier: {grubbs_outliers if grubbs_outliers else 'None'}")
    print(f"Dixon outliers: {q_outliers if q_outliers else 'None'}")

    if grubbs_outliers == q_outliers:
        print("\nBoth agree! ^^")
    elif len(grubbs_outliers & q_outliers) > 0:
        print(f"\nPartially agree on: {grubbs_outliers & q_outliers}")
    else:
        print("\nTests disagree:")
        if grubbs_outliers:
            print(f" Only Grubbs detected: {grubbs_outliers}")
        if q_outliers - grubbs_outliers:
            print(f" Only Dixon detected: {q_outliers - grubbs_outliers}")

    return {"grubbs": (is_outlier, outlier_value, g_stat, g_crit), "dixon": dixon_outliers}


if __name__ == "__main__":
    data = [0.51, 0.49, 0.45, 0.44, 0.45, 0.50, 0.57, 0.47, 0.45, 0.48]
    solve(data)
|