Pythonでサンプルサイズと検出力を計算する

A/Bテストで使う、検出力と必要サンプルサイズの計算コードです。

from __future__ import annotations

import math
from typing import Tuple

import pandas as pd
from scipy.stats import chi2_contingency
from statsmodels.stats import power
from statsmodels.stats.gof import chisquare_effectsize


def calc_power(
    df: pd.DataFrame,
    alpha: float = 0.05
) -> Tuple[int, float, float, float]:
    """
    Compute the observed power of a chi-square test on an A/B test result.

    A 2x2 table (not converted / converted) x (A / B) is built from ``df``
    and tested with ``chi2_contingency`` without continuity correction.
    The effect size is derived as ``sqrt(chi2 / N)`` and the power is
    evaluated at that effect size for the observed total sample size ``N``.

    Parameters
    ----------
    df
        Table with rows labelled 'A' and 'B' and the columns 'total'
        (number of trials) and 'cv' (number of conversions). Example::

            df = pd.DataFrame({
                'total': [1000, 900],
                'cv': [50, 70],
            }, index=['A', 'B'])
    alpha
        Significance level; must be strictly between 0 and 1.

    Returns
    -------
    tuple
        ``(N, pvalue, effect_size, power)`` where ``N`` is the combined
        number of trials of A and B.

    Raises
    ------
    ValueError
        If ``alpha`` is outside (0, 1) or a conversion count is negative
        or larger than the corresponding total.
    KeyError
        If a required row or column is missing from ``df``.
    """

    if not 0 < alpha < 1:
        raise ValueError(
            f'alpha must be strictly between 0 and 1, got {alpha!r}'
        )

    total_a = df.loc['A', 'total']
    total_b = df.loc['B', 'total']
    cv_a = df.loc['A', 'cv']
    cv_b = df.loc['B', 'cv']

    # Reject impossible counts up front so the caller gets a message that
    # names the offending row instead of an error from inside scipy.
    for label, total, cv in (('A', total_a, cv_a), ('B', total_b, cv_b)):
        if not 0 <= cv <= total:
            raise ValueError(
                f"row {label!r}: 'cv' must satisfy 0 <= cv <= total, "
                f'got cv={cv!r}, total={total!r}'
            )

    N = total_a + total_b

    # Rows: not converted / converted. Columns: pattern A / pattern B.
    observed = [
        [total_a - cv_a, total_b - cv_b],
        [cv_a, cv_b],
    ]
    chi2, pvalue, _, _ = chi2_contingency(observed, correction=False)

    effect_size = math.sqrt(chi2 / N)

    # n_bins=2 gives 1 degree of freedom, matching a 2x2 contingency table.
    resp_power = power.GofChisquarePower().power(
        effect_size, nobs=N, n_bins=2, alpha=alpha
    )
    return N, pvalue, effect_size, resp_power


def calc_sample_size(
        a_ratio: float,
        orig: float,
        kaizen: float,
        alpha: float = 0.05,
        pw: float = 0.8
) -> int:
    """
    Estimate the sample size needed to detect a conversion-rate improvement.

    The expected cell probabilities of the 2x2 table (pattern x outcome)
    are computed under the alternative (B converts at ``orig * kaizen``)
    and under independence (both patterns share the pooled rate). Their
    chi-square effect size is then passed to the power solver to obtain
    the number of observations, which is rounded up. Because the cell
    probabilities span both patterns, the result counts A and B together.

    Example::

        a_ratio = 0.6   # traffic split A:B = 60:40
        orig = 0.03     # conversion rate of pattern A: 3%
        kaizen = 1.05   # target improvement ratio: 105%

    Parameters
    ----------
    a_ratio
        Share of traffic assigned to pattern A; strictly between 0 and 1.
    orig
        Conversion rate of pattern A; strictly between 0 and 1.
    kaizen
        Ratio of B's target conversion rate to ``orig``. ``orig * kaizen``
        must be strictly between 0 and 1 and must differ from ``orig``.
    alpha
        Significance level; strictly between 0 and 1.
    pw
        Desired power; strictly between 0 and 1.

    Returns
    -------
    int
        Required number of observations, rounded up.

    Raises
    ------
    ValueError
        If any argument is outside the ranges described above.
    """

    if not 0 < a_ratio < 1:
        raise ValueError(
            f'a_ratio must be strictly between 0 and 1, got {a_ratio!r}'
        )
    if not 0 < orig < 1:
        raise ValueError(
            f'orig must be strictly between 0 and 1, got {orig!r}'
        )
    improved = orig * kaizen
    if not 0 < improved < 1:
        raise ValueError(
            'orig * kaizen must be strictly between 0 and 1, '
            f'got {improved!r}'
        )
    if improved == orig:
        # A zero effect can never be detected, so no finite sample size
        # exists; fail here rather than inside the solver.
        raise ValueError(
            'kaizen must change the conversion rate (kaizen != 1)'
        )
    if not 0 < alpha < 1:
        raise ValueError(
            f'alpha must be strictly between 0 and 1, got {alpha!r}'
        )
    if not 0 < pw < 1:
        raise ValueError(
            f'pw must be strictly between 0 and 1, got {pw!r}'
        )

    b_ratio = 1 - a_ratio

    # Cell probabilities under the alternative hypothesis.
    p11 = a_ratio * orig
    p12 = a_ratio * (1 - orig)
    p21 = b_ratio * improved
    p22 = b_ratio * (1 - improved)

    # Cell probabilities under independence: both patterns share the
    # pooled conversion / non-conversion rates.
    pe11 = a_ratio * (p11 + p21)
    pe12 = a_ratio * (p12 + p22)
    pe21 = b_ratio * (p11 + p21)
    pe22 = b_ratio * (p12 + p22)

    effect_size = chisquare_effectsize(
        [pe11, pe12, pe21, pe22],
        [p11, p12, p21, p22]
    )

    # n_bins=2 gives 1 degree of freedom, matching a 2x2 contingency table.
    resn_N = power.GofChisquarePower().solve_power(
        effect_size=effect_size, n_bins=2, alpha=alpha, power=pw
    )

    return math.ceil(resn_N)

使用例

def calc_power_sample():
    """Run calc_power on a small example table and print the results."""

    observed = pd.DataFrame(
        {'total': [1000, 900], 'cv': [50, 70]},
        index=['A', 'B'],
    )

    n_total, p_val, w, achieved_power = calc_power(observed)

    # Echo the input table first, then the derived statistics.
    print('df:')
    print(observed)

    report = (
        '-->',
        f'N = {n_total:,}',
        f'p-value = {p_val:.4f}',
        f'effect size = {w:.4f}',
        f'power = {achieved_power:.4f}',
    )
    for line in report:
        print(line)


def calc_sample_size_sample():
    """Print the required sample size for an example A/B test design."""

    a_ratio = 0.6   # traffic split A:B = 60:40
    orig = 0.03     # conversion rate of pattern A: 3%
    kaizen = 1.05   # target improvement ratio: 105%
    alpha = 0.05    # significance level
    pw = 0.8        # desired power

    # Pass alpha and pw explicitly so the values reported below are
    # guaranteed to be the ones the calculation actually used.
    N = calc_sample_size(a_ratio, orig, kaizen, alpha=alpha, pw=pw)

    print(f'ratio of original pattern = {a_ratio:.2f}')
    print(f'original CVR = {100 * orig:.1f} %')
    print(f'expected improvement rate = {100 * kaizen:.1f} %')
    print(f'alpha = {alpha:.3f}')
    print(f'power = {pw:.2f}')
    print('-->')
    print(f'N = {N:,}')


if __name__ == '__main__':
    # Demo 1: observed power of an A/B test result.
    calc_power_sample()
    # Recorded output:
    # df:
    #    total  cv
    # A   1000  50
    # B    900  70
    # -->
    # N = 1,900
    # p-value = 0.0129
    # effect size = 0.0570
    # NOTE(review): calc_power_sample() also prints a final "power = ..."
    # line; its value was not recorded in this listing.
    
    # Demo 2: sample size required to detect a target improvement.
    calc_sample_size_sample()
    # Recorded output:
    # ratio of original pattern = 0.60
    # original CVR = 3.0 %
    # expected improvement rate = 105.0 %
    # alpha = 0.050
    # power = 0.80
    # -->
    # N = 431,159

サンプルサイズの決め方をガチで知りたい方は以下の本をどうぞ。

コメント

タイトルとURLをコピーしました