
"""
required arguments: data file path
#example:python assign1.py C:/home/a-b-testing.csv
"""
from numpy import *
from scipy.stats import *
import os
import sys

def _label(value):
    """Return a group label as text.

    Labels are read with a byte-string dtype, so on Python 3 they arrive as
    ``bytes``; ``str()`` on those would print ``b'...'``.  On Python 2
    ``bytes`` is ``str`` and the value is passed through unchanged.
    """
    if isinstance(value, bytes) and not isinstance(value, str):
        return value.decode('utf-8', 'replace')
    return str(value)


def _pooled_sd(sample_a, sample_b):
    """Return the pooled standard deviation of two samples.

    The pooled estimate weights each group's *sample* variance by its
    degrees of freedom, so ``ddof=1`` is required; NumPy's default
    ``ddof=0`` (population variance) would bias the result low.
    """
    n_a = float(sample_a.size)
    n_b = float(sample_b.size)
    weighted = (n_a - 1) * var(sample_a, ddof=1) + (n_b - 1) * var(sample_b, ddof=1)
    return sqrt(weighted / (n_a + n_b - 2))


def main():
    """Run a two-sample (pooled-variance) t-test on the file named in argv[1].

    The file is read as comma-separated text with one header line and two
    columns: a group label ('Optimization') and a numeric value ('CPV').
    The first two distinct labels (in sorted order) are compared.  The
    report printed to stdout contains the group means, the two-sided
    p-value, an estimate of statistical power, the actual and the
    approximately required sample sizes, and a 95% confidence interval for
    the difference in means.

    Exits via ``sys.exit`` with a message when no path is given or when the
    file contains fewer than two groups.  Prints 'no such file' and returns
    when the path does not exist.
    """
    if len(sys.argv) < 2:
        sys.exit("no file path")

    fn = sys.argv[1]

    if not os.path.exists(fn):
        print('no such file')
        return

    # NOTE(review): 'S6' keeps only the first 6 bytes of each label, so
    # longer labels sharing a 6-byte prefix would be merged -- confirm the
    # expected label width against the real data.
    data = genfromtxt(os.path.abspath(fn),
                      dtype=[('Optimization', 'S6'), ('CPV', 'f8')],
                      skip_header=1, delimiter=",")

    tech = unique(data['Optimization'])
    if tech.size < 2:
        # Previously this fell through to an IndexError on tech[1].
        sys.exit("need at least two groups in the Optimization column")

    cpvA = data['CPV'][data['Optimization'] == tech[0]]
    cpvB = data['CPV'][data['Optimization'] == tech[1]]

    muA = mean(cpvA)
    muB = mean(cpvB)

    nA = float(cpvA.size)
    nB = float(cpvB.size)

    # Allocation ratio between the two groups.
    kappa = nA / nB

    alpha = 0.05
    beta = 0.10

    df = nA + nB - 2
    sd = _pooled_sd(cpvA, cpvB)
    # (1 + 1/kappa)/nB == 1/nA + 1/nB, the usual standard-error factor.
    std_err = sd * sqrt((1 + 1 / kappa) / nB)
    z = abs(muA - muB) / std_err
    t_crit = t.ppf(1 - alpha / 2, df)

    # p-value (two-sided)
    p = 2 * (1 - t.cdf(z, df))

    # statistical power
    Power = t.cdf(z - t_crit, df) + t.cdf(-z - t_crit, df)

    # sample size (normal approximation)
    a_nB = ceil((1 + 1 / kappa) * pow(sd * (norm.ppf(1 - alpha / 2) + norm.ppf(1 - beta)) / (muA - muB), 2))
    a_nA = ceil(kappa * a_nB)

    # 95% confidence interval
    lower = -t_crit * std_err + (muA - muB)
    upper = t_crit * std_err + (muA - muB)

    nameA = _label(tech[0])
    nameB = _label(tech[1])

    # output
    print('Two Sample t-test')
    print('data: {0} vs. {1}'.format(nameA, nameB))
    print('mean of {0}: {1}'.format(nameA, muA))
    print('mean of {0}: {1}'.format(nameB, muB))
    print('p-value: {}'.format(p))
    print('statistical power: {}'.format(Power))
    print('actual sample size: {0} {1} and {2} {3}'.format(nameA, nA, nameB, nB))
    print('required approximate sample size for type I. error of 0.05 and type II. error of 0.1: {0} {1} and {2} {3}'.format(nameA, a_nA, nameB, a_nB))
    print('alternative hypothesis: true difference in means is not equal to 0')
    print('95 percent confidence interval: ({0},{1})'.format(lower, upper))
        
# Run the analysis only when executed as a script, so importing this module
# does not read sys.argv or exit the interpreter.
if __name__ == "__main__":
    main()
    
    