import math
import random


class Instance:
    """One labeled DNA sequence.

    Attributes:
        label:   1 for a '+' example, 0 for a '-' example.
        fts:     list of ints encoding the sequence chars: a->0, t->1, c->2, g->3.
                 e.g. fts[3] == 2 means the 3rd feature is 'c'.
        uniqeId: unique integer id assigned by the loader.
    """

    FTSVALUERANGE = 4  # every feature takes one of 4 values (a/t/c/g)

    _CHAR_TO_CODE = {'a': 0, 't': 1, 'c': 2, 'g': 3}

    def __init__(self, line, id):
        # line looks like "atcg... +" (sequence, whitespace, class sign)
        temp = line.split()
        self.uniqeId = id
        self.label = 1 if temp[1] == '+' else 0
        # Any character other than a/t/c maps to 3, matching the original
        # if/elif/else fallback (where the final `else` covered 'g').
        self.fts = [self._CHAR_TO_CODE.get(c, 3) for c in temp[0]]

    def selfprint(self):
        print(self.label, self.fts, '\n')


class Stump:
    """One-level decision tree (decision stump).

    Attributes (set by learn):
        root:      index of the feature tested at the root node.
        leaflabel: leaflabel[v] is the predicted class (1 means '+') for
                   instances whose root feature has value v.
        default:   majority class of the training set; used for leaves that
                   received no training mass.
    """

    def learn(self, trainInstances, weights=None):
        """Fit the stump on trainInstances.

        Picks the feature whose majority-vote leaves give the lowest
        (weighted) training error. `weights` is optional and defaults to
        uniform, so the same learner can be reused as the weak learner
        inside AdaBoost (kept flexible for Part B as the comments ask).
        """
        n = len(trainInstances)
        if weights is None:
            weights = [1.0 / n] * n
        numfts = len(trainInstances[0].fts)

        total = sum(weights)
        pos_total = sum(w for w, ins in zip(weights, trainInstances)
                        if ins.label == 1)
        # Majority class overall; ties break toward '+'.
        self.default = 1 if 2 * pos_total >= total else 0

        best_err = None
        for f in range(numfts):
            # Weighted counts per feature value: all mass and positive mass.
            tot = [0.0] * Instance.FTSVALUERANGE
            pos = [0.0] * Instance.FTSVALUERANGE
            for w, ins in zip(weights, trainInstances):
                v = ins.fts[f]
                tot[v] += w
                if ins.label == 1:
                    pos[v] += w
            leaves = []
            err = 0.0
            for v in range(Instance.FTSVALUERANGE):
                if tot[v] == 0.0:
                    # No training mass reached this leaf: fall back to the
                    # global majority so classify() never sees None.
                    leaves.append(self.default)
                elif 2 * pos[v] >= tot[v]:
                    leaves.append(1)
                    err += tot[v] - pos[v]  # negatives misclassified
                else:
                    leaves.append(0)
                    err += pos[v]           # positives misclassified
            if best_err is None or err < best_err:
                best_err = err
                self.root = f
                self.leaflabel = leaves
        return self

    def classify(self, testInstances):
        """Apply the stump; returns one 0/1 prediction per instance.

        The returned list always has the same length as testInstances.
        """
        return [self.leaflabel[ins.fts[self.root]] for ins in testInstances]


def sampleByUniform(instances, L):
    """Resample L instances uniformly with replacement.

    Every instance of `instances`, including head and tail, has equal
    odds of being chosen on each draw; the result has length L.
    """
    return [random.choice(instances) for _ in range(L)]


def sampleByWeight(instances, p, L):
    """Resample L instances with replacement following distribution p.

    Callers must ensure sum(p) == 1 (p is a probability distribution
    over `instances`, aligned by index).
    """
    return random.choices(instances, weights=p, k=L)


def baggingClassify(trainInstances, testInstances, L, M):
    """Bagging classifier built from M decision stumps.

    (1) draw M bootstrap training sets of L uniform samples each,
    (2) train one stump per set, (3) majority-vote the M predictions
    for every test instance (ties break toward class 1).
    """
    votes = [0] * len(testInstances)
    for _ in range(M):
        stump = Stump()
        stump.learn(sampleByUniform(trainInstances, L))
        for i, pred in enumerate(stump.classify(testInstances)):
            votes[i] += pred
    return [1 if 2 * v >= M else 0 for v in votes]


def computeAccuracy(predictions, testInstances):
    """Fraction of predictions matching the true labels.

    Returns 0 when the lengths disagree (signals a broken classifier).
    """
    if len(predictions) != len(testInstances):
        return 0
    right = sum(1 for p, ins in zip(predictions, testInstances)
                if p == ins.label)
    return right * 1.0 / len(predictions)


def adaboostClassify(trainInstances, testInstances, L, K):
    """AdaBoost (by resampling) with up to K stump rounds.

    Each round samples L training instances from the current weight
    distribution, fits a stump, measures its weighted error on the full
    training set, and reweights: misclassified instances gain weight,
    correct ones lose it. The final prediction is the sign of the
    alpha-weighted vote of all kept stumps. The base learner is isolated
    behind Stump(), so Part B can swap in ID3 with the same interface.
    """
    n = len(trainInstances)
    w = [1.0 / n] * n
    eps = 1e-10  # guards log() when a stump is perfect (err == 0)
    ensemble = []  # list of (alpha, stump)

    for _ in range(K):
        stump = Stump()
        stump.learn(sampleByWeight(trainInstances, w, L))
        train_preds = stump.classify(trainInstances)
        err = sum(wi for wi, ins, p in zip(w, trainInstances, train_preds)
                  if p != ins.label)
        if err > 0.5:
            # Worse than chance: discard this round and restart the
            # distribution from uniform, per the standard resampling variant.
            w = [1.0 / n] * n
            continue
        err = min(max(err, eps), 1.0 - eps)
        alpha = 0.5 * math.log((1.0 - err) / err)
        ensemble.append((alpha, stump))
        w = [wi * math.exp(-alpha if p == ins.label else alpha)
             for wi, ins, p in zip(w, trainInstances, train_preds)]
        z = sum(w)
        w = [wi / z for wi in w]

    predictions = []
    for ins in testInstances:
        # Map 0/1 labels to -1/+1 so the weighted vote is a signed score.
        score = sum(a * (1 if s.classify([ins])[0] == 1 else -1)
                    for a, s in ensemble)
        predictions.append(1 if score >= 0 else 0)
    return predictions


def load(trainfile, testfile, trainInstances, testInstances):
    """Read the two data files, appending Instance objects in place.

    Ids are unique across both lists. Files are closed via `with`
    (the original rebound `f` without closing the first handle).
    """
    uniqId = 0
    for fname, dest in ((trainfile, trainInstances), (testfile, testInstances)):
        with open(fname) as f:
            for line in f:
                if line.strip():  # skip blank lines defensively
                    dest.append(Instance(line, uniqId))
                    uniqId += 1


def main():
    trainInstances = []
    testInstances = []
    load('training.txt', 'test.txt', trainInstances, testInstances)
    print('Number of Train/test instances',
          len(trainInstances), len(testInstances))

    s = Stump()
    s.learn(trainInstances)
    print('Stump accuracy on training',
          computeAccuracy(s.classify(trainInstances), trainInstances))
    print('Stump accuracy on testing',
          computeAccuracy(s.classify(testInstances), testInstances))

    print('Bagging accuracy on training',
          computeAccuracy(baggingClassify(trainInstances, trainInstances, 71, 100),
                          trainInstances))
    print('Bagging accuracy on testing',
          computeAccuracy(baggingClassify(trainInstances, testInstances, 71, 100),
                          testInstances))

    print('Adaboost accuracy on training',
          computeAccuracy(adaboostClassify(trainInstances, trainInstances, 71, 100),
                          trainInstances))
    print('Adaboost accuracy on testing',
          computeAccuracy(adaboostClassify(trainInstances, testInstances, 71, 100),
                          testInstances))


# Usage: put training.txt & test.txt in the same directory, then:
#   python ensemble.py
if __name__ == '__main__':
    main()