Naive Bayesian Classifier Implementation in Python

I have implemented a naive Bayesian classifier in Python. I ran my code using 1,000, 2,000, and even 5,000 tuples, but it always gives 100% accuracy, so now I am doubtful about the implementation. Is this normal, or have I done something wrong in my code? It is a 2-class classifier.

# Class priors P(c): one entry per distinct label, in the same order as
# class_names, estimated as the label's relative frequency in the data.
class_names = my_data[target].unique()
prob_class = [
    float(len(my_data[my_data[target] == clas])) / float(len(my_data))
    for clas in class_names
]


def bayes_classify(x, my_train_data):
    """Return the most probable class label for row *x* under naive Bayes.

    Uses the module-level ``class_names`` / ``prob_class`` priors. String
    columns are scored with empirical conditional frequencies; numeric
    columns with a Gaussian likelihood estimated per class.

    Parameters:
        x: a single row (mapping column name -> value) to classify.
        my_train_data: training table providing per-class statistics.

    Returns:
        The class label with the highest (unnormalized) posterior.
    """
    max_prob = -1
    best_class = class_names[0]
    ind = 0
    col_names = my_train_data.column_names()
    for clas in class_names:
        prob = prob_class[ind]
        pure_data = my_train_data[my_train_data[target] == clas]
        for col in col_names:
            # BUG FIX: the target column must be excluded from the feature
            # loop. Including it leaks the true label into the likelihood
            # (P(target == x[target] | class) is 1 for the true class and 0
            # for every other class), which forces 100% accuracy.
            if col == target:
                continue
            if my_train_data[col].dtype() == str:
                # Categorical feature: empirical frequency within the class.
                # NOTE(review): an unseen value zeroes the whole product;
                # consider Laplace smoothing.
                prob = prob * (float(len(pure_data[pure_data[col] == x[col]])) / float(len(pure_data)))
            else:
                # Numeric feature: Gaussian likelihood N(mean, stdev^2).
                mean = pure_data[col].mean()
                stdev = pure_data[col].std() + 0.1  # guard against zero variance
                exponent = math.exp(-(math.pow(x[col] - mean, 2) / (2 * math.pow(stdev, 2))))
                # BUG FIX: the Gaussian normalization is 1/(sqrt(2*pi)*stdev);
                # the original multiplied by stdev instead of dividing by it.
                prob = prob * (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

        if prob > max_prob:
            max_prob = prob
            best_class = clas
        ind = ind + 1
    return best_class


def evaluate_bayes_classifier(my_test_data, my_train_data):
    """Predict every row of *my_test_data* and measure the error rate.

    Temporarily attaches a 'pred' column to my_test_data for the
    comparison and removes it again before returning.

    Returns:
        (true_labels, predictions, classification_error) where the error
        is the fraction of misclassified rows.
    """
    prediction = my_test_data.apply(lambda row: bayes_classify(row, my_train_data))
    my_test_data['pred'] = prediction
    target_values = my_test_data[target]
    # Per-row indicator: 0 when the prediction matches the label, else 1.
    mistakes = my_test_data.apply(lambda row: 0 if row[target] == row['pred'] else 1)
    cla_err = mistakes.sum() / float(len(mistakes))
    my_test_data.remove_column('pred')
    return target_values, prediction, cla_err



# 80/20 train/test split; the fixed seed keeps the split reproducible.
my_train_data, my_test_data = my_data.random_split(.8, seed=1)
targets, predictions, error = evaluate_bayes_classifier(my_test_data, my_train_data)
# Express the result as percent correct rather than an error fraction.
accuracy = 100 * (1 - error)

This topic was automatically closed 91 days after the last reply. New replies are no longer allowed.