/*
 * Simple EM demo for fitting a mixture of two or more classes to data in
 * a set of Boolean variables (the worked example below uses two).
 *
 * Use:
 *   emdemo seed #iterations #classes #vars #...00 #...01 #...10 ...
 * where
 *   seed        = integer used to initialize the random number generator
 *   #iterations = how many EM iterations to run
 *   #classes    = number of mixture classes
 *   #vars       = number of Boolean variables; exactly 2^#vars data
 *                 counts must follow, otherwise "Bad arguments" is reported
 *
 * The #...00 etc. arguments are the data itself. E.g. for two variables:
 *   #00 = how many data elements are <0, 0>
 *   #01 = how many data elements are <0, 1>
 *   #10 = how many data elements are <1, 0>
 *   #11 = how many data elements are <1, 1>
 *
 * Assumes a naive Bayes model:
 *
 *        Class
 *        /   \
 *     var0   var1
 *
 * Uses EM to estimate the parameters of the Bayesian model:
 *   - Probability of choice of class c0 or c1:  P(C)
 *   - Probability tables for var0 and var1 conditioned on choice of class:
 *     P(V0 | C), P(V1 | C)
 *
 * After each iteration prints the estimated probability of class given a
 * data element, P(C | V0 V1 ... Vn), the new parameters of the model, and
 * the log likelihood of the data.
 */

/*
 * NOTE(review): this file appears to have lost text during extraction
 * (everything between a '<' and a following '>' seems to be gone). The
 * header names of the three #include directives are missing; the code below
 * calls printf/fprintf, exit/atoi/srand and log, so they were presumably
 * <stdio.h>, <stdlib.h> and <math.h> -- confirm against the original file.
 */
#include
#include
#include
/* Upper bounds used for sizing; the tables they size are not visible here. */
#define MAX_NUMVARS 6
#define MAX_NUMCLASSES 10
/*
 * NOTE(review): the MAX_NUMCOMBOS definition is cut off after "(1<", and the
 * text that followed it (presumably the global tables, the normalize()
 * helper, and the head of a bit-string helper) is missing. What remains
 * below is the tail of that helper: for each remaining bit position it
 * stores '1' or '0' through p according to bit (width-1) of x, decrements
 * width, then NUL-terminates the string and returns s.
 */
#define MAX_NUMCOMBOS (1<0) {
    if ((x>>(width-1))&1) *(p++)='1';
    else *(p++)='0';
    width--;
  }
  *p = 0;
  return s;
}

/*
 * Runs the EM iterations and prints the state after each step.
 *
 * Visible behavior per iteration: normalizes data_class[combo] over
 * numclasses entries, prints the "Iteration"/"E-step" headers, accumulates
 * data[combo]*data_class[combo][c] into a table indexed by a bit of combo
 * and normalizes each cpt[v][c] over its 2 entries, prints the "M-step"
 * header, then computes logmarginal[combo] = log(sum) and the count-weighted
 * log likelihood, which it prints.
 *
 * NOTE(review): several loop headers and statements inside this function
 * are missing from this copy (see the notes at each gap), so the function
 * cannot be read end to end here.
 */
void emdemo(void) {
  int c,v,t,i,r,b,combo;
  float sum, prod;
  char buf[65];

  /* Randomly initialize prior and cpt */
  /* NOTE(review): text is missing after "c" on the next line -- the
     initialization loops, the iteration loop header and the start of the
     E-step are gone; only the end of a product over variables remains. */
  for (c=0;c>v) & 1];
      }
      /* Turn the per-class scores for this combo into a distribution. */
      normalize(numclasses, data_class[combo]);
    }
    printf("Iteration %d\n", i);
    printf(" E-step\n");
    /* NOTE(review): text is missing after "combo" on the next line -- the
       E-step printout and the loop headers of the M-step are gone. */
    for (combo=0;combo>v)&1] += data[combo]*data_class[combo][c];
        /* Each conditional table row has two entries (variable is 0 or 1). */
        normalize(2, cpt[v][c]);
      }
    }
    printf(" M-step\n");
    /* NOTE(review): text is missing after "c" on the next line -- the M-step
       printout and the start of the marginal computation are gone. */
    for (c=0;c>v)&1];
        sum += prod;
      }
      /* sum accumulates prod over the enclosing loop; its log is stored as
         the log marginal for this combo. */
      logmarginal[combo] = log(sum);
    }
    loglike = 0.0;
    /* NOTE(review): the bound below is hard-coded to 4, which only matches
       numvars == 2; main() sets numcombos = 1 << numvars, so this presumably
       should be combo<numcombos -- confirm and fix. */
    for (combo=0; combo<4; combo++)
      loglike += data[combo] * logmarginal[combo];
    printf(" Log likelihood = %8.1f\n", loglike);
    printf("\n");
  }
}

/*
 * Entry point: parses the command line into the global settings.
 *
 * Requires at least 7 argv entries, seeds rand() with argv[1], reads the
 * iteration count, class count and variable count from argv[2..4], derives
 * numcombos = 2^numvars, and rejects the command line unless exactly
 * numcombos further arguments follow.
 *
 * NOTE(review): the values come from atoi() with no error detection, and no
 * check of numclasses/numvars against MAX_NUMCLASSES/MAX_NUMVARS is visible
 * in this portion -- confirm whether one exists further down.
 */
int main(int argc, char** argv) {
  int i;
  if (argc < 7){
    fprintf(stderr, "Use: emdemo seed #iterations #classes #vars #...00 #...01 #...10 ...\n");
    exit(1);
  }
  seed = atoi(argv[1]);
  srand(seed);
  numit = atoi(argv[2]);
  numclasses = atoi(argv[3]);
  numvars = atoi(argv[4]);
  numcombos = 1 << numvars;
  /* argv holds the program name, 4 settings, then one count per combo. */
  if (argc != numcombos + 5) {
fprintf(stderr, "Bad arguments\n"); exit(1); } for (i=0;i