// Copyright 2007 Steven Gribble

// NOTE(review): the header names on the eight original #include lines were
// lost before this file reached review. The headers below are the ones the
// code in this file needs in order to compile; confirm against upstream.
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

// Read the x86 time-stamp counter (RDTSC). Only works on Pentium or later
// x86 processors.
//
//   hi - receives the upper 32 bits of the counter (EDX after RDTSC)
//   lo - receives the lower 32 bits of the counter (EAX after RDTSC)
void access_counter(unsigned int *hi, unsigned int *lo) {
  // "volatile" is required: the statement has outputs but no inputs, so
  // without it the compiler is allowed to merge two reads into one or move
  // a read across the code being timed. The "=d"/"=a" constraints bind the
  // outputs straight to EDX/EAX, so no extra moves or clobbers are needed.
  __asm__ __volatile__("rdtsc" : "=d" (*hi), "=a" (*lo));
}

#define FILE1 "/tmp/big1.bin"
#define FILE2 "/var/tmp/big2.bin"

// Create the two large scratch files, FILE1 and FILE2, by running a shell
// command in a child process, and wait for that command to finish.
//
// The files are kept below 2GB, since some versions of libc on Linux don't
// let you open/seek with files that are bigger than 2GB.
//
// Once both files exist, the command cats the first file. The intent is to
// flush the second file out of the file system buffer cache, so the caller
// can seek around in the second file without hitting in the buffer cache.
//
// Does not return on failure: prints a message to stderr and exits.
void create_big_files(void) {
  char cmd[2048];
  int len;
  int status;
  pid_t res;

  len = snprintf(cmd, sizeof cmd,
                 "dd if=/dev/urandom of=%s bs=1024 count=1997152;"
                 "dd if=/dev/urandom of=%s bs=1024 count=1997152;"
                 "cat %s > /dev/null",
                 FILE1, FILE2, FILE1);
  if (len < 0 || (size_t) len >= sizeof cmd) {
    fprintf(stderr, "file creation command too long!\n");
    exit(-1);
  }

  printf("Creating files...\n");
  // Flush before forking so the message is emitted exactly once and ahead
  // of anything the child writes.
  fflush(stdout);

  res = fork();
  if (res < 0) {
    fprintf(stderr, "fork failed!\n");
    exit(-1);
  }

  if (res == 0) {
    // am child; create the files. The argument list of execl must be
    // terminated by a null pointer.
    execl("/bin/bash", "bash", "-c", cmd, (char *) NULL);

    // Only reached if the exec failed. Leave with _exit so the child never
    // falls back into the parent's code path or flushes its stdio buffers.
    perror("execl /bin/bash");
    _exit(127);
  }

  // am parent; wait for this specific child, retrying if a signal
  // interrupts the wait.
  while (waitpid(res, &status, 0) < 0) {
    if (errno != EINTR) {
      perror("waitpid");
      exit(-1);
    }
  }

  // The shell reports the status of the last command (the cat); a non-zero
  // status or abnormal termination means the files cannot be relied upon.
  if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
    fprintf(stderr, "file creation command failed!\n");
    exit(-1);
  }

  printf(" done.\n");
}

// measure the system call using the cycle counter.
measures the // difference in time between doing two system calls and doing // one system call, to try to factor out any measurement overhead void measure_cyclecounter(float mhz) { unsigned int high_s, low_s, high_e, low_e; float latency; int big2; char bytes[4]; // create the files create_big_files(); // open big2 big2 = open(FILE2, O_RDONLY); if (big2 <= 0) { fprintf(stderr, "Open failed!\n"); exit(-1); } // warm things up. Try to get the inode indirect entries for // the blocks we want to seek/measure in the cache, but seeking // just past the blocks and reading. Seeking just past will // bring the right inodes and indirection blocks into the buffer // cache, and if the OS does prefetching readahead, reading past // our measurement blocks makes sure the prefetch doesn't bring in // the measurement blocks themselves. lseek(big2, (off_t) 1024*1024*201, SEEK_SET); read(big2, bytes, 1); lseek(big2, (off_t) 1024*1024*1801, SEEK_SET); read(big2, bytes, 1); // probably won't make a difference given seek latency relative // to the latency of this function call, but warm up anything relative // to grabbing a cycle counter access_counter(&high_s, &low_s); // ok -- time to do the measurement itself. We'll do two seeks/reads. 
access_counter(&high_s, &low_s); lseek(big2, (off_t) 1024*1024*1800, SEEK_SET); read(big2, bytes, 1); lseek(big2, (off_t) 200*1024*1024, SEEK_SET); read(big2, bytes, 1); access_counter(&high_e, &low_e); latency = ((float) (low_e - low_s) / mhz); // divide by two, to get the time for one seek/read latency = latency / 2; // print out the results printf("(cyclecounter) latency: %f microseconds\n", latency); // delete the files close(big2); unlink(FILE1); unlink(FILE2); } void usage(void) { fprintf(stderr, "usage: measure_seek cpu_mhz\n"); fprintf(stderr, " e.g., measure_seek 2791.375\n"); exit(-1); } int main(int argc, char **argv) { float mhz; if (argc < 2) usage(); if (sscanf(argv[1], "%f", &mhz) != 1) usage(); if ((mhz < 100.0) || (mhz > 100000.0)) usage(); // measure usiing the cycle counter measure_cyclecounter(mhz); return 0; }