This stores the result in value. Combining the two 32-bit halves of the result takes a few extra cycles, so the number of cycles actually spent between two calls to this code will be slightly smaller than the difference between the two results.
unsigned int hi,lo;
unsigned long long value;
asm (
"cpuid\n\t"
"rdtsc"
: "d" (hi), "a" (lo)
);
value = (((unsigned long long)hi) << 32) | lo;