I have seen the obfuscated ctk.c code, but how can I start to de-obfuscate it?
#include
#include
#include
#include
Using:
sed -e'/#include/d' ctk.c | gcc -E - | sed -e's/;/;\n/g' -e's/}/}\n/g' -e '/^#/d' | indent
I was able to generate the following output, which, while not perfect, already seems to be a lot more readable:
char x[] = "((((((((((((((((((((((", w[] =
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
char r[] = { 92, 124, 47 }
, l[] =
{
2, 3, 1, 0}
;
char *T[] = { " |", " |", "%\\|/%", " %%%", "" }
;
char d = 1, p = 40, o = 40, k = 0, *a, y, z, g = -1, G, X, **P = &T[4], f = 0;
unsigned int s = 0;
void
u (int i)
{
int n;
printf ("\233;
%uH\233L%c\233;
%uH%c\233;
%uH%s\23322;
%uH@\23323;
%uH \n", *x - *w, r[d], *x + *w, r[d], X, *P, p += k, o);
if (abs (p - x[21]) >= w[21])
exit (0);
if (g != G)
{
struct itimerval t = { 0, 0, 0, 0 }
;
g += ((g < G) << 1) - 1;
t.it_interval.tv_usec = t.it_value.tv_usec = 72000 / ((g >> 3) + 1);
setitimer (0, &t, 0);
f && printf ("\e[10;
%u]", g + 24);
}
f && putchar (7);
s += (9 - w[21]) * ((g >> 3) + 1);
o = p;
a = x;
z = *a;
while (*++a)
{
y = *a;
*a = z;
z = y;
}
;
a = w;
z = *a;
while (*++a)
{
y = *a;
*a = z;
z = y;
}
;
(n = rand ()) & 255 || --*w || ++*w;
if (!(**P && P++ || n & 7936))
{
while (abs ((X = rand () % 76) - *x + 2) - *w < 6);
++X;
P = T;
}
(n = rand () & 31) < 3 && (d = n);
!d && --*x <= *w && (++*x, ++d) || d == 2 && ++*x + *w > 79 && (--*x, --d);
signal (i, u);
}
void
e ()
{
signal (14, SIG_IGN);
printf ("\e[0q\ecScore: %u\n", s);
system ("stty echo -cbreak");
}
int main (int C, char **V)
{
atexit (e);
(C < 2 || *V[1] != 113)
&& (f = (C = *(int *) getenv ("TERM")) == (int) 0x756E696C
|| C == (int) 0x6C696E75);
srand (getpid ());
system ("stty -echo cbreak");
G = 0 << 3;
printf ("\e[%uq", l[0]);
u (14);
for (;;)
switch (getchar ())
{
case 113:
return 0;
case 91:
case 98:
case 44:
k = -1;
continue;
case 32:
case 110:
case 46:
k = 0;
continue;
case 93:
case 109:
case 47:
k = 1;
continue;
case 49:
G = 0 << 3;
printf ("\e[%uq", l[0]);
continue;
case 50:
G = 1 << 3;
printf ("\e[%uq", l[1]);
continue;
case 51:
G = 2 << 3;
printf ("\e[%uq", l[2]);
continue;
case 52:
G = 3 << 3;
printf ("\e[%uq", l[3]);
continue;
}
}
I don't think there's much more an automated process will be able to perform at this point, as whether a change makes the code "more" readable or "less" readable from now on might depend on the specific preferences of the reader.
One step that could be performed is removing escape sequences from the strings and placing them somewhere separately. As it turns out the whole
char l[] = {2, 3, 1, 0}
has no other purpose than to be utilized in the escape sequences of the main loop:
printf ("\e[%uq", l[0]);
and so on. Looking up their meaning:
ESC [ 0 q: clear all LEDs
ESC [ 1 q: set Scroll Lock LED
ESC [ 2 q: set Num Lock LED
ESC [ 3 q: set Caps Lock LED
depending on taste you might want to exchange them with a macro or a function call more meaningful to you like clear_all_LEDs
and so on.
I strongly doubt a machine would agree on this being a simplification. As it turns out the whole main loop just seems to be working with keys entered by the user, so probably turning numbers into their corresponding characters might add to readability, like in replacing:
case 113:
return 0;
case 91:
case 98:
case 44:
k = -1;
// ...
case 49:
G = 0 << 3;
printf ("\e[%uq", l[0]);
with something like:
case 'q':
return 0;
case '[':
case 'b':
case ',':
k = -1;
// ...
case '1':
G = 0 << 3;
set_Num_Lock_LED ();
Oh - and while we are at it already why wouldn't we want to change the name from this rather strange G
to gear
. Again I strongly doubt an automated process would have found renaming from G
to gear
any better than renaming it to butterfly
. Well maybe it even isn't.
While beautifying names maybe this function referenced by a single u
is another candidate:
u (14);
with a more meaningful name update
probably. And as we already included <signal.h>
why don't we deobfuscate the code further by replacing 14
with SIGALRM
like this:
update (SIGALRM);
As you can see "deobfuscating" here requires the exact opposite step of that taken before. Replacing the expansion with a macro this time. How would a machine try to decide which one is more useful?
Another spot where we might want to replace a bare number with something else is this one in the update function:
f && putchar (7);
Why not replace the 7
with \a
as it will turn out to be the same in the end. Maybe we should even change the bare f
with something more "meaningful".
Again I vote against butterfly
but would rather like to call it play_sound
:
if (play_sound)
putchar ('\a');
might be the more readable version we are looking for. Sure, we shouldn't forget to replace f in all other spots. The one right at the beginning of our main function being such a culprit:
int main (int C, char **V)
{
atexit (e);
(C < 2 || *V[1] != 113)
&& (f = (C = *(int *) getenv ("TERM")) == (int) 0x756E696C
|| C == (int) 0x6C696E75);
While happily renaming f
to play_sound
and e
to - no, still no butterfly
, this time I'll rather call it: - end
we spot that the function signature seems to look a bit strange in terms of naming conventions: argc
instead of C
and argv
instead of V
would seem more conventional here. Thus giving us:
int main (int argc, char* argv[])
{
atexit (end);
(argc < 2 || *argv[1] != 113)
&& (play_sound = (argc = *(int *) getenv ("TERM")) == (int) 0x756E696C
|| argc == (int) 0x6C696E75);
As this is still not a beauty we ask our standards guy and he informs us that it's pretty OK to replace
(A || B) && (C)
with
if (A || B) { C }
and
A = (x=F)==H || x==I
with
x = F;
if (x==H || x==I)
A=1;
else
A=0;
So maybe this should be a more readable version of the whole code:
if (argc < 2 || *argv[1] != 'q') {
    argc = *(int*) getenv ("TERM");
    if (argc == (int) 0x756E696C || argc == (int) 0x6C696E75)
        play_sound = 1;
    /* skip the else branch here as play_sound is already initialized to 0 */
}
Now still another guy turns up and starts to inform us that, depending on something called endianness, those strange looking numbers 0x6C696E75 and 0x756E696C, if stored in memory, would (when interpreting the raw byte values as ASCII codes) just look like "linu"
or "unil"
. One being "unil" on one architecture type and "linu" the other one, while it is just the other way round on an architecture with different endianness.
So taking a closer look what's essentially happening here is:
Probably we just want to check if the TERM environment variable is set to "linux" so our deobfuscated version might want to perform a string comparison here.
As on the other hand we can't be sure if also allowing terminals with names starting with "unil" to play sound might be a special feature of this software so I decided to probably better leave it intact.
While renaming and re-encoding variable names and values those strange char arrays could be our next victims. The following mess doesn't look too nice:
char x[] = "((((((((((((((((((((((", w[] =
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
char r[] = { 92, 124, 47 };
So maybe they could be changed to:
char x_offset[] = {
40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 0 };
char width[] = {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 0 };
const char border[] = "\\|/";
As you can see I just chose to switch the way the values are described between x
as string constant to x written down as an array as this way the purpose of the values stored here seemed a little bit clearer to me.
While on the other hand I changed the way r
is written down just in exactly the opposite direction as again this seemed a lot clearer to me.
While hunting down all those refs to x
, w
and r
the time could be used to rename p
and o
to - sorry again no butterfly
- pos
and old_pos
while renaming s
to score
.
Changing for example:
s += (9 - w[21]) * ((g >> 3) + 1);
o = p;
a = x;
z = *a;
while (*++a)
{
y = *a;
*a = z;
z = y;
}
;
a = w;
z = *a;
while (*++a)
{
y = *a;
*a = z;
z = y;
}
;
to:
/* update score */
score += (9 - width[NEXT_LINE]) * ((g >> 3) + 1);
old_pos = pos;
/* shift x_offset */
a = x_offset;
z = *a;
while (*++a) {
y = *a;
*a = z;
z = y;
};
/* shift width */
a = width;
z = *a;
while (*++a) {
y = *a;
*a = z;
z = y;
};
Besides the possibility of turning them into some other kind of loop, there's not much beautification possible for the two shifting loops, so adding an appropriate comment is probably the most you can do. Removing the magic number 21
might be another idea; NEXT_LINE
didn't seem to be the worst choice of name here.
The single character labeled variable g
still doesn't look too good. But renaming it to something like update_interval
there's also the chance to eliminate another weird terminal escape sequence:
if (g != G)
{
struct itimerval t = { 0, 0, 0, 0 }
;
g += ((g < G) << 1) - 1;
t.it_interval.tv_usec = t.it_value.tv_usec = 72000 / ((g >> 3) + 1);
setitimer (0, &t, 0);
f && printf ("\e[10;
%u]", g + 24);
}
Maybe looks a little bit more confusing than:
/* update simulation speed */
if (update_interval != gear) {
struct itimerval t = { 0, 0, 0, 0 } ;
update_interval += ((update_interval < gear) << 1) - 1;
t.it_interval.tv_usec = t.it_value.tv_usec = 72000 / ((update_interval >> 3) + 1);
setitimer (0, &t, 0);
if (play_sound)
change_bell_frequency (update_interval + 24);
}
Although the code should look a lot more readable by now there are still some nasty parts left:
!d && --*x <= *w && (++*x, ++d) || d == 2 && ++*x + *w > 79 && (--*x, --d);
Choosing another (hopefully) more meaningful name for d
and breaking operator precedence down you might end up with something like:
/* bend the newest road row, backing off (and straightening) at the screen edges */
if (curve == CURVE_LEFT) {
    --*x_offset;
    if (*x_offset <= *width) {
        ++*x_offset;
        curve = CURVE_NONE;
    }
}
else if (curve == CURVE_RIGHT) {
    ++*x_offset;
    if (*x_offset + *width > 79) {
        --*x_offset;
        curve = CURVE_NONE;
    }
}
instead adding appropriate macros for all those CURVE_...
s.
Now there are still those X
, P
and T
names hanging around that also might be changed. As it makes its purpose also a little bit better visible in code I decided to flip the line order of T
that I renamed to tree
which sure means the calculation also has to be fixed. All in all it's from:
char *T[] = { " |", " |", "%\\|/%", " %%%", "" };
char X, **P = &T[4];
// ...
if (!(**P && P++ || n & 7936))
{
while (abs ((X = rand () % 76) - *x + 2) - *w < 6);
++X;
P = T;
}
To something like:
char *tree[] = {
"",
" %%%",
"%\\|/%",
" |",
" |",
};
char **tree_line = tree;
char tree_position;
// ...
/* update tree line pointer */
if (!(**tree_line && tree_line-- || n & 7936)) {
/* find the right spot to grow */
while (abs ((tree_position = rand () % 76) - *x_offset + 2) - *width < 6)
;
++tree_position;
tree_line = &tree[4];
}
Although the code already seems to look a lot prettier to me now, there's still one part missing. That's the one that's doing all the output. It's this line I'm talking about:
printf ("\233;%uH\233L%c\233;%uH%c\233;%uH%s\23322;%uH@\23323;%uH \n",
*x - *w, r[d], *x + *w, r[d], X, *P, p += k, o);
That, apart from looking pretty hard to read, was even too obfuscated for my computer to produce any usable result.
I tried a lot of different things: running it in other terminal emulators, changing terminal settings and switching locales back and forth, all without success.
So besides the fact that this kind of obfuscation seemed to be more than perfect, as it even seems to confuse my computer, I still can't tell what trick the author intended here.
The octal code \233
has the same bit-pattern as the escape character (\033
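) with the 8th bit set additionally. That byte (0x9B) is the single-byte C1 control character CSI, which terminals running in 8-bit mode treat like the two-byte sequence ESC [ (a terminal running in UTF-8 mode typically does not recognise the raw byte), and this is probably the effect that was intended here. Unfortunately, as I already said, it didn't work for me.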
Fortunately enough the escape sequences still seemed easy enough to guess, so I came up with the following replacement:
pos += move_x,
/* draw street */
printf ("\e[1;%uH" "\e[L" "%c"
"\e[1;%uH" "%c",
*x_offset - *width, border[curve],
*x_offset + *width, border[curve]);
/* draw tree */
printf ("\e[1;%uH" "%s",
tree_position, *tree_line);
/* redraw car */
printf ("\e[22;%uH" "@"
"\e[23;%uH" " " "\n",
pos,
old_pos);
Breaking the drawing down into separate calls to (hopefully) make them a little bit more readable. The actual line and the previous line are still hard coded here as in the original version. Maybe extracting them from there as shown below would even improve readability:
/* draw street */
printf ("\e[1;%uH" "\e[L" "%c"
"\e[1;%uH" "%c",
*x_offset - *width, border[curve],
*x_offset + *width, border[curve]);
/* draw tree */
printf ("\e[1;%uH" "%s",
tree_position, *tree_line);
/* redraw car */
printf ("\e[%u;%uH" "@"
"\e[%u;%uH" " " "\n",
NEXT_LINE +1, pos,
NEXT_LINE +2, old_pos);
This finally brought me to the first usable version which I then "tested" a lot. While probably not 100% state of the art it still seems to be very addictive.
Here is the final unobfuscated version that I came up with. As you'll see I didn't implement the LED setting functions and the clear screen function, but it shouldn't be too hard to find the needed escape sequences scattered throughout the obfuscated version. In fact I already mentioned the LED sequences in this post. The one to clear the screen is "\ec" (in the obfuscated version it directly follows "\e[0q", which clears all LEDs). Happy hacking.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>
#include <signal.h>
/* Index of the last road row in x_offset[] and width[]. That row is the one
   compared against the car position in update (); NEXT_LINE + 1 is the
   screen row on which the car is drawn and NEXT_LINE + 2 the row on which
   its previous position is blanked out. */
#define NEXT_LINE 21
/* Values stored in `curve`. Each value is also the index of the matching
   road-edge character in border[]. */
#define CURVE_LEFT 0
#define CURVE_NONE 1
#define CURVE_RIGHT 2
/* Per-row horizontal position of the road: update () draws the two road
   edges at *x_offset - *width and *x_offset + *width. Element 0 is the row
   drawn at the top of the screen on the next update. The trailing 0 is the
   sentinel that stops the `while (*++a)` shift loops in update (), so no
   live entry may ever become 0. */
char x_offset[] = {
40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 0 };
/* Per-row distance from x_offset[] to each road edge, laid out like
   x_offset[] and terminated by the same 0 sentinel. */
char width[] = {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 0 };
/* Road-edge characters, indexed by `curve` (CURVE_LEFT, CURVE_NONE,
   CURVE_RIGHT). */
const char border[] = "\\|/";
/* Terminal-control stubs. They are intentionally left unimplemented; each
   comment names the escape sequence the obfuscated program printed at the
   corresponding spot. The parameter lists are spelled out so that the calls
   are checked by the compiler (an empty `()` list accepts any arguments
   before C23 and means `(void)` from C23 on, which would reject the call
   that passes a frequency). */

/* Obfuscated original: printf ("\e[10;%u]", frequency). */
void change_bell_frequency (int frequency) { (void) frequency; }
/* Obfuscated original: "\ec". */
void clear_screen (void) {}
/* ESC [ 0 q */
void clear_all_LEDs (void) {}
/* ESC [ 2 q */
void set_Num_Lock_LED (void) {}
/* ESC [ 1 q */
void set_Scroll_lock_LED (void) {}
/* ESC [ 3 q */
void set_Caps_Lock_LED (void) {}
/* Text lines of the roadside tree. update () prints *tree_line once per
   tick and then steps tree_line towards tree[0] for as long as it points at
   a non-empty string, so a new tree starts at tree[4] and ends at the empty
   string in tree[0], which prints nothing. */
char *tree[] = {
"",
" %%%",
"%\\|/%",
" |",
" |",
};
/* Tree line printed by the next update (); starts on the empty line, i.e.
   no tree is being drawn. */
char **tree_line = tree;
/* Screen column at which *tree_line is printed. */
char tree_position;
/* Direction in which the road currently bends: one of the CURVE_* values. */
char curve = CURVE_NONE;
/* General-purpose scratch variables. */
char *a, y, z;
/* Columns the car moves per update: -1, 0 or 1 (set from the keyboard in
   main ()). Explicitly signed because it holds -1. */
signed char move_x = 0;
/* Current speed value. update () moves it one step per tick towards `gear`
   and derives the timer period from it. It starts at -1 so that the first
   update () always programs the timer; plain `char` may be unsigned, which
   would turn -1 into 255, so the type is explicitly signed. */
signed char update_interval = -1;
/* Column of the car, and the column it occupied on the previous update. */
char pos = 40;
char old_pos = 40;
/* Non-zero when the bell should be rung on every update. */
char play_sound = 0;
/* Speed value requested with the keys '1'..'4' (0, 8, 16 or 24). */
char gear;
/* Running score, printed by end (). */
unsigned int score = 0;
/* Positions the cursor by printing ESC [ <x> ; <y> H.
   Note that x is emitted first; in the ANSI cursor-position sequence the
   first parameter is the row and the second the column.
   The arguments are converted to unsigned to match the %u conversions, and
   the escape character is written as \033 because \e is a compiler
   extension (both spell the same byte). */
void move (char x, char y) {
    printf ("\033[%u;%uH", (unsigned) x, (unsigned) y);
}
/* Writes the three-byte sequence ESC [ L to stdout (the same sequence
   update () sends right after positioning the cursor on row 1). */
void insert () {
    fputs ("\e[L", stdout);
}
/* Shifts a 0-terminated row array one place towards its end: every entry
   after the first receives its predecessor's old value, the last live entry
   is dropped and row[0] keeps its value. Stops at the first 0 entry. */
static void shift_rows (char *row) {
    char carried = *row;

    while (*++row) {
        char next = *row;
        *row = carried;
        carried = next;
    }
}

/* One simulation tick; also the SIGALRM handler (it re-installs itself at
   the end). Draws the new top road row, the tree line and the car, ends the
   program when the car has left the road, adjusts the timer towards the
   selected gear, updates the score and generates the next road row.
   i: signal number; unused, SIGALRM is always re-armed.
   NOTE(review): printf () and exit () are not async-signal-safe; this is
   inherited from the original program. */
void update (int i) {
    int n;

    (void) i;

    pos += move_x;

    /* draw street */
    printf ("\e[1;%uH" "\e[L" "%c"
            "\e[1;%uH" "%c",
            *x_offset - *width, border[curve],
            *x_offset + *width, border[curve]);
    /* draw tree */
    printf ("\e[1;%uH" "%s",
            tree_position, *tree_line);
    /* redraw car */
    printf ("\e[%u;%uH" "@"
            "\e[%u;%uH" " " "\n",
            NEXT_LINE + 1, pos,
            NEXT_LINE + 2, old_pos);

    /* did we leave the road ? */
    if (abs (pos - x_offset[NEXT_LINE]) >= width[NEXT_LINE])
        exit (0);

    /* update simulation speed: one step per tick towards the selected gear */
    if (update_interval != gear) {
        struct itimerval t = { { 0, 0 }, { 0, 0 } };

        update_interval += (update_interval < gear) ? 1 : -1;
        t.it_interval.tv_usec = t.it_value.tv_usec = 72000 / ((update_interval >> 3) + 1);
        setitimer (ITIMER_REAL, &t, NULL);
        if (play_sound)
            change_bell_frequency (update_interval + 24);
    }

    /* play sound */
    if (play_sound)
        putchar ('\a');

    /* update score: narrower road and higher speed score more */
    score += (9 - width[NEXT_LINE]) * ((update_interval >> 3) + 1);
    old_pos = pos;

    /* scroll the road description by one row */
    shift_rows (x_offset);
    shift_rows (width);

    /* generate new road: occasionally narrow it, but never below 1 */
    n = rand ();
    if (!(n & 255) && *width > 1)
        --*width;

    /* set tree line pointer: advance through the current tree; once it is
       finished, occasionally start a new one */
    if (!(**tree_line && tree_line-- || n & 7936)) {
        /* find the right spot to grow (not on the road) */
        while (abs ((tree_position = rand () % 76) - *x_offset + 2) - *width < 6)
            ;
        ++tree_position;
        tree_line = &tree[4];
    }

    /* new offset: occasionally pick a new curve direction */
    n = rand () & 31;
    if (n < 3)
        curve = n;
    if (curve == CURVE_LEFT) {
        --*x_offset;
        if (*x_offset <= *width) {
            ++*x_offset;
            curve = CURVE_NONE;
        }
    }
    else if (curve == CURVE_RIGHT) {
        ++*x_offset;
        if (*x_offset + *width > 79) {
            --*x_offset;
            curve = CURVE_NONE;
        }
    }

    /* re-install the handler for the next tick */
    signal (SIGALRM, update);
}
/* Exit handler (registered with atexit () in main ()): ignores any further
   SIGALRM, resets the LEDs and the screen, prints the score and turns
   terminal echo back on / cbreak mode back off. */
void end ()
{
    /* no more timer-driven updates from here on */
    signal (SIGALRM, SIG_IGN);

    clear_all_LEDs ();
    clear_screen ();

    fprintf (stdout, "Score: %u\n", score);

    /* counterpart of the "stty -echo cbreak" issued in main () */
    system ("stty echo -cbreak");
}
/* Program entry point: sets up the terminal and the exit handler, starts
   the timer-driven update () chain and then translates key presses into
   steering (move_x) and gear changes until 'q' is pressed or input ends.
   argv[1] starting with 'q' disables sound. Always returns 0. */
int main (int argc, char **argv) {
    atexit (end);

    /* Sound is only enabled when TERM starts with "linu" or "unil". The
       original compared the first four bytes of TERM, read as an int,
       against 0x6C696E75 and 0x756E696C; these are exactly those two
       prefixes, whichever the byte order. Comparing byte by byte accepts
       the same set, copes with TERM being unset and never reads past the
       terminating NUL. */
    if (argc < 2 || *argv[1] != 'q') {
        const char *term = getenv ("TERM");

        if (term != NULL
            && ((term[0] == 'l' && term[1] == 'i' && term[2] == 'n' && term[3] == 'u')
                || (term[0] == 'u' && term[1] == 'n' && term[2] == 'i' && term[3] == 'l')))
            play_sound = 1;
    }

    srand (getpid ());
    system ("stty -echo cbreak");

    /* start in first gear; the original lit the same LED here as for key '1' */
    gear = 0 << 3;
    set_Num_Lock_LED ();

    /* first tick by hand; update () arms the timer and re-installs itself */
    update (SIGALRM);

    for (;;)
        switch (getchar ())
        {
        case 'q':
            return 0;
        case '[':
        case 'b':
        case ',':
            move_x = -1;
            continue;
        case ' ':
        case 'n':
        case '.':
            move_x = 0;
            continue;
        case ']':
        case 'm':
        case '/':
            move_x = 1;
            continue;
        case '1':
            gear = 0 << 3;
            set_Num_Lock_LED ();
            continue;
        case '2':
            gear = 1 << 3;
            set_Caps_Lock_LED ();
            continue;
        case '3':
            gear = 2 << 3;
            set_Scroll_lock_LED ();
            continue;
        case '4':
            gear = 3 << 3;
            clear_all_LEDs ();
            continue;
        case EOF:
            /* Real end of input: quit instead of spinning forever. A read
               that was merely interrupted by SIGALRM also yields EOF, so
               in that case just clear the error flag and keep going. */
            if (feof (stdin))
                return 0;
            clearerr (stdin);
            continue;
        default:
            continue;
        }
}