Many people rely solely on virtualization software to protect their binaries, which is often a bad idea. There is plenty of information about existing VM protections on popular reversing sites, and some even offer what is pretty much a 1-click devirtualization tool.
However, whether tools for defeating your VM of choice already exist or it is still undocumented, there is absolutely no reason not to put in extra effort to make reverse engineering harder. I will demonstrate one very simple method for doing so: emulating binary operations at the source-code level.
Here's a simple implementation of binary addition for 32-bit numbers:
/* __forceinline is an MSVC keyword; map it to the closest equivalent so the
 * snippet also builds with other compilers. */
#if !defined(_MSC_VER) && !defined(__forceinline)
#  if defined(__GNUC__)
#    define __forceinline static inline __attribute__((always_inline))
#  else
#    define __forceinline static inline
#  endif
#endif

/*
 * Adds two 32-bit unsigned integers one bit at a time (ripple-carry adder)
 * without ever using the '+' operator on the operands.
 *
 * var1, var2: the operands.
 * Returns:    (var1 + var2) modulo 2^32.
 */
__forceinline uint32_t X_AddIntegers(uint32_t var1, uint32_t var2)
{
    /* Seed the work variables with defined values: the clearing loop below
     * reads them, and reading an uninitialized local is undefined behavior. */
    uint32_t ret = var1, carry = var2;
    unsigned c;

    // ret = carry = 0;
    for (c = 0; c < 32; c++) {
        /* Unsigned mask: (1 << 31) on a signed int would overflow. */
        const uint32_t bit = (uint32_t)1 << c;
        ret &= ~bit;
        carry &= ~bit;
    }

    // ret = var1 + var2;
    for (c = 0; c < 32; c++) {
        const uint32_t bit = (uint32_t)1 << c;

        if (var1 & bit) {
            if (var2 & bit) {
                /* 1 + 1 (+ carry): the sum bit equals the carry in, and
                 * there is always a carry out. */
                if (carry & 1) {
                    ret |= bit;
                }
                carry |= 1;
            } else {
                /* 1 + 0: the sum bit is set only without a carry in;
                 * otherwise the carry keeps propagating. */
                if (!(carry & 1)) {
                    ret |= bit;
                    carry &= ~(uint32_t)1;
                }
            }
        } else if (var2 & bit) {
            /* 0 + 1: same as the 1 + 0 case. */
            if (!(carry & 1)) {
                ret |= bit;
                carry &= ~(uint32_t)1;
            }
        } else if (carry & 1) {
            /* 0 + 0 + carry: the carry lands in this bit and is consumed. */
            ret |= bit;
            carry &= ~(uint32_t)1;
        }
    }
    return ret;
}
... and here's a header file with some other operations implemented: obf_tricks.h
After emulating the simple operations, you can implement some more complex operations based on the simple ones. For example, three simple ways to compare two integers using XOR, XNOR and bit counting:
/*
 * Three interchangeable equality tests built on the emulated bitwise helpers;
 * each expands to a nonzero value when x and y are equal.
 * NOTE(review): assumes the helpers from obf_tricks.h operate on 32-bit
 * values, like X_AddIntegers does - confirm against the header.
 */
/* Equal operands XNOR to all ones; compare against the unsigned all-ones
 * value instead of relying on -1 being implicitly converted. */
#define X_ISEQUAL1(x,y) (X_XnorIntegers((x),(y)) == UINT32_MAX)
/* Equal operands XNOR to a word with all 32 bits set. */
#define X_ISEQUAL2(x,y) (X_CountHighBits(X_XnorIntegers((x),(y))) == 32)
/* Equal operands XOR to zero. */
#define X_ISEQUAL3(x,y) (X_XorIntegers((x),(y)) == 0)
For a quick demonstration here's an extremely simple C code that will print all command line arguments:
/* Prints every command-line argument together with its index, one per line.
 * Always returns 0. */
int main(int argc, char *argv[])
{
    for (int i = 0; i < argc; i++) {
        /* %u expects an unsigned int; i is never negative here, so the cast
         * keeps the value and makes the argument type match the format. */
        printf("argv[%u] = %s\n", (unsigned)i, argv[i]);
    }
    return 0;
}
... which translates into very straightforward code:
.text:00401000 sub_401000 proc near ; CODE XREF: start-6D
.text:00401000 ; compiled form of main(argc, argv): a plain counted loop around printf
.text:00401000 arg_0 = dword ptr 8 ; first stack argument: argc
.text:00401000 arg_4 = dword ptr 0Ch ; second stack argument: argv
.text:00401000
.text:00401000 push ebp
.text:00401001 mov ebp, esp
.text:00401003 push esi
.text:00401004 push edi
.text:00401005 mov edi, [ebp+arg_0] ; edi = argc
.text:00401008 xor esi, esi ; esi = i = 0
.text:0040100A test edi, edi
.text:0040100C jle short loc_40102A ; argc <= 0: skip the loop entirely
.text:0040100E push ebx ; ebx is only saved on the path that uses it
.text:0040100F mov ebx, [ebp+arg_4] ; ebx = argv
.text:00401012
.text:00401012 loc_401012: ; CODE XREF: sub_401000+27
.text:00401012 push dword ptr [ebx+esi*4] ; argv[i]
.text:00401015 push esi ; i
.text:00401016 push offset Format ; "argv[%u] = %s\n"
.text:0040101B call ds:printf
.text:00401021 inc esi ; i++
.text:00401022 add esp, 0Ch ; drop the three printf arguments
.text:00401025 cmp esi, edi
.text:00401027 jl short loc_401012 ; loop while i < argc (signed compare)
.text:00401029 pop ebx
.text:0040102A
.text:0040102A loc_40102A: ; CODE XREF: sub_401000+C
.text:0040102A pop edi
.text:0040102B xor eax, eax ; return 0
.text:0040102D pop esi
.text:0040102E pop ebp
.text:0040102F retn
.text:0040102F sub_401000 endp
Now we can rewrite the main application using these macros:
/* Prints every command-line argument with its index, using the emulated
 * equality test and the emulated increment instead of '<' and '++'. */
int main(int argc, char *argv[])
{
    uint32_t i = 0;

    /* Keep going until the emulated comparison reports i == argc. */
    while (!X_ISEQUAL2(i, argc)) {
        printf("argv[%u] = %s\n", i, argv[i]);
        i = X_AddIntegers(i, 1);
    }

    return 0;
}
... which now translates into something not that straightforward:
.text:00401000 sub_401000 proc near ; CODE XREF: start-6D
.text:00401000 ; compiled form of the obfuscated main(): the emulated XNOR, bit count and addition all appear inline
.text:00401000 var_C = dword ptr -0Ch ; loop counter i
.text:00401000 var_8 = dword ptr -8 ; set-bit count used by the equality test
.text:00401000 var_4 = dword ptr -4 ; saved result of the emulated addition
.text:00401000 arg_0 = dword ptr 8 ; first stack argument: argc
.text:00401000 arg_4 = dword ptr 0Ch ; second stack argument: argv
.text:00401000
.text:00401000 push ebp
.text:00401001 mov ebp, esp
.text:00401003 sub esp, 0Ch ; reserve 12 bytes for var_4/var_8/var_C
.text:00401006 push ebx
.text:00401007 push esi
.text:00401008 mov [ebp+var_C], 0 ; i = 0
.text:0040100F mov ebx, [ebp+var_C] ; ebx = 0; used below as the adder's carry
.text:00401012 push edi
.text:00401013 mov edi, [ebp+var_C] ; edi = 0; used below as the XNOR result
.text:00401016
.text:00401016 loc_401016: ; CODE XREF: sub_401000+FC
.text:00401016 mov ecx, 1 ; ecx = single-bit mask, starting at bit 0
.text:0040101B lea edx, [ecx+1Fh] ; edx = 32 iterations
.text:0040101E mov edi, edi ; no-op (register moved onto itself)
.text:00401020
.text:00401020 loc_401020: ; CODE XREF: sub_401000+29
.text:00401020 mov eax, ecx
.text:00401022 rol ecx, 1 ; advance the mask to the next bit
.text:00401024 not eax ; eax = ~mask
.text:00401026 and edi, eax ; clear that bit of the XNOR result
.text:00401028 dec edx
.text:00401029 jnz short loc_401020 ; repeat for all 32 bits
.text:0040102B mov esi, [ebp+arg_0] ; esi = argc
.text:0040102E lea ecx, [edx+20h] ; edx is 0 here, so ecx = 32 iterations
.text:00401031 mov edx, [ebp+var_C] ; edx = i
.text:00401034 mov eax, 1 ; eax = bit mask
.text:00401039 lea esp, [esp+0] ; no-op
.text:00401040
.text:00401040 loc_401040: ; CODE XREF: sub_401000+53
.text:00401040 test edx, eax ; is this bit set in i?
.text:00401042 jz short loc_40104A
.text:00401044 test esi, eax ; set in i: is it also set in argc?
.text:00401046 jz short loc_401050 ; bits differ: leave the result bit clear
.text:00401048 jmp short loc_40104E ; both set: bits match
.text:0040104A ; ---------------------------------------------------------------------------
.text:0040104A
.text:0040104A loc_40104A: ; CODE XREF: sub_401000+42
.text:0040104A test esi, eax ; clear in i: is it set in argc?
.text:0040104C jnz short loc_401050 ; bits differ: leave the result bit clear
.text:0040104E
.text:0040104E loc_40104E: ; CODE XREF: sub_401000+48
.text:0040104E or edi, eax ; bits match: set this bit of the XNOR result
.text:00401050
.text:00401050 loc_401050: ; CODE XREF: sub_401000+46
.text:00401050 ; sub_401000+4C
.text:00401050 rol eax, 1 ; next bit
.text:00401052 dec ecx
.text:00401053 jnz short loc_401040
.text:00401055 mov ecx, 1 ; mask = bit 0
.text:0040105A lea edx, [ecx+1Fh] ; edx = 32 iterations
.text:0040105D lea ecx, [ecx+0] ; no-op
.text:00401060
.text:00401060 loc_401060: ; CODE XREF: sub_401000+6A
.text:00401060 mov eax, ecx
.text:00401062 rol ecx, 1
.text:00401064 not eax
.text:00401066 and [ebp+var_8], eax ; clear the bit counter one bit at a time
.text:00401069 dec edx
.text:0040106A jnz short loc_401060
.text:0040106C mov esi, [ebp+var_8] ; esi = bit count (all bits cleared above)
.text:0040106F mov eax, 1 ; eax = bit mask
.text:00401074 lea ecx, [edx+20h] ; ecx = 32 iterations
.text:00401077
.text:00401077 loc_401077: ; CODE XREF: sub_401000+7F
.text:00401077 test edi, eax ; is this bit set in the XNOR result?
.text:00401079 jz short loc_40107C
.text:0040107B inc esi ; yes: count it
.text:0040107C
.text:0040107C loc_40107C: ; CODE XREF: sub_401000+79
.text:0040107C rol eax, 1
.text:0040107E dec ecx
.text:0040107F jnz short loc_401077
.text:00401081 mov [ebp+var_8], esi
.text:00401084 cmp [ebp+var_8], 20h ; did all 32 bits match?
.text:00401088 mov esi, [ebp+var_4] ; reload the adder's result register (mov leaves the flags intact)
.text:0040108B jz short loc_401101 ; yes: i == argc, leave the loop
.text:0040108D mov eax, [ebp+var_C] ; eax = i
.text:00401090 mov ecx, [ebp+arg_4] ; ecx = argv
.text:00401093 push dword ptr [ecx+eax*4] ; argv[i]
.text:00401096 push eax ; i
.text:00401097 push offset Format ; "argv[%u] = %s\n"
.text:0040109C call ds:printf
.text:004010A2 mov ecx, 1 ; mask = bit 0
.text:004010A7 add esp, 0Ch ; drop the three printf arguments
.text:004010AA lea edx, [ecx+1Fh] ; edx = 32 iterations
.text:004010AD lea ecx, [ecx+0] ; no-op
.text:004010B0
.text:004010B0 loc_4010B0: ; CODE XREF: sub_401000+BB
.text:004010B0 mov eax, ecx
.text:004010B2 rol ecx, 1
.text:004010B4 not eax
.text:004010B6 and esi, eax ; clear one bit of ret
.text:004010B8 and ebx, eax ; clear one bit of carry
.text:004010BA dec edx
.text:004010BB jnz short loc_4010B0
.text:004010BD lea ecx, [edx+20h] ; ecx = 32 iterations
.text:004010C0 mov edx, [ebp+var_C] ; edx = var1 = i
.text:004010C3 mov eax, 1 ; eax = bit mask
.text:004010C8
.text:004010C8 loc_4010C8: ; CODE XREF: sub_401000+F4
.text:004010C8 test edx, eax ; var1 bit set?
.text:004010CA jz short loc_4010DC
.text:004010CC test al, 1 ; var2 is the constant 1, so its bit is set only while the mask is bit 0
.text:004010CE jz short loc_4010E0 ; only the var1 bit is set
.text:004010D0 test bl, 1 ; both bits set: carry in?
.text:004010D3 jz short loc_4010D7
.text:004010D5 or esi, eax ; 1 + 1 + carry: result bit is 1
.text:004010D7
.text:004010D7 loc_4010D7: ; CODE XREF: sub_401000+D3
.text:004010D7 or ebx, 1 ; carry out = 1
.text:004010DA jmp short loc_4010F1
.text:004010DC ; ---------------------------------------------------------------------------
.text:004010DC
.text:004010DC loc_4010DC: ; CODE XREF: sub_401000+CA
.text:004010DC test al, 1 ; var1 bit clear: var2 bit set?
.text:004010DE jz short loc_4010E7 ; neither bit set
.text:004010E0
.text:004010E0 loc_4010E0: ; CODE XREF: sub_401000+CE
.text:004010E0 test bl, 1 ; exactly one operand bit set: carry in?
.text:004010E3 jnz short loc_4010F1 ; 1 + carry: result bit stays 0, carry stays set
.text:004010E5 jmp short loc_4010EC
.text:004010E7 ; ---------------------------------------------------------------------------
.text:004010E7
.text:004010E7 loc_4010E7: ; CODE XREF: sub_401000+DE
.text:004010E7 test bl, 1 ; neither operand bit set: carry in?
.text:004010EA jz short loc_4010F1 ; nothing to add at this position
.text:004010EC
.text:004010EC loc_4010EC: ; CODE XREF: sub_401000+E5
.text:004010EC or esi, eax ; result bit is 1
.text:004010EE and ebx, 0FFFFFFFEh ; carry out = 0
.text:004010F1
.text:004010F1 loc_4010F1: ; CODE XREF: sub_401000+DA
.text:004010F1 ; sub_401000+E3 ...
.text:004010F1 rol eax, 1 ; next bit
.text:004010F3 dec ecx
.text:004010F4 jnz short loc_4010C8
.text:004010F6 mov [ebp+var_4], esi
.text:004010F9 mov [ebp+var_C], esi ; i = emulated i + 1
.text:004010FC jmp loc_401016 ; back to the loop condition
.text:00401101 ; ---------------------------------------------------------------------------
.text:00401101
.text:00401101 loc_401101: ; CODE XREF: sub_401000+8B
.text:00401101 pop edi
.text:00401102 pop esi
.text:00401103 xor eax, eax ; return 0
.text:00401105 pop ebx
.text:00401106 mov esp, ebp
.text:00401108 pop ebp
.text:00401109 retn
.text:00401109 sub_401000 endp
This is a very primitive obfuscation attempt, and any somewhat experienced reverser will have no real trouble understanding the code. However, if you combine this with virtualization software, you will complicate things further, and the reverser will have to invest more time in understanding how your algorithms work.
Please note that this is just a simple PoC for demonstration. In the real world you should come up with your own ideas, and it's really not hard to come up with something that does not look like the usual code modern compilers output. Start by obfuscating some very simple operations, and then you can base more complex ones on them.