Many people rely solely on virtualization software to protect their binaries, which is often a bad idea. There's plenty of information about existing VM protections on popular reversing sites, and some even offer what is pretty much a one-click devirtualization tool.

However, whether there are existing tools for defeating your VM of choice or it is still undocumented, there's absolutely no reason why you shouldn't put in extra effort to make reverse engineering harder. I will demonstrate one very simple method to do so: emulation of binary operations at the source-code level.

Here's a simple implementation of binary addition for 32-bit numbers:

// Adds two 32-bit unsigned integers one bit at a time (ripple-carry),
// without using the native + operator. The result wraps modulo 2^32,
// exactly like `var1 + var2` on uint32_t.
//
//   var1, var2 - the operands
//   returns    - (var1 + var2) mod 2^32
__forceinline uint32_t X_AddIntegers(uint32_t var1, uint32_t var2)
{
    // Seed the locals from the arguments instead of leaving them
    // uninitialized: reading an indeterminate value is undefined behaviour,
    // so an optimizer could otherwise return garbage. The clearing loop
    // below still does the real "zeroing" work.
    uint32_t ret = var1, carry = var2, c, bit;

    // ret = carry = 0;
    for (c = 0; c < 32; c++) {
        // Build the mask in uint32_t: `1 << 31` would overflow a signed int.
        bit = (uint32_t)1 << c;
        ret &= ~bit;
        carry &= ~bit;
    }

    // ret = var1 + var2;
    // Only bit 0 of `carry` is used; it holds the carry into position c.
    for (c = 0; c < 32; c++) {
        bit = (uint32_t)1 << c;
        if (var1 & bit) {
            if (var2 & bit) {
                // 1 + 1 (+ carry): the sum bit equals the incoming carry,
                // and a carry always goes out.
                if (carry & 1)
                    ret |= bit;
                carry |= 1;
            } else {
                // 1 + 0: sum bit is set only without an incoming carry;
                // with a carry the sum bit is 0 and the carry propagates.
                if (!(carry & 1)) {
                    ret |= bit;
                    carry &= ~(uint32_t)1;
                }
            }
        } else if (var2 & bit) {
            // 0 + 1: same as the 1 + 0 case above.
            if (!(carry & 1)) {
                ret |= bit;
                carry &= ~(uint32_t)1;
            }
        } else if (carry & 1) {
            // 0 + 0 + carry: the carry is consumed into the sum bit.
            ret |= bit;
            carry &= ~(uint32_t)1;
        }
    }

    return ret;
}

... and here's a header file with some other operations implemented: obf_tricks.h

After emulating the simple operations, you can implement some more complex operations based on the simple ones. For example, three simple ways to compare two integers using XOR, XNOR and bit counting:

// Three interchangeable equality tests built on the emulated bitwise helpers.
// Bitwise XNOR of two equal 32-bit values has every bit set, and bitwise XOR
// of two equal values has none.
// NOTE(review): assumes the helpers in obf_tricks.h return 32-bit values and
// that X_CountHighBits counts the set bits of its argument - confirm there.
#define X_ISEQUAL1(x,y)    (X_XnorIntegers((x),(y)) == 0xFFFFFFFFu)
#define X_ISEQUAL2(x,y)    (X_CountHighBits(X_XnorIntegers((x),(y))) == 32)
#define X_ISEQUAL3(x,y)    (X_XorIntegers((x),(y)) == 0)

For a quick demonstration, here's an extremely simple C program that prints all of its command-line arguments:

int main(int argc, char *argv[])
{
    // Print every command-line argument, one per line, prefixed by its index.
    for (int i = 0; i < argc; i++) {
        // %u expects an unsigned int, so convert the signed index explicitly.
        printf("argv[%u] = %s\n", (unsigned int)i, argv[i]);
    }

    return 0;
}

... which compiles into very straightforward code:

.text:00401000 sub_401000      proc near               ; CODE XREF: start-6D
.text:00401000
.text:00401000 arg_0           = dword ptr  8          ; first stack argument (argc in the C source)
.text:00401000 arg_4           = dword ptr  0Ch        ; second stack argument (argv in the C source)
.text:00401000
.text:00401000 ; Compiled form of the plain main(): calls printf once per argv entry
.text:00401000 ; and returns 0. Register roles: esi = i, edi = argc, ebx = argv.
.text:00401000                 push    ebp
.text:00401001                 mov     ebp, esp        ; standard frame: arguments are addressed via ebp
.text:00401003                 push    esi             ; save the registers used below
.text:00401004                 push    edi
.text:00401005                 mov     edi, [ebp+arg_0] ; edi = argc
.text:00401008                 xor     esi, esi        ; i = 0
.text:0040100A                 test    edi, edi
.text:0040100C                 jle     short loc_40102A ; argc <= 0: skip the loop entirely
.text:0040100E                 push    ebx             ; ebx is only saved on the path that uses it
.text:0040100F                 mov     ebx, [ebp+arg_4] ; ebx = argv
.text:00401012
.text:00401012 loc_401012:                             ; CODE XREF: sub_401000+27
.text:00401012                 push    dword ptr [ebx+esi*4] ; 3rd printf argument: argv[i]
.text:00401015                 push    esi             ; 2nd printf argument: i
.text:00401016                 push    offset Format   ; "argv[%u] = %s\n"
.text:0040101B                 call    ds:printf
.text:00401021                 inc     esi             ; i++
.text:00401022                 add     esp, 0Ch        ; drop the three pushed arguments (12 bytes)
.text:00401025                 cmp     esi, edi
.text:00401027                 jl      short loc_401012 ; loop while i < argc (signed compare)
.text:00401029                 pop     ebx             ; matches the conditional push at 0040100E
.text:0040102A
.text:0040102A loc_40102A:                             ; CODE XREF: sub_401000+C
.text:0040102A                 pop     edi
.text:0040102B                 xor     eax, eax        ; return value 0
.text:0040102D                 pop     esi
.text:0040102E                 pop     ebp
.text:0040102F                 retn
.text:0040102F sub_401000      endp

Now we can rewrite the main application using these macros:

int main(int argc, char *argv[])
{
    uint32_t idx = 0;

    // Walk argv from the front. Both the termination test and the increment
    // go through the emulated helpers instead of native comparison and ++.
    while (!X_ISEQUAL2(idx, argc)) {
        printf("argv[%u] = %s\n", idx, argv[idx]);
        idx = X_AddIntegers(idx, 1);
    }

    return 0;
}

... which now translates into something not that straightforward:

.text:00401000 sub_401000      proc near               ; CODE XREF: start-6D
.text:00401000
.text:00401000 var_C           = dword ptr -0Ch        ; loop index i (zeroed below, indexes argv, receives the adder result)
.text:00401000 var_8           = dword ptr -8          ; scratch for the set-bit count compared against 32
.text:00401000 var_4           = dword ptr -4          ; copy of the adder result, reloaded into esi each iteration
.text:00401000 arg_0           = dword ptr  8          ; first stack argument (argc in the C source)
.text:00401000 arg_4           = dword ptr  0Ch        ; second stack argument (argv in the C source)
.text:00401000
.text:00401000 ; Compiled form of the obfuscated main(). Each loop iteration runs, in order:
.text:00401000 ;   1. a bit-by-bit XNOR of i and argc into edi,
.text:00401000 ;   2. a count of the set bits of that XNOR (loop exits when the count is 32),
.text:00401000 ;   3. the printf call,
.text:00401000 ;   4. a bit-by-bit add of 1 to i (esi = sum, ebx bit 0 = carry).
.text:00401000 ; In every inner loop a one-bit mask is rotated left and a counter runs from 32 down to 0.
.text:00401000                 push    ebp
.text:00401001                 mov     ebp, esp
.text:00401003                 sub     esp, 0Ch        ; reserve 12 bytes: var_C, var_8, var_4
.text:00401006                 push    ebx
.text:00401007                 push    esi
.text:00401008                 mov     [ebp+var_C], 0  ; i = 0
.text:0040100F                 mov     ebx, [ebp+var_C] ; ebx = 0; ebx later serves as the adder's carry
.text:00401012                 push    edi
.text:00401013                 mov     edi, [ebp+var_C] ; edi = 0; edi later accumulates the XNOR result
.text:00401016
.text:00401016 loc_401016:                             ; CODE XREF: sub_401000+FC
.text:00401016                 mov     ecx, 1          ; mask = bit 0
.text:0040101B                 lea     edx, [ecx+1Fh]  ; counter = 1 + 31 = 32
.text:0040101E                 mov     edi, edi        ; no-op; the label that follows sits at a 16-byte-aligned address
.text:00401020
.text:00401020 loc_401020:                             ; CODE XREF: sub_401000+29
.text:00401020                 mov     eax, ecx        ; eax = current mask
.text:00401022                 rol     ecx, 1          ; advance the mask to the next bit
.text:00401024                 not     eax
.text:00401026                 and     edi, eax        ; clear that one bit of edi; after 32 rounds edi == 0
.text:00401028                 dec     edx
.text:00401029                 jnz     short loc_401020
.text:0040102B                 mov     esi, [ebp+arg_0] ; esi = argc
.text:0040102E                 lea     ecx, [edx+20h]  ; edx is 0 after the loop, so counter = 32
.text:00401031                 mov     edx, [ebp+var_C] ; edx = i
.text:00401034                 mov     eax, 1          ; mask = bit 0
.text:00401039                 lea     esp, [esp+0]    ; no-op; the label that follows sits at a 16-byte-aligned address
.text:00401040
.text:00401040 loc_401040:                             ; CODE XREF: sub_401000+53
.text:00401040                 test    edx, eax        ; is this bit of i set?
.text:00401042                 jz      short loc_40104A
.text:00401044                 test    esi, eax        ; bit of i is set: check the same bit of argc
.text:00401046                 jz      short loc_401050 ; bits differ: leave the result bit clear
.text:00401048                 jmp     short loc_40104E ; both set: bits match
.text:0040104A ; ---------------------------------------------------------------------------
.text:0040104A
.text:0040104A loc_40104A:                             ; CODE XREF: sub_401000+42
.text:0040104A                 test    esi, eax        ; bit of i is clear: check the same bit of argc
.text:0040104C                 jnz     short loc_401050 ; bits differ: leave the result bit clear
.text:0040104E
.text:0040104E loc_40104E:                             ; CODE XREF: sub_401000+48
.text:0040104E                 or      edi, eax        ; bits match: set this bit in the XNOR result
.text:00401050
.text:00401050 loc_401050:                             ; CODE XREF: sub_401000+46
.text:00401050                                         ; sub_401000+4C
.text:00401050                 rol     eax, 1          ; next bit
.text:00401052                 dec     ecx
.text:00401053                 jnz     short loc_401040
.text:00401055                 mov     ecx, 1          ; mask = bit 0
.text:0040105A                 lea     edx, [ecx+1Fh]  ; counter = 32
.text:0040105D                 lea     ecx, [ecx+0]    ; no-op; the label that follows sits at a 16-byte-aligned address
.text:00401060
.text:00401060 loc_401060:                             ; CODE XREF: sub_401000+6A
.text:00401060                 mov     eax, ecx
.text:00401062                 rol     ecx, 1
.text:00401064                 not     eax
.text:00401066                 and     [ebp+var_8], eax ; clear one bit of var_8 per round; var_8 == 0 afterwards
.text:00401069                 dec     edx
.text:0040106A                 jnz     short loc_401060
.text:0040106C                 mov     esi, [ebp+var_8] ; esi = 0: running count of set bits
.text:0040106F                 mov     eax, 1          ; mask = bit 0
.text:00401074                 lea     ecx, [edx+20h]  ; counter = 32
.text:00401077
.text:00401077 loc_401077:                             ; CODE XREF: sub_401000+7F
.text:00401077                 test    edi, eax        ; is this bit of the XNOR result set?
.text:00401079                 jz      short loc_40107C
.text:0040107B                 inc     esi             ; yes: count it
.text:0040107C
.text:0040107C loc_40107C:                             ; CODE XREF: sub_401000+79
.text:0040107C                 rol     eax, 1
.text:0040107E                 dec     ecx
.text:0040107F                 jnz     short loc_401077
.text:00401081                 mov     [ebp+var_8], esi ; store the count
.text:00401084                 cmp     [ebp+var_8], 20h ; all 32 bits equal, i.e. i == argc?
.text:00401088                 mov     esi, [ebp+var_4] ; reload esi for the adder; mov leaves the cmp flags intact
.text:0040108B                 jz      short loc_401101 ; count == 32: leave the loop
.text:0040108D                 mov     eax, [ebp+var_C] ; eax = i
.text:00401090                 mov     ecx, [ebp+arg_4] ; ecx = argv
.text:00401093                 push    dword ptr [ecx+eax*4] ; 3rd printf argument: argv[i]
.text:00401096                 push    eax             ; 2nd printf argument: i
.text:00401097                 push    offset Format   ; "argv[%u] = %s\n"
.text:0040109C                 call    ds:printf
.text:004010A2                 mov     ecx, 1          ; mask = bit 0
.text:004010A7                 add     esp, 0Ch        ; drop the three pushed arguments (12 bytes)
.text:004010AA                 lea     edx, [ecx+1Fh]  ; counter = 32
.text:004010AD                 lea     ecx, [ecx+0]    ; no-op; the label that follows sits at a 16-byte-aligned address
.text:004010B0
.text:004010B0 loc_4010B0:                             ; CODE XREF: sub_401000+BB
.text:004010B0                 mov     eax, ecx
.text:004010B2                 rol     ecx, 1
.text:004010B4                 not     eax
.text:004010B6                 and     esi, eax        ; clear one bit of the sum (ret) per round
.text:004010B8                 and     ebx, eax        ; clear one bit of the carry per round
.text:004010BA                 dec     edx
.text:004010BB                 jnz     short loc_4010B0
.text:004010BD                 lea     ecx, [edx+20h]  ; counter = 32
.text:004010C0                 mov     edx, [ebp+var_C] ; edx = i (first addend)
.text:004010C3                 mov     eax, 1          ; mask = bit 0
.text:004010C8
.text:004010C8 loc_4010C8:                             ; CODE XREF: sub_401000+F4
.text:004010C8                 test    edx, eax        ; is this bit of i set?
.text:004010CA                 jz      short loc_4010DC
.text:004010CC                 test    al, 1           ; second addend is the constant 1: its bit is set only when mask == 1
.text:004010CE                 jz      short loc_4010E0 ; i bit set, constant bit clear
.text:004010D0                 test    bl, 1           ; both bits set: check the incoming carry
.text:004010D3                 jz      short loc_4010D7
.text:004010D5                 or      esi, eax        ; 1 + 1 + carry: sum bit is 1
.text:004010D7
.text:004010D7 loc_4010D7:                             ; CODE XREF: sub_401000+D3
.text:004010D7                 or      ebx, 1          ; 1 + 1 always carries out
.text:004010DA                 jmp     short loc_4010F1
.text:004010DC ; ---------------------------------------------------------------------------
.text:004010DC
.text:004010DC loc_4010DC:                             ; CODE XREF: sub_401000+CA
.text:004010DC                 test    al, 1           ; i bit clear: is the constant's bit set?
.text:004010DE                 jz      short loc_4010E7 ; neither bit set
.text:004010E0
.text:004010E0 loc_4010E0:                             ; CODE XREF: sub_401000+CE
.text:004010E0                 test    bl, 1           ; exactly one addend bit is set: check the carry
.text:004010E3                 jnz     short loc_4010F1 ; carry in: sum bit stays 0 and the carry stays set
.text:004010E5                 jmp     short loc_4010EC ; no carry: sum bit is 1
.text:004010E7 ; ---------------------------------------------------------------------------
.text:004010E7
.text:004010E7 loc_4010E7:                             ; CODE XREF: sub_401000+DE
.text:004010E7                 test    bl, 1           ; both addend bits clear: only the carry can set the sum bit
.text:004010EA                 jz      short loc_4010F1 ; no carry: nothing to do
.text:004010EC
.text:004010EC loc_4010EC:                             ; CODE XREF: sub_401000+E5
.text:004010EC                 or      esi, eax        ; set the sum bit
.text:004010EE                 and     ebx, 0FFFFFFFEh ; clear the carry (bit 0 of ebx)
.text:004010F1
.text:004010F1 loc_4010F1:                             ; CODE XREF: sub_401000+DA
.text:004010F1                                         ; sub_401000+E3 ...
.text:004010F1                 rol     eax, 1          ; next bit
.text:004010F3                 dec     ecx
.text:004010F4                 jnz     short loc_4010C8
.text:004010F6                 mov     [ebp+var_4], esi ; keep a copy of the sum in var_4
.text:004010F9                 mov     [ebp+var_C], esi ; i = i + 1
.text:004010FC                 jmp     loc_401016      ; back to the equality test
.text:00401101 ; ---------------------------------------------------------------------------
.text:00401101
.text:00401101 loc_401101:                             ; CODE XREF: sub_401000+8B
.text:00401101                 pop     edi
.text:00401102                 pop     esi
.text:00401103                 xor     eax, eax        ; return value 0
.text:00401105                 pop     ebx
.text:00401106                 mov     esp, ebp        ; release the 12 bytes of locals
.text:00401108                 pop     ebp
.text:00401109                 retn
.text:00401109 sub_401000      endp

This is a very primitive obfuscation attempt and every somewhat experienced reverser will have no real issues understanding the code. However, if you combine this with virtualization software you will complicate things further and the reverser will have to invest more time into the process of understanding how your algorithms work.

Please note that this is just a simple PoC for demonstration. In the real world you should come up with your own ideas, and it's really not hard to come up with something that does not look like the usual code modern compilers output. Start by obfuscating some very simple operations, and then you can build more complex ones on top of them.

Next Post Previous Post