---+ Exe-2-C DOS/286 Decompiler Tests

This is the beta version of an experimental decompiler. The tests are from [[http://www.itee.uq.edu.au/~cristina/dcc/distribution/test.zip][test.zip]] in the [[http://www.itee.uq.edu.au/~cristina/dcc][dcc]] distribution.

%TOC%

---++ Strlen

The original C source for this program is as follows:
main()
{ char *s = "test";
	 strlen(s);
}
strlen(char *s)
{ int n = 0;
	 while (*s++)
		  n++;
	 return (n);
}
This was disassembled as follows (amongst much other code):
proc_10			proc	 near
					 push	 SI							 
					 mov	  SI,194h					  
					 push	 SI							 
					 call	 near ptr proc_11		  
					 pop	  CX							 
					 pop	  SI							 
					 retn									 
proc_10			endp

proc_11			proc	 near
					 push	 BP							 
					 mov	  BP,SP						 
					 push	 SI							 
					 xor	  SI,SI						 
					 jmp	  short loc_12				
loc_11:			; N-Ref=1
					 inc	  SI							 
loc_12:			; N-Ref=1
					 mov	  BX,Word Ptr [BP+4]		
					 inc	  Word Ptr [BP+4]			
					 cmp	  Byte Ptr [BX],0			
					 jne	  loc_11						; Jump if not equal ( != )
					 mov	  AX,SI						 
					 jmp	  short loc_13				
loc_13:			; N-Ref=1
					 pop	  SI							 
					 pop	  BP							 
					 retn									 
proc_11			endp
The decompiler output was as follows:
/****************************************************************************/
					 near proc_10()
/****************************************************************************/
{
register char *reg1 ;

	 push(0x194);
	 proc_11();
	 cx = pop();
}
/****************************************************************************/
					 near proc_11(int	arg0)
/****************************************************************************/
{
register char *reg1 ;

	 reg1 = 0;
	 while(bx = arg0, ++arg0, *bx != 0)	
		  ++reg1;
	 ax = reg1;
}
The decompiler determined that proc_11 takes an int argument (actually a char*), but in proc_10 it did not pass the actual argument (0x194, the pointer to the string) to the call. It has guessed incorrectly that reg1 in proc_11 is a char*; it is actually the int counter =n=. It may have been able to do better if main made use of the return value. There is nothing to indicate the size of *bx (in fact, 8 bits), so this would never compile. The while loop does look good, though.

---++ Fibo

The original C source code is:
int main()
{ int i, numtimes, number;
  unsigned value, fib();

	 printf("Input number of iterations: ");
	 scanf ("%d", &numtimes);
	 for (i = 1; i <= numtimes; i++)
	 {
		  printf ("Input number: ");
		  scanf ("%d", &number);
		  value = fib(number);
		  printf("fibonacci(%d) = %u\n", number, value);
	 }
	 exit(0);
}

unsigned fib(x)					  /* compute fibonacci number recursively */
int x;
{
	 if (x > 2)
		  return (fib(x - 1) + fib(x - 2));
	 else
		  return (1);
}
The disassembly for the fib function (proc_11) is:
proc_11			proc	 near
					 push	 BP							 
					 mov	  BP,SP						 
					 push	 SI							 
					 mov	  SI,Word Ptr [BP+4]		
					 cmp	  SI,+2						 
					 jle	  loc_13						; Jump if not greater ( <= )
					 mov	  AX,SI						 
					 dec	  AX							 
					 push	 AX							 
					 call	 near ptr proc_11		  
					 pop	  CX							 
					 push	 AX							 
					 mov	  AX,SI						 
					 add	  AX,0FFFEh					
					 push	 AX							 
					 call	 near ptr proc_11		  
					 pop	  CX							 
					 mov	  DX,AX						 
					 pop	  AX							 
					 add	  AX,DX						 
					 jmp	  short loc_14				

					 dw		5EBh
loc_13:			; N-Ref=1
					 mov	  AX,1						  
					 jmp	  short loc_14				
loc_14:			; N-Ref=2
					 pop	  SI							 
					 pop	  BP							 
					 retn									 
proc_11			endp
The disassembler correctly did not attempt to disassemble the unreachable code before loc_13 (it is emitted as the data word =dw 5EBh=). The decompiled output is as follows. Again, main is proc_10; proc_11 is fib:
/****************************************************************************/
					 near proc_10()
/****************************************************************************/
{
register char *reg1 ;
register char *reg2 ;
char  *loc0;
char  *loc1;

		  push(0x194);
		  proc_41();
		  cx = pop();
		  ax = &loc0;
		  push(ax);
		  push(0x1B1)
		  proc_55();
		  cx = pop();
		  cx = pop();
		  DELETE: reg1 = 1;
		  si = 1;  /*PCH : RM_Table_init*/
		  while(reg1 <= loc0)	{
					 push(0x1B4);
					 proc_41();
					 cx = pop();
					 ax = &loc1;
					 push(ax);
					 push(0x1C3);
					 proc_55();
					 cx = pop();
					 cx = pop();
					 push(loc1);
					 proc_11();
					 cx = pop();
					 push(ax);
					 push(loc1);
					 push(0x1C6);
					 proc_41();
					 sp = sp + 6;
					 ++reg1;
		  }
		  ax = 0;
		  push(ax);
		  proc_13();
		  cx = pop();
}

/****************************************************************************/
					 near proc_11(int	arg0)
/****************************************************************************/
{
register char *reg1 ;

		  reg1 = arg0;
		  if(reg1 > 2)	{
					 ax = reg1;
					 --ax;
					 push(ax);
					 proc_11();
					 cx = pop();
					 push(ax);
					 ax = reg1;
					 ax = ax +  - 2;
					 push(ax);
					 proc_11();
					 cx = pop();
					 dx = ax;
					 ax = pop();
					 ax = ax + dx;
		  }
		  else	{
					 DELETE: ax = 1;
					 ax = 1;  /*PCH : RM_Table_init*/
					 return;
		  }
}
Here the "instruction by instruction" nature of the decompilation is evident. Forward substitution (necessitating data flow analysis to ensure safety) would merge the individual instruction results into fewer, more complex, but more readable expressions. The condition codes (status flags) have been successfully removed. No attempt has been made to determine the return value of function fib (here proc_11). No attempt is made to recognise the library functions =printf= and =scanf=. In proc_10, the decompiler seems to forget whether register =SI= is represented by variable =si= or variable =reg1= (it emits both =reg1 = 1= and =si = 1= for the loop initialisation).

CategoryDecompilation