/*
 * libOp
 *
 * Copyright (C) 2000 Patrick Alken
 * This library comes with absolutely NO WARRANTY
 *
 * Should you choose to use and/or modify this source code, please
 * do so under the terms of the GNU General Public License under which
 * this program is distributed.
 *
 * $Id: disasm-x86.c,v 1.5 2002/05/27 15:11:28 cosine Exp $
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "args-x86.h"
#include "disasm.h"
#include "disasm-x86.h"
#include "modsib-x86.h"
#include "prefix-x86.h"

/*
 * libString includes
 */
#include "Strn.h"

/*
 * Forward declarations of the helpers that are private to this file
 */
static long FindOpcode(unsigned char *data, char *outstr,
                       struct OpCode **bestmatch);
static int UsesRegisterCode(struct OpCode **codes, int *unique);

/*
 * Decoder state that is visible outside this file. x86DisAssemble()
 * puts every one of these variables back to its initial value before
 * it calls FindOpcode().
 */

/*
 * Global: Location of the ModR/M byte inside the input buffer, so
 *         *ModRMOffset is the value of the byte itself.
 *         (args-x86.c needs this)
 */
unsigned char *ModRMOffset = NULL;

/*
 * Global: ModSib structure that goes with the byte at ModRMOffset
 *         (args-x86.c needs this)
 */
struct ModSib *ModRMPtr = NULL;

/*
 * Global: Location of the SIB byte inside the input buffer, so
 *         *SibOffset is the actual value of the byte.
 */
unsigned char *SibOffset = NULL;

/*
 * Global: ModSib structure that goes with the byte at SibOffset
 */
struct ModSib *SibPtr = NULL;

/*
 * Global: Register code taken from a +rb/+rw/+rd opcode, or -1
 *         when there is none
 *         (args-x86.c needs this)
 */
int RegisterCode = -1;

/*
 * Global: The number 0-7 that was added to the opcode of a +i
 *         instruction, or -1 when there is none
 *         (args-x86.c needs this)
 */
int FPUCode = -1;

/*
 * Global: Index into Registers[] of the current segment override,
 *         or -1 when there is none
 */
int SegmentOverride = -1;

/*
x86DisAssemble()
 Disassemble the opcode found at the start of 'data' into an
assembly language instruction

Inputs: data   - buffer containing opcodes
        outbuf - buffer to store resulting assembly instruction in
        flags  - bitmask of OP_xxx bits

Return: number of bytes disassembled
        0 if FindOpcode() could not match any opcode
        -1 if an error occurs; the error message is printed to
        'outbuf'
*/

long
x86DisAssemble(unsigned char *data, char *outbuf, unsigned int flags)

{
  unsigned char *cursor;  /* first byte that has not been consumed yet */
  struct OpCode *found;   /* opcode entry selected by FindOpcode() */
  long oplen;             /* value returned by FindOpcode() */

  assert(data && outbuf);

  /*
   * FindOpcode() expects all of the shared decoder state to start
   * out clean
   */
  SegmentOverride = (-1);
  FPUCode = (-1);
  RegisterCode = (-1);
  SibPtr = 0;
  SibOffset = 0;
  ModRMPtr = 0;
  ModRMOffset = 0;

  /*
   * OP_BITS32 means we always want the 32 bit forms of instructions
   * and 32 bit ModR/M addressing - this is the case when executable
   * files are disassembled.
   */
  PrefixFlags = (flags & OP_BITS32) ? (PX_32BITDATA | PX_32BITADDR) : 0;

  found = 0;
  oplen = FindOpcode(data, outbuf, &found);

  /* FindOpcode() has already written the error message to 'outbuf' */
  if (oplen == (-1))
    return (-1);

  /* nothing matched */
  if (oplen == 0)
    return (0);

  /*
   * From here on we have a usable match
   */
  assert (found != 0);

  /*
   * ConstructArguments() is handed the address of the cursor, so the
   * distance the cursor ends up from 'data' is the number of bytes
   * disassembled.
   */
  cursor = data + oplen;
  ConstructArguments(&cursor, outbuf, found);

  return (cursor - data);
} /* x86DisAssemble() */

/*
FindOpcode()
  Attempt to locate best matching opcode for given string in
the OpCodes[] array.

Inputs: data       - actual opcode, optionally preceded by prefix
                     bytes (these are skipped using CheckPrefix())
        outstr     - string in which to store error messages
        bestmatch  - OpCode structure in which to store best matching
                     opcode

Return: Number of bytes matched (prefix bytes plus opcode bytes) if
        matching opcode is found
        0 if no matching opcode found
        -1 if error encountered

Side effects: On a good match, 'bestmatch' is modified to point to
              the matching OpCode structure.

              On an error, -1 is returned and 'outstr' is modified
              to contain the error message.

              Non-local variables: RegisterCode, FPUCode, ModRMPtr,
              ModRMOffset, SibPtr, SibOffset may be modified to their
              appropriate values. The address of PrefixFlags is
              handed to CheckPrefix().
*/

static long
FindOpcode(unsigned char *data, char *outstr, struct OpCode **bestmatch)

{
  struct OpCode **OpPtr;    /* looping */
  int index;                /* index in OpCodes[] */
  int BytesMatched;         /* bytes matched so far */
  int MaxBytesMatched;      /* maximum bytes matched so far */
  int tmpreg;               /* for use with +rb/+rw/+rd opcodes */
  int tmpfpu;               /* for use with +i opcodes */
  int unique;               /* 1 if +rb/+rw/+rd opcode is unique */
  unsigned int pbytes;      /* number of bytes in prefix */
  struct ModSib *mptr;      /* structure containing ModR/M info */
  struct ModSib *sibptr;    /* structure containing SIB info */
  unsigned char *codeptr;   /* pointer to actual opcode string */
  int pret;                 /* return value from MatchPrefix */
  int PrefixPriority;       /* highest MatchPrefix value accepted so far */
  int BetterMatch;          /* set to 1 if we find a better match than the previous */
  int ExactMatch;           /* set to 1 if we find an exact match */

  assert(data && outstr && bestmatch);

  MaxBytesMatched = 0;
  tmpreg = (-1);
  tmpfpu = (-1);
  mptr = 0;
  sibptr = 0;
  PrefixPriority = 0;

  /*
   * Check for an opcode prefix, and if one is found, advance
   * data by the number of bytes in the prefix
   */
  pbytes = CheckPrefix(data, &PrefixFlags);
  data += pbytes;

  /*
   * Normally we will use *data as the index to start looking
   * in our table, but there is a nasty case in which the opcode
   * itself is changed which would mean OpCodes[*data] is
   * incorrect. This is the case where a number between 0 and 7
   * is added to the opcode to reflect a register code. In these
   * special cases, we must demand that no opcode exists which
   * is less than 7 more than the opcode in question. That is,
   * the values: opcode ... opcode + 7 are off limits to all other
   * instructions, or there would be ambiguity. We can use this
   * fact to our advantage to determine the original opcode value
   * that we need in order to go to the correct spot in our table
   * to start looking.
   *
   * 'index' is a plain int so that stepping below slot 0 can be
   * detected: with an unsigned char the value would wrap around to
   * 255, the distance test below would turn negative and the search
   * would carry on from the top of the table.
   */
  index = (int) *data;

  while (!*OpCodes[index])
  {
    --index;
    if ((index < 0) || (((int) *data - index) > 7))
    {
      /*
       * This should never happen. When we ran off the bottom of
       * the table, report slot 0 as the lowest index examined.
       */
      Sprintf(outstr,
        "Array indices of OpCodes[] ranging from %d to %d are all NULL (looking for register code)",
        (index < 0) ? 0 : index,
        (unsigned char) *data);
      return (-1);
    }
  } /* while (!*OpCodes[index]) */

  /*
   * If this (new) index of OpCodes[] accepts a REGCODE
   * offset, it will be the difference of *data and index.
   * Otherwise, index should be reset to *data, so the below
   * loop will return an "unknown opcode" error.
   */
  unique = 1;
  if (UsesRegisterCode(OpCodes[index], &unique))
  {
    if (unique || (*data != index))
      RegisterCode = tmpreg = (unsigned char) *data - index;
  }
  else
    index = (unsigned char) *data;

  /*
   * This loop simply looks for the best opcode that matches
   * 'data' - it is stored in 'bestmatch' if found
   *
   * NOTE(review): tmpreg and tmpfpu are not reset at the top of
   * each iteration, so a value computed for a candidate that is
   * rejected further down is still set when the next candidate
   * is examined - confirm that this is intended.
   */
  for (OpPtr = OpCodes[index]; *OpPtr; ++OpPtr)
  {
    BetterMatch = 0;
    ExactMatch = 0;

    /*
     * We automatically know that the first byte of data matches
     * because it was used to get the array index, so set
     * BytesMatched to 1
     */
    BytesMatched = 1;

    /*
     * If there are more bytes to the opcode, we must check them
     * against the values stored in 'data' to get the best match.
     * The stored opcode is walked up to its terminating 0 byte.
     */

    codeptr = (unsigned char *) (*OpPtr)->mcode;
    while (*++codeptr)
    {
      if (*codeptr == (unsigned char) *(data + BytesMatched))
      {
        ++BytesMatched;

        if (*(codeptr + 1) == '\0')
        {
          /*
           * We got an exact match - check if the 'digit' field
           * is either REGCODE or FPUCODE - if it is, it means
           * that the register/fpu code for this byte is 0
           * (since 0 was added to the byte).
           */
          if ((*OpPtr)->digit == REGCODE)
            tmpreg = 0;
          else if ((*OpPtr)->digit == FPUCODE)
            tmpfpu = 0;

          ExactMatch = 1;
        } /* if (*(codeptr + 1) == '\0') */
      }
      else if (*(codeptr + 1) == '\0')
      {
        if ((*OpPtr)->digit == REGCODE)
        {
          /*
           * If the 'digit' field of OpPtr is REGCODE *and* this
           * is the last byte of the opcode, it is possible that
           * the byte stored in 'data' is up to 7 numbers larger
           * than the opcode stored in OpPtr. This means that
           * the difference between the 'data' byte and the 'OpPtr'
           * byte is a special value corresponding to a specific
           * register. These registers are listed in Table 3.1 of
           * the Intel Instruction Set Reference. If we indeed find
           * the difference to be a number between 0 and 7, store
           * it into 'tmpreg' for future reference.
           */
          tmpreg = (unsigned char) *(data + BytesMatched) -
                   (unsigned char) *codeptr;

          if ((tmpreg >= 0) && (tmpreg <= 7))
          {
            /*
             * We have just found a good match on the last byte
             * of the opcode - therefore the data string matches
             * the stored opcode byte for byte - it is an exact
             * match
             */
            ++BytesMatched;
            ExactMatch = 1;
          }
          else
          {
            /*
             * tmpreg is outside of 0-7, so this opcode does not
             * satisfy the +rb/+rw/+rd nomenclature - reset
             * tmpreg and BytesMatched.
             */
            tmpreg = (-1);
            BytesMatched = 0;

            break;
          }
        } /* if ((*OpPtr)->digit == REGCODE) */
        else if ((*OpPtr)->digit == FPUCODE)
        {
          /*
           * Since this is the last byte of the opcode, and since
           * 'digit' is FPUCODE, this last byte of the opcode may
           * have been altered by adding a number from 0-7, in
           * order to reflect an fpu stack register. Figure out
           * what the difference is, and assign it to tmpfpu for
           * later use.
           */
          tmpfpu = (unsigned char) *(data + BytesMatched) -
                   (unsigned char) *codeptr;

          if ((tmpfpu >= 0) && (tmpfpu <= 7))
          {
            ++BytesMatched;
            ExactMatch = 1;
          }
          else
          {
            /*
             * tmpfpu is outside of 0-7, and so it cannot represent
             * a fpu stack register - reset tmpfpu and BytesMatched.
             */
            tmpfpu = (-1);
            BytesMatched = 0;

            break;
          }
        } /* if ((*OpPtr)->digit == FPUCODE) */

        /*
         * For any other 'digit' value nothing is counted here: the
         * loop ends on the next pass and the length test below
         * rejects the candidate.
         */
      }
      else
      {
        /*
         * We failed to match a byte of data's opcode against codeptr -
         * this is not a good match
         */
        BytesMatched = 0;
        break;
      }
    } /* while (*++codeptr) */

    if (BytesMatched < (*OpPtr)->oplen)
    {
      /*
       * We did not match all of the necessary bytes against
       * OpPtr's opcode - bad match.
       */
      continue;
    }

    /*
     * If tmpreg has been assigned a register code value,
     * and this opcode is not expecting a register code,
     * it is a bad match. This does not happen too often,
     * but it will happen in cases of opcode ambiguity,
     * notably XCHG and NOP. XCHG ax,ax and NOP both
     * have an opcode of 0x90, so if we come across a
     * 0x91, the initial register code checking routine
     * will assign a value of 1 to tmpreg, but NOP will not
     * be expecting a register code, so we want to go to
     * the next opcode (XCHG).
     */
    if ((tmpreg != (-1)) && ((*OpPtr)->digit != REGCODE))
      continue;

    /*
     * Alternatively, if the opcode is expecting a register code
     * and we do not have one, it is a bad match. The only
     * case I can think of in which this will happen is again
     * with NOP and XCHG and we get a "66 90" to indicate
     * a 32 bit version of NOP. Since there is technically no
     * 32 bit version of NOP, we would normally move onto
     * XCHG (where there is a 32 bit version) and use it, but
     * we do not want to do that since 0x90 is specifically
     * NOP.
     */
    if ((tmpreg == (-1)) && ((*OpPtr)->digit == REGCODE))
      continue;

    mptr = 0;
    sibptr = 0;

    /*
     * If the opcode expects a ModR/M byte, check to make sure
     * that the byte matches up with the correct column in the
     * ModR/M table (as given by (*OpPtr)->digit).
     */
    if (((*OpPtr)->digit >= 0) && ((*OpPtr)->digit <= REGRM))
    {
      if (FindModSib(data, *OpPtr, BytesMatched, &mptr, &sibptr) < 0)
        continue;
    }

    /*
     * Make sure this opcode meets the prefix requirements.
     * This check should be done last, or we would have to
     * reset PrefixPriority in each check that fails afterward.
     */
    pret = MatchPrefix(*OpPtr, PrefixFlags);
    if (pret == 0)
      continue; /* bad match */

    /*
     * The PrefixPriority business is to handle cases where you
     * prefix an opcode with no operands with 0x66 (the operand-size
     * override prefix). If there is no 32 bit form of the opcode, we
     * would normally skip it, so this will give the opcode a low
     * priority, so if we do not find a 32 bit version of it later,
     * we will use this one.
     */
    if (pret > PrefixPriority)
    {
      PrefixPriority = pret;
      BetterMatch = 1; /* this is a better match than our previous */
    }
    else if ((pret < PrefixPriority) && !ExactMatch)
    {
      /*
       * This opcode has a lower prefix priority than the previous best match, so
       * reject it
       */
      continue;
    }

    if ((BytesMatched > MaxBytesMatched) || BetterMatch || ExactMatch)
    {
      /*
       * This opcode passed all the tests, and it either matched
       * more bytes than the previous match, has a higher prefix
       * priority, or is an exact match, so it becomes the new
       * best match.
       */
      MaxBytesMatched = BytesMatched;
      *bestmatch = *OpPtr;
      RegisterCode = tmpreg;
      FPUCode = tmpfpu;

      if (((*OpPtr)->digit >= 0) && ((*OpPtr)->digit <= REGRM))
      {
        assert(mptr != 0);

        /* the ModR/M byte directly follows the matched opcode bytes */
        ModRMPtr = mptr;
        ModRMOffset = data + BytesMatched;

        if (sibptr)
        {
          /* the SIB byte directly follows the ModR/M byte */
          SibPtr = sibptr;
          SibOffset = data + BytesMatched + 1;
        }
        else
        {
          /*
           * This match has no SIB byte, so do not leave behind
           * SIB values recorded for a previous match
           */
          SibPtr = 0;
          SibOffset = 0;
        }
      }
      else
      {
        /*
         * The opcode does not require a ModR/M byte so reset
         * Mod/Sib variables in case a previous match set them to
         * certain values
         */
        ModRMOffset = 0;
        ModRMPtr = 0;
        SibOffset = 0;
        SibPtr = 0;
      }
    } /* if (BytesMatched > MaxBytesMatched) */
  } /* for (OpPtr = OpCodes[index]; *OpPtr; ++OpPtr) */

  if (!MaxBytesMatched)
    return (0);
  else
    return ((long) MaxBytesMatched + (long) pbytes);
} /* FindOpcode() */

/*
UsesRegisterCode()
  Determine whether the opcode list 'codes' contains an entry that
accepts a register code (an entry whose 'digit' field is REGCODE).

Inputs: codes  - array of opcode pointers, terminated by a null
                 pointer
        unique - set to 0 for every non-REGCODE opcode that is seen
                 before the first REGCODE opcode (or before the end
                 of the list when there is no REGCODE opcode). It is
                 never set to 1 here, so the caller must do that
                 before the call.

Return: 1 if so - the scan stops at the first REGCODE opcode
        0 if not
*/

static int
UsesRegisterCode(struct OpCode **codes, int *unique)

{
  struct OpCode **cur;

  cur = codes;
  while (*cur)
  {
    if ((*cur)->digit == REGCODE)
      return (1);

    /* something other than a register code opcode is in this list */
    *unique = 0;
    ++cur;
  }

  return (0);
} /* UsesRegisterCode() */
