/***************************************************************************
  TWOFISH.C  -- C API calls for TWOFISH AES submission

  Submitters:
    Bruce Schneier, Counterpane Systems
    Doug Whiting,  Hi/fn
    John Kelsey,  Counterpane Systems
    Chris Hall,    Counterpane Systems
    David Wagner,  UC Berkeley
      
  Code Author:    Doug Whiting,  Hi/fn
    
  Version  1.00    April 1998
    
  Copyright 1998, Hi/fn and Counterpane Systems.  All rights reserved.
    
  Notes:
    *  Pedagogical version (non-optimized)
    *  Tab size is set to 4 characters in this file

***************************************************************************/

#include  "aes.h"
#include  "table.h"

/*
+*****************************************************************************
*      Constants/Macros/Tables
-****************************************************************************/

#define    VALIDATE_PARMS  1    /* nonzero --> check all parameters */
#define    FEISTEL      0    /* nonzero --> use Feistel version (slow) */

int  tabEnable=0;          /* are we gathering stats? */
BYTE tabUsed[256];          /* one bit per table */

#if FEISTEL
CONST    char *moduleDescription="Pedagogical C code (Feistel)";
#else
CONST    char *moduleDescription="Pedagogical C code";
#endif
CONST    char *modeString = "";

#define  P0_USED    0x01
#define  P1_USED    0x02
#define  B0_USED    0x04
#define  B1_USED    0x08
#define  B2_USED    0x10
#define  B3_USED    0x20
#define  ALL_USED  0x3F

/* number of rounds for various key sizes: 128, 192, 256 */
int      numRounds[4]= {0,ROUNDS_128,ROUNDS_192,ROUNDS_256};

#ifndef  DEBUG
#ifdef GetCodeSize
#define  DEBUG  1          /* force debug */
#endif
#endif
#include  "debug.h"        /* debug display macros */

#ifdef GetCodeSize
extern DWORD Here(DWORD x);      /* return caller's address! */
DWORD TwofishCodeStart(void) { return Here(0); };
#endif

/*
+*****************************************************************************
*
* Function Name:  TableOp
*
* Function:      Handle table use checking
*
* Arguments:    op  =  what to do  (see TAB_* defns in AES.H)
*
* Return:      TRUE --> done (for TAB_QUERY)    
*
* Notes: This routine is for use in generating the tables KAT file.
*
-****************************************************************************/
int TableOp(int op)
  {
  static int queryCnt=0;
  int i;
  switch (op)
    {
    case TAB_DISABLE:
      tabEnable=0;
      break;
    case TAB_ENABLE:
      tabEnable=1;
      break;
    case TAB_RESET:
      queryCnt=0;
      for (i=0;i<256;i++)
        tabUsed[i]=0;
      break;
    case TAB_QUERY:
      queryCnt++;
      for (i=0;i<256;i++)
        if (tabUsed[i] != ALL_USED)
          return FALSE;
      if (queryCnt < TAB_MIN_QUERY)  /* do a certain minimum number */
        return FALSE;
      break;
    }
  return TRUE;
  }


/*
+*****************************************************************************
*
* Function Name:  ParseHexDword
*
* Function:      Parse ASCII hex nibbles and fill in key/iv dwords
*
* Arguments:    bit      =  # bits to read
*          srcTxt    =  ASCII source
*          d      =  ptr to dwords to fill in
*          dstTxt    =  where to make a copy of ASCII source
*                  (NULL ok)
*
* Return:      Zero if no error.  Nonzero --> invalid hex or length
*
* Notes:  Note that the parameter d is a DWORD array, not a byte array.
*  This routine is coded to work both for little-endian and big-endian
*  architectures.  The character stream is interpreted as a LITTLE-ENDIAN
*  byte stream, since that is how the Pentium works, but the conversion
*  happens automatically below. 
*
-****************************************************************************/
int ParseHexDword(int bits,CONST char *srcTxt,DWORD *d,char *dstTxt)
  {
  int i;
  DWORD b;
  char c;
#if ALIGN32
  char alignDummy[3];  /* keep dword alignment */
#endif

  union  /* make sure LittleEndian is defined correctly */
    {
    BYTE  b[4];
    DWORD d[1];
    } v;
  v.d[0]=1;
  if (v.b[0 ^ ADDR_XOR] != 1)  /* sanity check on compile-time switch */
    return BAD_ENDIAN;

#if VALIDATE_PARMS
  #if ALIGN32
  if (((int)d) & 3)
    return BAD_ALIGN32;
  #endif
#endif

  for (i=0;i*32= '0') && (c <= '9'))
      b=c-'0';
    else if ((c >= 'a') && (c <= 'f'))
      b=c-'a'+10;
    else if ((c >= 'A') && (c <= 'F'))
      b=c-'A'+10;
    else
      return BAD_KEY_MAT;  /* invalid hex character */
    /* works for big and little endian! */
    d[i/8] |= b << (4*((i^1)&7));    
    }

  return 0;          /* no error */
  }


/*
+*****************************************************************************
*
* Function Name:  f32
*
* Function:      Run four bytes through keyed S-boxes and apply MDS matrix
*
* Arguments:    x      =  input to f function
*          k32      =  pointer to key dwords
*          keyLen    =  total key length (k32 --> keyLey/2 bits)
*
* Return:      The output of the keyed permutation applied to x.
*
* Notes:
*  This function is a keyed 32-bit permutation.  It is the major building
*  block for the Twofish round function, including the four keyed 8x8 
*  permutations and the 4x4 MDS matrix multiply.  This function is used
*  both for generating round subkeys and within the round function on the
*  block being encrypted.  
*
*  This version is fairly slow and pedagogical, although a smartcard would
*  probably perform the operation exactly this way in firmware.   For
*  ultimate performance, the entire operation can be completed with four
*  lookups into four 256x32-bit tables, with three dword xors.
*
*  The MDS matrix is defined in TABLE.H.  To multiply by Mij, just use the
*  macro Mij(x).
*
-****************************************************************************/
DWORD f32(DWORD x,CONST DWORD *k32,int keyLen)
  {
  BYTE  b[4];
  
  /* Run each byte thru 8x8 S-boxes, xoring with key byte at each stage. */
  /* Note that each byte goes through a different combination of S-boxes.*/

  *((DWORD *)b) = Bswap(x);  /* make b[0] = LSB, b[3] = MSB */
  switch (((keyLen + 63)/64) & 3)
    {
    case 0:    /* 256 bits of key */
      b[0] = p8(04)[b[0]] ^ b0(k32[3]);
      b[1] = p8(14)[b[1]] ^ b1(k32[3]);
      b[2] = p8(24)[b[2]] ^ b2(k32[3]);
      b[3] = p8(34)[b[3]] ^ b3(k32[3]);
      /* fall thru, having pre-processed b[0]..b[3] with k32[3] */
    case 3:    /* 192 bits of key */
      b[0] = p8(03)[b[0]] ^ b0(k32[2]);
      b[1] = p8(13)[b[1]] ^ b1(k32[2]);
      b[2] = p8(23)[b[2]] ^ b2(k32[2]);
      b[3] = p8(33)[b[3]] ^ b3(k32[2]);
      /* fall thru, having pre-processed b[0]..b[3] with k32[2] */
    case 2:    /* 128 bits of key */
      b[0] = p8(00)[p8(01)[p8(02)[b[0]] ^ b0(k32[1])] ^ b0(k32[0])];
      b[1] = p8(10)[p8(11)[p8(12)[b[1]] ^ b1(k32[1])] ^ b1(k32[0])];
      b[2] = p8(20)[p8(21)[p8(22)[b[2]] ^ b2(k32[1])] ^ b2(k32[0])];
      b[3] = p8(30)[p8(31)[p8(32)[b[3]] ^ b3(k32[1])] ^ b3(k32[0])];
    }

  if (tabEnable)
    {  /* we could give a "tighter" bound, but this works acceptably well */
    tabUsed[b0(x)] |= (P_00 == 0) ? P0_USED : P1_USED;
    tabUsed[b1(x)] |= (P_10 == 0) ? P0_USED : P1_USED;
    tabUsed[b2(x)] |= (P_20 == 0) ? P0_USED : P1_USED;
    tabUsed[b3(x)] |= (P_30 == 0) ? P0_USED : P1_USED;

    tabUsed[b[0] ] |= B0_USED;
    tabUsed[b[1] ] |= B1_USED;
    tabUsed[b[2] ] |= B2_USED;
    tabUsed[b[3] ] |= B3_USED;
    }

  /* Now perform the MDS matrix multiply inline. */
  return  ((M00(b[0]) ^ M01(b[1]) ^ M02(b[2]) ^ M03(b[3]))    ) ^
      ((M10(b[0]) ^ M11(b[1]) ^ M12(b[2]) ^ M13(b[3])) <<  8) ^
      ((M20(b[0]) ^ M21(b[1]) ^ M22(b[2]) ^ M23(b[3])) << 16) ^
      ((M30(b[0]) ^ M31(b[1]) ^ M32(b[2]) ^ M33(b[3])) << 24) ;
  }

/*
+*****************************************************************************
*
* Function Name:  RS_MDS_Encode
*
* Function:      Use (12,8) Reed-Solomon code over GF(256) to produce
*          a key S-box dword from two key material dwords.
*
* Arguments:    k0  =  1st dword
*          k1  =  2nd dword
*
* Return:      Remainder polynomial generated using RS code
*
* Notes:
*  Since this computation is done only once per reKey per 64 bits of key,
*  the performance impact of this routine is imperceptible. The RS code
*  chosen has "simple" coefficients to allow smartcard/hardware implementation
*  without lookup tables.
*
-****************************************************************************/
DWORD RS_MDS_Encode(DWORD k0,DWORD k1)
  {
  int i,j;
  DWORD r;

  for (i=r=0;i<2;i++)
    {
    r ^= (i) ? k0 : k1;      /* merge in 32 more key bits */
    for (j=0;j<4;j++)      /* shift one byte at a time */
      RS_rem(r);        
    }
  return r;
  }

/*
+*****************************************************************************
*
* Function Name:  reKey
*
* Function:      Initialize the Twofish key schedule from key32
*
* Arguments:    key      =  ptr to keyInstance to be initialized
*
* Return:      TRUE on success
*
* Notes:
*  Here we precompute all the round subkeys, although that is not actually
*  required.  For example, on a smartcard, the round subkeys can 
*  be generated on-the-fly  using f32()
*
-****************************************************************************/
int reKey(keyInstance *key)
  {
  int    i,k64Cnt;
  int    keyLen    = key->keyLen;
  int    subkeyCnt = ROUND_SUBKEYS + 2*key->numRounds;
  DWORD  A,B;
  DWORD  k32e[MAX_KEY_BITS/64],k32o[MAX_KEY_BITS/64]; /* even/odd key dwords */

#if VALIDATE_PARMS
  #if ALIGN32
  if ((((int)key) & 3) || (((int)key->key32) & 3))
    return BAD_ALIGN32;
  #endif
  if ((key->keyLen % 64) || (key->keyLen < MIN_KEY_BITS))
    return BAD_KEY_INSTANCE;
  if (subkeyCnt > TOTAL_SUBKEYS)
    return BAD_KEY_INSTANCE;
#endif

  k64Cnt=(keyLen+63)/64;    /* round up to next multiple of 64 bits */
  for (i=0;ikey32[2*i  ];
    k32o[i]=key->key32[2*i+1];
    /* compute S-box keys using (12,8) Reed-Solomon code over GF(256) */
    key->sboxKeys[k64Cnt-1-i]=RS_MDS_Encode(k32e[i],k32o[i]); /* reverse order */
    }

  for (i=0;isubKeys[2*i  ] = A+  B;      /* combine with a PHT */
    key->subKeys[2*i+1] = ROL(A+2*B,SK_ROTL);
    }

  DebugDumpKey(key);

  return TRUE;
  }
/*
+*****************************************************************************
*
* Function Name:  makeKey
*
* Function:      Initialize the Twofish key schedule
*
* Arguments:    key      =  ptr to keyInstance to be initialized
*          direction  =  DIR_ENCRYPT or DIR_DECRYPT
*          keyLen    =  # bits of key text at *keyMaterial
*          keyMaterial  =  ptr to hex ASCII chars representing key bits
*
* Return:      TRUE on success
*          else error code (e.g., BAD_KEY_DIR)
*
* Notes:
*  This parses the key bits from keyMaterial.  No crypto stuff happens here.
*  The function reKey() is called to actually build the key schedule after
*  the keyMaterial has been parsed.
*
-****************************************************************************/
int makeKey(keyInstance *key, BYTE direction, int keyLen,CONST char *keyMaterial)
  {
  int i;

#if VALIDATE_PARMS        /* first, sanity check on parameters */
  if (key == NULL)      
    return BAD_KEY_INSTANCE;/* must have a keyInstance to initialize */
  if ((direction != DIR_ENCRYPT) && (direction != DIR_DECRYPT))
    return BAD_KEY_DIR;    /* must have valid direction */
  if ((keyLen > MAX_KEY_BITS) || (keyLen < 8))  
    return BAD_KEY_MAT;    /* length must be valid */
  key->keySig = VALID_SIG;  /* show that we are initialized */
  #if ALIGN32
  if ((((int)key) & 3) || (((int)key->key32) & 3))
    return BAD_ALIGN32;
  #endif
#endif

  key->direction  = direction;  /* set our cipher direction */
  key->keyLen    = (keyLen+63) & ~63;    /* round up to multiple of 64 */
  key->numRounds  = numRounds[(keyLen-1)/64];
  for (i=0;ikey32[i]=0;
  key->keyMaterial[MAX_KEY_SIZE]=0;  /* terminate ASCII string */

  if ((keyMaterial == NULL) || (keyMaterial[0]==0))
    return TRUE;      /* allow a "dummy" call */
    
  if (ParseHexDword(keyLen,keyMaterial,key->key32,key->keyMaterial))
    return BAD_KEY_MAT;  

  return reKey(key);      /* generate round subkeys */
  }


/*
+*****************************************************************************
*
* Function Name:  cipherInit
*
* Function:      Initialize the Twofish cipher in a given mode
*
* Arguments:    cipher    =  ptr to cipherInstance to be initialized
*          mode    =  MODE_ECB, MODE_CBC, or MODE_CFB1
*          IV      =  ptr to hex ASCII test representing IV bytes
*
* Return:      TRUE on success
*          else error code (e.g., BAD_CIPHER_MODE)
*
-****************************************************************************/
int cipherInit(cipherInstance *cipher, BYTE mode,CONST char *IV)
  {
  int i;
#if VALIDATE_PARMS        /* first, sanity check on parameters */
  if (cipher == NULL)      
    return BAD_PARAMS;    /* must have a cipherInstance to initialize */
  if ((mode != MODE_ECB) && (mode != MODE_CBC) && (mode != MODE_CFB1))
    return BAD_CIPHER_MODE;  /* must have valid cipher mode */
  cipher->cipherSig  =  VALID_SIG;
  #if ALIGN32
  if ((((int)cipher) & 3) || (((int)cipher->IV) & 3) || (((int)cipher->iv32) & 3))
    return BAD_ALIGN32;
  #endif
#endif

  if ((mode != MODE_ECB) && (IV))  /* parse the IV */
    {
    if (ParseHexDword(BLOCK_SIZE,IV,cipher->iv32,NULL))
      return BAD_IV_MAT;
    for (i=0;iIV)[i] = Bswap(cipher->iv32[i]);
    }

  cipher->mode    =  mode;

  return TRUE;
  }

/*
+*****************************************************************************
*
* Function Name:  blockEncrypt
*
* Function:      Encrypt block(s) of data using Twofish
*
* Arguments:    cipher    =  ptr to already initialized cipherInstance
*          key      =  ptr to already initialized keyInstance
*          input    =  ptr to data blocks to be encrypted
*          inputLen  =  # bits to encrypt (multiple of blockSize)
*          outBuffer  =  ptr to where to put encrypted blocks
*
* Return:      # bits ciphered (>= 0)
*          else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL)
*
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits.
*     If inputLen is not a multiple of BLOCK_SIZE bits in those modes,
*     an error BAD_INPUT_LEN is returned.  In CFB1 mode, all block 
*     sizes can be supported.
*
-****************************************************************************/
int blockEncrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input,
        int inputLen, BYTE *outBuffer)
  {
  int   i,n,r;          /* loop variables */
  DWORD x[BLOCK_SIZE/32];      /* block being encrypted */
  DWORD t0,t1,tmp;        /* temp variables */
  int    rounds=key->numRounds;  /* number of rounds */
  BYTE  bit,ctBit,carry;      /* temps for CFB */
#if ALIGN32
  BYTE alignDummy;        /* keep 32-bit variable alignment on stack */
#endif

#if VALIDATE_PARMS
  if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG))
    return BAD_CIPHER_STATE;
  if ((key == NULL) || (key->keySig != VALID_SIG))
    return BAD_KEY_INSTANCE;
  if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1))
    return BAD_KEY_INSTANCE;
  if ((cipher->mode != MODE_CFB1) && (inputLen % BLOCK_SIZE))
    return BAD_INPUT_LEN;
  #if ALIGN32
  if ( (((int)cipher) & 3) || (((int)key      ) & 3) ||
     (((int)input ) & 3) || (((int)outBuffer) & 3))
    return BAD_ALIGN32;
  #endif
#endif

  if (cipher->mode == MODE_CFB1)
    {  /* use recursion here to handle CFB, one block at a time */
    cipher->mode = MODE_ECB;  /* do encryption in ECB */
    for (n=0;nIV,BLOCK_SIZE,(BYTE *)x);
      bit    = 0x80 >> (n & 7);/* which bit position in byte */
      ctBit = (input[n/8] & bit) ^ ((((BYTE *) x)[0] & 0x80) >> (n&7));
      outBuffer[n/8] = (outBuffer[n/8] & ~ bit) | ctBit;
      carry = ctBit >> (7 - (n&7));
      for (i=BLOCK_SIZE/8-1;i>=0;i--)
        {
        bit = cipher->IV[i] >> 7;  /* save next "carry" from shift */
        cipher->IV[i] = (cipher->IV[i] << 1) ^ carry;
        carry = bit;
        }
      }
    cipher->mode = MODE_CFB1;  /* restore mode for next time */
    return inputLen;
    }

  /* here for ECB, CBC modes */
  for (n=0;nmode == MODE_CBC)
      DebugDump(cipher->iv32,"",IV_ROUND,0,0,0,0);
#endif
    for (i=0;isubKeys[INPUT_WHITEN+i];
      if (cipher->mode == MODE_CBC)
        x[i] ^= cipher->iv32[i];
      }

    DebugDump(x,"",0,0,0,0,0);
    for (r=0;rsboxKeys,key->keyLen);
      t1   = f32(ROL(x[1],8+(r+1)/2),key->sboxKeys,key->keyLen);
                    /* PHT, round keys */
      x[2]^= ROL(t0 +   t1 + key->subKeys[ROUND_SUBKEYS+2*r  ], r    /2);
      x[3]^= ROR(t0 + 2*t1 + key->subKeys[ROUND_SUBKEYS+2*r+1],(r+2) /2);

      DebugDump(x,"",r+1,2*(r&1),1,1,0);
#else
      t0   = f32(    x[0]   ,key->sboxKeys,key->keyLen);
      t1   = f32(ROL(x[1],8),key->sboxKeys,key->keyLen);

      x[3] = ROL(x[3],1);
      x[2]^= t0 +   t1 + key->subKeys[ROUND_SUBKEYS+2*r  ]; /* PHT, round keys */
      x[3]^= t0 + 2*t1 + key->subKeys[ROUND_SUBKEYS+2*r+1];
      x[2] = ROR(x[2],1);

      DebugDump(x,"",r+1,2*(r&1),0,1,0);/* make format compatible with optimized code */
#endif
      if (r < rounds-1)            /* swap for next round */
        {
        tmp = x[0]; x[0]= x[2]; x[2] = tmp;
        tmp = x[1]; x[1]= x[3]; x[3] = tmp;
        }
      }
#if FEISTEL
    x[0] = ROR(x[0],8);                     /* "final permutation" */
    x[1] = ROL(x[1],8);
    x[2] = ROR(x[2],8);
    x[3] = ROL(x[3],8);
#endif
    for (i=0;isubKeys[OUTPUT_WHITEN+i]);
      if (cipher->mode == MODE_CBC)
        cipher->iv32[i] = Bswap(((DWORD *)outBuffer)[i]);
      }
#ifdef DEBUG
    DebugDump(outBuffer,"",rounds+1,0,0,0,1);
    if (cipher->mode == MODE_CBC)
      DebugDump(cipher->iv32,"",IV_ROUND,0,0,0,0);
#endif
    }

  return inputLen;
  }

/*
+*****************************************************************************
*
* Function Name:  blockDecrypt
*
* Function:      Decrypt block(s) of data using Twofish
*
* Arguments:    cipher    =  ptr to already initialized cipherInstance
*          key      =  ptr to already initialized keyInstance
*          input    =  ptr to data blocks to be decrypted
*          inputLen  =  # bits to encrypt (multiple of blockSize)
*          outBuffer  =  ptr to where to put decrypted blocks
*
* Return:      # bits ciphered (>= 0)
*          else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL)
*
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits.
*     If inputLen is not a multiple of BLOCK_SIZE bits in those modes,
*     an error BAD_INPUT_LEN is returned.  In CFB1 mode, all block 
*     sizes can be supported.
*
-****************************************************************************/
int blockDecrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input,
        int inputLen, BYTE *outBuffer)
  {
  int   i,n,r;          /* loop counters */
  DWORD x[BLOCK_SIZE/32];      /* block being encrypted */
  DWORD t0,t1;          /* temp variables */
  int    rounds=key->numRounds;  /* number of rounds */
  BYTE  bit,ctBit,carry;      /* temps for CFB */
#if ALIGN32
  BYTE alignDummy;        /* keep 32-bit variable alignment on stack */
#endif

#if VALIDATE_PARMS
  if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG))
    return BAD_CIPHER_STATE;
  if ((key == NULL) || (key->keySig != VALID_SIG))
    return BAD_KEY_INSTANCE;
  if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1))
    return BAD_KEY_INSTANCE;
  if ((cipher->mode != MODE_CFB1) && (inputLen % BLOCK_SIZE))
    return BAD_INPUT_LEN;
  #if ALIGN32
  if ( (((int)cipher) & 3) || (((int)key      ) & 3) ||
     (((int)input)  & 3) || (((int)outBuffer) & 3))
    return BAD_ALIGN32;
  #endif
#endif

  if (cipher->mode == MODE_CFB1)
    {  /* use blockEncrypt here to handle CFB, one block at a time */
    cipher->mode = MODE_ECB;  /* do encryption in ECB */
    for (n=0;nIV,BLOCK_SIZE,(BYTE *)x);
      bit    = 0x80 >> (n & 7);
      ctBit = input[n/8] & bit;
      outBuffer[n/8] = (outBuffer[n/8] & ~ bit) |
               (ctBit ^ ((((BYTE *) x)[0] & 0x80) >> (n&7)));
      carry = ctBit >> (7 - (n&7));
      for (i=BLOCK_SIZE/8-1;i>=0;i--)
        {
        bit = cipher->IV[i] >> 7;  /* save next "carry" from shift */
        cipher->IV[i] = (cipher->IV[i] << 1) ^ carry;
        carry = bit;
        }
      }
    cipher->mode = MODE_CFB1;  /* restore mode for next time */
    return inputLen;
    }

  /* here for ECB, CBC modes */
  for (n=0;nsubKeys[OUTPUT_WHITEN+i];

    for (r=rounds-1;r>=0;r--)      /* main Twofish decryption loop */
      {
      t0   = f32(    x[0]   ,key->sboxKeys,key->keyLen);
      t1   = f32(ROL(x[1],8),key->sboxKeys,key->keyLen);

      DebugDump(x,"",r+1,2*(r&1),0,1,0);/* make format compatible with optimized code */
      x[2] = ROL(x[2],1);
      x[2]^= t0 +   t1 + key->subKeys[ROUND_SUBKEYS+2*r  ]; /* PHT, round keys */
      x[3]^= t0 + 2*t1 + key->subKeys[ROUND_SUBKEYS+2*r+1];
      x[3] = ROR(x[3],1);

      if (r)                  /* unswap, except for last round */
        {
        t0   = x[0]; x[0]= x[2]; x[2] = t0;  
        t1   = x[1]; x[1]= x[3]; x[3] = t1;
        }
      }
    DebugDump(x,"",0,0,0,0,0);/* make final output match encrypt initial output */

    for (i=0;isubKeys[INPUT_WHITEN+i];
      if (cipher->mode == MODE_CBC)
        {
        x[i] ^= cipher->iv32[i];
        cipher->iv32[i] = Bswap(((DWORD *)input)[i]);
        }
      ((DWORD *)outBuffer)[i] = Bswap(x[i]);
      }
    DebugDump(outBuffer,"",-1,0,0,0,1);
    }

  return inputLen;
  }


#ifdef GetCodeSize
DWORD TwofishCodeSize(void) { return Here(0)-TwofishCodeStart(); };
#endif