/* Copyright (C) 2001 John J. Chew, III jjchew@math.utoronto.ca */
/* worst case evaluation of prefix reversal sorting */
/* see N.J.A. Sloane "My Favorite Integer Sequences" */

#include <stdio.h>
#include <stdlib.h>
#include <strings.h>

#define INTS_ARE_64_BITS

#ifdef OLD_CODE
#define GETBIT(base,bitnumber) (((base)[(bitnumber)>>3]) & g_bit_masks[(bitnumber) & 7])
#define SETBIT(base,bitnumber) ((base)[(bitnumber)>>3]) |= g_bit_masks[(bitnumber) & 7]
#define CLRBIT(base,bitnumber) ((base)[(bitnumber)>>3]) &= ^g_bit_masks[(bitnumber) & 7]
#endif

#define GETNIBLET(base,nibletnumber) (((base)[(nibletnumber)>>2]) >> (((nibletnumber)&3)*2) & 3)

/* MAX_FACTORIAL is the largest number whose factorial will fit into a word */
/* FACTORIAL_TYPE is an integer type that will hold -1..MAX_FACTORIAL */
#ifdef INTS_ARE_64_BITS
#define MAX_FACTORIAL 20
#define FACTORIAL_TYPE long long
#else
#define MAX_FACTORIAL 12
#define FACTORIAL_TYPE int
#endif

#ifdef DEBUG
#define IFDEBUG(x) (x)
#else
#define IFDEBUG(x) /**/
#endif

void DoN(int);
void Initialise_Data(void);
void KnuthPDecode(int, FACTORIAL_TYPE, int *);
FACTORIAL_TYPE KnuthPEncode(int, int *);
int main();
#ifdef OLD_CODE
void print_bitmap(FACTORIAL_TYPE, unsigned char *);
#endif
void print_nibletmap(FACTORIAL_TYPE, unsigned char *);
void print_permutation(int, int *);

int g_bit_masks[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
int g_niblet_masks[4] = { 0x03, 0x0C, 0x30, 0xC0 };
int g_niblet_names[4] = { '.', 'X', '=', '+' };

/* a precalculated row of a state transition matrix ,
 * does what needs to be done when depth++
 */
int g_change_state_to_increment_depth[256];

/* precalculated index of lowest-order niblet equal to 10 */
int g_first_code_at_this_level[256];

/* g_factorial gives precalculated values for all the factorials that we can handle */
FACTORIAL_TYPE g_factorial[MAX_FACTORIAL+1] 
  = { 1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800, 479001600 
#ifdef INTS_ARE_64_BITS
  , 6227020800, 87178291200, 1307674368000, 20922789888000,
  355687428096000, 6402373705728000, 121645100408832000L
#endif
  };

#ifdef OLD_CODE
unsigned char g_clear_lowest_bit[256];
int g_lowest_bit[256];
#endif

/* a precalculated row of a state transition matrix ,
 * changes lowest 10 (depth) to 01 (done)
 */
int g_mark_lowest_as_done[256];

/* DoN(int n)
 *
 * Calculate the maximum number of prefix reversals necessary
 * to sort an arbitrarily ordered stack of n pancakes
 */
void DoN(int n) {
  /*** variable declarations ***/

  /* n_factorial:= n! for the sake of clarity and efficiency */
  FACTORIAL_TYPE n_factorial;

  /* state[] represents the state of each permutation (indexed by
     Knuth P-value) as follows:

     00: permutation not yet seen
     01: permutation has already been processed
     10: permutation has depth equal to current depth and needs processing
     11: permutation has depth greater than current depth
  */
  unsigned char *state;

  /* size of state[] */
  int state_size;

  /* state_end points one past the end of state */
  unsigned char *state_end;

  /* permutation_being_looked_at is an array of ints */
  int *permutation_being_looked_at;

  /* how many permutation codes we have left to look at in total */
  FACTORIAL_TYPE codes_remaining;

  /* how many permutation codes we have left at this depth */
  FACTORIAL_TYPE number_of_codes_being_looked_at;

  /* how many permutation codes to look at at depth+1 */
  FACTORIAL_TYPE number_of_codes_to_look_at_later;

  /* cursor used to scan state looking for unprocessed data */
  register unsigned char *state_cursor;

  /* current search depth */
  int depth;

  /* initialise data structures */
  if (n > MAX_FACTORIAL) 
    { fputs("n! is bigger than an int.\n", stderr); exit(__LINE__); }
  n_factorial = g_factorial[n];
  state_size = n_factorial/4 + 1;
  state = calloc(state_size, 1);
  if (!state) { 
    fprintf(stderr, "Out of memory allocating state vector (asked for %d!/4+1 bytes).\n", n);
    exit(__LINE__); 
    }
  state[0] = 3;
  codes_remaining = n_factorial - 1;
  number_of_codes_being_looked_at = 0;
  state_end = state + state_size;
  number_of_codes_to_look_at_later = 1;
  depth = 0;
  permutation_being_looked_at = calloc(n, sizeof(int));
  if (!permutation_being_looked_at) {
    fprintf(stderr, "Out of memory allocating permutation vector (asked for %d bytes).\n", n);
    exit(__LINE__); 
    }

#ifdef DEBUG
  KnuthPDecode(n, 0, permutation_being_looked_at);
  print_permutation(n, permutation_being_looked_at);
  printf(" takes %d flip(s).\n", depth);
#endif

  while (codes_remaining > 0) {
    FACTORIAL_TYPE code_being_looked_at = -1;
    int prefix_length;
    
    /* if we've run out of codes of permutations at this depth */
    if (number_of_codes_being_looked_at <= 0) {
#ifdef INTS_ARE_64_BITS
      printf("n=%d m=%d count=%lld\n", n, depth, number_of_codes_to_look_at_later);
#else
      printf("n=%d m=%d count=%d\n", n, depth, number_of_codes_to_look_at_later);
#endif
      /* increment depth */
      depth++;
      IFDEBUG(printf("Depth is now: %d\n", depth));

      /* adjust state vector:
       *
       * 00 unseen  -> 00 unseen
       * 01 done    -> 01 done
       * 10 depth   -> 01 done
       * 11 depth+1 -> 10 depth
       */
      for (state_cursor = state; state_cursor < state_end; state_cursor++) 
        { *state_cursor = g_change_state_to_increment_depth[*state_cursor]; }

      /* move counts around */
      number_of_codes_being_looked_at = number_of_codes_to_look_at_later;
#ifdef DEBUG
      if (number_of_codes_being_looked_at <= 0) {
        fprintf(stderr, "Ran out of codes to look at later at depth %d.\n",
	  depth);
	exit(__LINE__);
        }
#endif
      number_of_codes_to_look_at_later = 0;

      /* point cursor at beginning of state vector again */
      state_cursor = state;
      }

    IFDEBUG(fputs("    state: ", stdout));
    IFDEBUG(print_nibletmap(n_factorial, state));
    IFDEBUG(printf(" (%d to do)\n", number_of_codes_being_looked_at));

    /* find next code to look at */
    while (state_cursor < state_end) {
      register unsigned char this_byte = *state_cursor;
      code_being_looked_at = g_first_code_at_this_level[this_byte];
      if (code_being_looked_at >= 0) {
        code_being_looked_at += ((state_cursor-state)<<2);
        IFDEBUG(printf("Found code %d at byte %d bit %d.\n",
	  code_being_looked_at, (state_cursor-state),
	  g_first_code_at_this_level[this_byte]));
	*state_cursor = g_mark_lowest_as_done[this_byte];
	number_of_codes_being_looked_at--;
	break;
        }
      else { 
        state_cursor++; 
#ifdef DEBUG
	if (state_cursor >= state_end) {
	  fputs("Oops: state_cursor ran off the end of state.\n", stderr);
	  exit(__LINE__);
	  }
#endif
	}
      } /* while (state_cursor < state_end) */
#ifdef DEBUG
    if (code_being_looked_at < 0) {
      fputs("Oops: couldn't find a code to look at.\n", stderr);
      exit(__LINE__);
      }
    else if (code_being_looked_at >= n_factorial) {
      fprintf(stderr, "Oops: next code to look at is too big: %d >= %d.\n",
        code_being_looked_at, n_factorial);
      exit(__LINE__);
      }
    printf("Looking at code: %d\n", code_being_looked_at);
#endif

    /* decode Knuth integer representation of permutation */
    KnuthPDecode(n, code_being_looked_at, permutation_being_looked_at);
    IFDEBUG(fputs("  ", stdout));
    IFDEBUG(print_permutation(n, permutation_being_looked_at));
    IFDEBUG(fputs("\n", stdout));

    /* for each possible prefix reversal */
    for (prefix_length = 2; prefix_length <= n; prefix_length++) {
      int flipped_permutation[MAX_FACTORIAL];
      FACTORIAL_TYPE flipped_code;
      int i;

      /* calculate the prefix-reversed permutation */
      for (i=0; i<prefix_length; i++) {
        flipped_permutation[i] = permutation_being_looked_at[prefix_length-1-i];
        }
      while (i<n) {
        flipped_permutation[i] = permutation_being_looked_at[i];
	i++; 
	}
      
      /* calculate the Knuth code of the flipped permutation */
      flipped_code = KnuthPEncode(n, flipped_permutation);
      IFDEBUG(printf("    %d = ", flipped_code));
      IFDEBUG(print_permutation(n, flipped_permutation));
      IFDEBUG(fputs("\n", stdout));

      {
      unsigned char *this_code_state = state + (flipped_code >> 2);
      int this_code_niblet = flipped_code & 3;
      int this_code_mask = g_niblet_masks[this_code_niblet];
      /* if we haven't seen it before */
      if (!(*this_code_state & this_code_mask)) { /* state 00 -> unseen */

        /* one less code to look at */
        codes_remaining--;

	/* make a note to look at it later */
	*this_code_state |= 3 << (this_code_niblet*2); /* state 11 -> later */
	number_of_codes_to_look_at_later++;
        IFDEBUG(print_permutation(n, flipped_permutation));
	IFDEBUG(printf(" takes %d flip(s).\n", depth));
        IFDEBUG(printf("    %d is now seen.\n", flipped_code));
	IFDEBUG(fputs("    state: ", stdout));
	IFDEBUG(print_nibletmap(n_factorial, state));
	IFDEBUG(printf(" (%d later)\n", number_of_codes_to_look_at_later));
        }
      /* if we have seen the flipped permutation before */
      else {
        IFDEBUG(printf("    %d was seen.\n", flipped_code));
	IFDEBUG(fputs("    state: ", stdout));
	IFDEBUG(print_nibletmap(n_factorial, state));
	IFDEBUG(fputs("\n", stdout));
        }
      } /* unsigned char *this_code_state */
      }
    
    } /* while (codes_remaining > 0 */
#ifdef INTS_ARE_64_BITS
  printf("n=%d m=%d count=%lld\n", n, depth, number_of_codes_to_look_at_later);
#else
  printf("n=%d m=%d count=%d\n", n, depth, number_of_codes_to_look_at_later);
#endif

  free(permutation_being_looked_at);
  free(state);
  } /* DoN */

void KnuthPDecode(int n, FACTORIAL_TYPE code, int *permutation) {
  register int i;
  register int *p;
  register unsigned char *q;
  unsigned char letters_used[MAX_FACTORIAL];
  FACTORIAL_TYPE index, place;

  p = permutation; q = letters_used; 
  for (i=1; i<=n; i++) { *p++ = i; *q++ = 0; }

  place = g_factorial[n];
  p = permutation;
  for (i=n; i>0; i--) {
    place /= i;
    index = code/place;
    code -= index*place;
#if 0
    printf("j=%d code=%d index=%d place=%d\n", i, code, index, place);   
#endif
    q = letters_used;
    while (index-- >= 0) {
      while (*q++) { }
      }
    *p++ = q - letters_used;
    *--q = 1;
    }
  }

FACTORIAL_TYPE KnuthPEncode (int n, int *permutation) {
  FACTORIAL_TYPE place = n;
  FACTORIAL_TYPE encoding = 0;
  int i, j;

  for (i=0; i<n; i++) {
    int this_letter = permutation[i];
    encoding *= place--;
    for (j=i; j<n; j++) {
      permutation[j] < this_letter && encoding++;
      }
    }
  return encoding;
  }

void Initialise_Data() {
  int i;
#ifdef OLD_CODE
  g_lowest_bit[0] = 999999999;
  for (i=1; i<256; i++) {
    int lowest_power = i & ~ (i-1);
    g_clear_lowest_bit[i] = i ^ lowest_power;
    if (lowest_power == 1) { g_lowest_bit[i] = 0; }
    else if (lowest_power == 2) { g_lowest_bit[i] = 1; }
    else if (lowest_power == 4) { g_lowest_bit[i] = 2; }
    else if (lowest_power == 8) { g_lowest_bit[i] = 3; }
    else if (lowest_power == 16) { g_lowest_bit[i] = 4; }
    else if (lowest_power == 32) { g_lowest_bit[i] = 5; }
    else if (lowest_power == 64) { g_lowest_bit[i] = 6; }
    else if (lowest_power == 128) { g_lowest_bit[i] = 7; }
    else { 
      fprintf(stderr, "Oops: i=%d lowest_power=%d\n", i, lowest_power);
      exit(__LINE__); 
      }
    }
#endif
  
  /* g_change_state_to_increment_depth has to change each of the
   * four niblets in a word so that 
   *
   *   00 unseen  -> 00 unseen
   *   01 done    -> 01 done
   *   10 depth   -> 01 done
   *   11 depth+1 -> 10 depth
   */
  for (i=0; i<256; i++) {
    int a = i & 0xAA; /* high bits of each niblet */
    int b = i & 0x55; /* low bits of each niblet */
    g_change_state_to_increment_depth[i] = (a&(b<<1)) | ((a>>1)^b);
    }
  
  /* g_first_code_at_this_level gives the index of the first niblet
     whose value is 10 in this byte, or -1 if none are found */
  for (i=0; i<256; i++) {
    g_first_code_at_this_level[i] = 
      GETNIBLET(&i, 0) == 2 ? 0 :
      GETNIBLET(&i, 1) == 2 ? 1 :
      GETNIBLET(&i, 2) == 2 ? 2 :
      GETNIBLET(&i, 3) == 2 ? 3 :
      -1;
    }

  /* g_mark_lowest_as_done changes the lowest 10 to a 01 */
  for (i=0; i<256; i++) {
    int lowest = g_first_code_at_this_level[i];
    int value = i;
    if (lowest >= 0) {
      value &= ~g_niblet_masks[lowest];
      value |= 1 << (lowest*2);
      }
    g_mark_lowest_as_done[i] = value;
    }
  }

int main(argc, argv) 
  int argc;
  char **argv; {
  Initialise_Data();
#if 0
  { int perm[4]; for (n=0; n<24; n++) { KnuthPDecode(4, n, perm); printf("%2d: ", n); print_permutation(4, perm); fputs("\n", stdout); }  }
#endif
  while (--argc > 0) {
    int n = atoi(*++argv);
    DoN(n);
    }
  return 0;
  }

#ifdef OLD_CODE
void print_bitmap(FACTORIAL_TYPE n, unsigned char *bitmap) {
  FACTORIAL_TYPE i;
  for (i=0; i<n; i++) {
    i && (!(i%10)) && putchar(' ');
    if (GETBIT(bitmap, i)) { putchar('1'); }
    else { putchar('0'); }
    }
  }
#endif

void print_nibletmap(FACTORIAL_TYPE n, unsigned char *nibletmap) {
  FACTORIAL_TYPE i;
  for (i=0; i<n; i++) {
    i && (!(i%10)) && putchar(' ');
    putchar(g_niblet_names[GETNIBLET(nibletmap, i)]);
    }
  }

void print_permutation(int n, int *perm) {
  int i;
  for (i=0; i<n; i++) { i && putchar(' '); printf("%d", *perm++); }
  }

